// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
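//
// The routines below implement GHASH, the GF(2^128) hash used by AES-GCM,
// with the PCLMULQDQ carry-less multiply instruction (gcm_*_clmul) and with
// AVX encodings of the same approach (gcm_*_avx). Register roles, inferred
// from the code below, follow the System V AMD64 calling convention:
//   gcm_init_clmul / gcm_init_avx:   %rdi = key table out, %rsi = hash key H
//   gcm_gmult_clmul / gcm_gmult_avx: %rdi = 16-byte state Xi, %rsi = key table
//   gcm_ghash_clmul / gcm_ghash_avx: %rdi = Xi, %rsi = key table,
//                                    %rdx = input, %rcx = length in bytes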

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
.globl	gcm_init_clmul
.hidden gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.cfi_startproc

_CET_ENDBR
.L_init_clmul:
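// Precompute the hash-key table from H (loaded from %rsi): the code below
// stores H and H^2 at 0 and 16(%rdi), H^3 and H^4 at 48 and 64(%rdi), and
// the folded Karatsuba halves of each pair at 32 and 80(%rdi).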
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
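// The .byte sequences in this file are raw encodings of SSE instructions
// (chiefly pclmulqdq and pshufb) emitted as literal bytes. The three below
// are the carry-less multiplies of a Karatsuba-style 128x128-bit multiply:
//   pclmulqdq	$0x00,%xmm2,%xmm0
//   pclmulqdq	$0x11,%xmm2,%xmm1
//   pclmulqdq	$0x00,%xmm6,%xmm3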
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
	ret
.cfi_endproc

.size	gcm_init_clmul,.-gcm_init_clmul
.globl	gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.cfi_startproc
_CET_ENDBR
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	ret
.cfi_endproc
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
.globl	gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	32
gcm_ghash_clmul:
.cfi_startproc

_CET_ENDBR
.L_ghash_clmul:
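// Bulk GHASH: blocks are folded four at a time in .Lmod4_loop, then two at a
// time in .Lmod_loop/.Leven_tail, with a final odd block handled in
// .Lodd_tail (block counts follow from the 0x40/0x20/0x10 adjustments of
// %rcx below).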
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	cmpq	$0x30,%rcx
	jb	.Lskip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
.Lmod4_loop:
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:




	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	ret
.cfi_endproc

.size	gcm_ghash_clmul,.-gcm_ghash_clmul
.globl	gcm_init_avx
.hidden gcm_init_avx
.type	gcm_init_avx,@function
.align	32
gcm_init_avx:
.cfi_startproc

_CET_ENDBR
	vzeroupper
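// AVX counterpart of gcm_init_clmul. The loop below runs four times
// (%r10 = 4), emitting three 16-byte table entries per pass and thereby
// precomputing H^1 through H^8 (with the folded Karatsuba halves of each
// pair) for the eight-block loop in gcm_ghash_avx.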

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2


	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2


	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	ret

.cfi_endproc
.size	gcm_init_avx,.-gcm_init_avx
.globl	gcm_gmult_avx
.hidden gcm_gmult_avx
.type	gcm_gmult_avx,@function
.align	32
gcm_gmult_avx:
.cfi_startproc
_CET_ENDBR
	jmp	.L_gmult_clmul
.cfi_endproc
.size	gcm_gmult_avx,.-gcm_gmult_avx
.globl	gcm_ghash_avx
.hidden gcm_ghash_avx
.type	gcm_ghash_avx,@function
.align	32
gcm_ghash_avx:
.cfi_startproc

_CET_ENDBR
	vzeroupper
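// Bulk AVX GHASH: inputs of 128 bytes or more are folded eight blocks per
// iteration in .Loop8x_avx; shorter inputs and tails go through .Lshort_avx
// one block at a time and finish in .Ltail_avx.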

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
.Loop8x_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
.Lshort_avx:
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	ret
.cfi_endproc

.size	gcm_ghash_avx,.-gcm_ghash_avx
.section	.rodata
.align	64
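// Constants: .Lbswap_mask converts between the little-endian XMM byte order
// and GHASH's big-endian representation; .L0x1c2_polynomial is the reduction
// constant derived from the GHASH field polynomial; .L7_mask is used by the
// reduction in .Lmod4_loop above.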
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long	7,0,7,0
.align	64

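// ASCII credit string: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"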
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
.text
#endif
