1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
7.text
8
9
10chacha20_poly1305_constants:
11
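// Constants used by both the SSE and AVX2 code paths: the ChaCha20
// "expand 32-byte k" words, pshufb masks implementing 8- and 16-bit
// left-rotates of each 32-bit lane, block-counter increments, the
// Poly1305 "r" clamp mask, and byte masks for partial 16-byte blocks.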
12.section	__DATA,__const
13.p2align	6
14L$chacha20_consts:
15.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
16.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
17L$rol8:
18.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
19.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
20L$rol16:
21.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
22.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
23L$avx2_init:
24.long	0,0,0,0
25L$sse_inc:
26.long	1,0,0,0
27L$avx2_inc:
28.long	2,0,0,0,2,0,0,0
29L$clamp:
30.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
31.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
32.p2align	4
33L$and_masks:
34.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
35.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
36.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
37.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
38.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
39.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
40.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
44.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
45.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
46.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
47.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
48.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
49.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
50.text
51
52
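// poly_hash_ad_internal: absorb the additional data into the Poly1305
// accumulator. %rcx points at the AD, %r8 holds its length, the clamped
// "r" key halves live at 0(%rbp) and 8(%rbp), and the accumulator is kept
// in %r10:%r11:%r12. A 13-byte AD (the TLS case) takes a dedicated fast path.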
53.p2align	6
54poly_hash_ad_internal:
55
56
57	xorq	%r10,%r10
58	xorq	%r11,%r11
59	xorq	%r12,%r12
60	cmpq	$13,%r8
61	jne	L$hash_ad_loop
62L$poly_fast_tls_ad:
63
64	movq	(%rcx),%r10
65	movq	5(%rcx),%r11
66	shrq	$24,%r11
67	movq	$1,%r12
68	movq	0+0+0(%rbp),%rax
69	movq	%rax,%r15
70	mulq	%r10
71	movq	%rax,%r13
72	movq	%rdx,%r14
73	movq	0+0+0(%rbp),%rax
74	mulq	%r11
75	imulq	%r12,%r15
76	addq	%rax,%r14
77	adcq	%rdx,%r15
78	movq	8+0+0(%rbp),%rax
79	movq	%rax,%r9
80	mulq	%r10
81	addq	%rax,%r14
82	adcq	$0,%rdx
83	movq	%rdx,%r10
84	movq	8+0+0(%rbp),%rax
85	mulq	%r11
86	addq	%rax,%r15
87	adcq	$0,%rdx
88	imulq	%r12,%r9
89	addq	%r10,%r15
90	adcq	%rdx,%r9
91	movq	%r13,%r10
92	movq	%r14,%r11
93	movq	%r15,%r12
94	andq	$3,%r12
95	movq	%r15,%r13
96	andq	$-4,%r13
97	movq	%r9,%r14
98	shrdq	$2,%r9,%r15
99	shrq	$2,%r9
100	addq	%r13,%r15
101	adcq	%r14,%r9
102	addq	%r15,%r10
103	adcq	%r9,%r11
104	adcq	$0,%r12
105
106	ret
107L$hash_ad_loop:
108
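// Process the AD sixteen bytes at a time: add each block into the
// accumulator and multiply by r, reducing modulo 2^130-5 (the inline
// mul/shrd sequence repeated throughout this file).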
109	cmpq	$16,%r8
110	jb	L$hash_ad_tail
111	addq	0+0(%rcx),%r10
112	adcq	8+0(%rcx),%r11
113	adcq	$1,%r12
114	movq	0+0+0(%rbp),%rax
115	movq	%rax,%r15
116	mulq	%r10
117	movq	%rax,%r13
118	movq	%rdx,%r14
119	movq	0+0+0(%rbp),%rax
120	mulq	%r11
121	imulq	%r12,%r15
122	addq	%rax,%r14
123	adcq	%rdx,%r15
124	movq	8+0+0(%rbp),%rax
125	movq	%rax,%r9
126	mulq	%r10
127	addq	%rax,%r14
128	adcq	$0,%rdx
129	movq	%rdx,%r10
130	movq	8+0+0(%rbp),%rax
131	mulq	%r11
132	addq	%rax,%r15
133	adcq	$0,%rdx
134	imulq	%r12,%r9
135	addq	%r10,%r15
136	adcq	%rdx,%r9
137	movq	%r13,%r10
138	movq	%r14,%r11
139	movq	%r15,%r12
140	andq	$3,%r12
141	movq	%r15,%r13
142	andq	$-4,%r13
143	movq	%r9,%r14
144	shrdq	$2,%r9,%r15
145	shrq	$2,%r9
146	addq	%r13,%r15
147	adcq	%r14,%r9
148	addq	%r15,%r10
149	adcq	%r9,%r11
150	adcq	$0,%r12
151
152	leaq	16(%rcx),%rcx
153	subq	$16,%r8
154	jmp	L$hash_ad_loop
155L$hash_ad_tail:
156	cmpq	$0,%r8
157	je	L$hash_ad_done
158
159	xorq	%r13,%r13
160	xorq	%r14,%r14
161	xorq	%r15,%r15
162	addq	%r8,%rcx
163L$hash_ad_tail_loop:
164	shldq	$8,%r13,%r14
165	shlq	$8,%r13
166	movzbq	-1(%rcx),%r15
167	xorq	%r15,%r13
168	decq	%rcx
169	decq	%r8
170	jne	L$hash_ad_tail_loop
171
172	addq	%r13,%r10
173	adcq	%r14,%r11
174	adcq	$1,%r12
175	movq	0+0+0(%rbp),%rax
176	movq	%rax,%r15
177	mulq	%r10
178	movq	%rax,%r13
179	movq	%rdx,%r14
180	movq	0+0+0(%rbp),%rax
181	mulq	%r11
182	imulq	%r12,%r15
183	addq	%rax,%r14
184	adcq	%rdx,%r15
185	movq	8+0+0(%rbp),%rax
186	movq	%rax,%r9
187	mulq	%r10
188	addq	%rax,%r14
189	adcq	$0,%rdx
190	movq	%rdx,%r10
191	movq	8+0+0(%rbp),%rax
192	mulq	%r11
193	addq	%rax,%r15
194	adcq	$0,%rdx
195	imulq	%r12,%r9
196	addq	%r10,%r15
197	adcq	%rdx,%r9
198	movq	%r13,%r10
199	movq	%r14,%r11
200	movq	%r15,%r12
201	andq	$3,%r12
202	movq	%r15,%r13
203	andq	$-4,%r13
204	movq	%r9,%r14
205	shrdq	$2,%r9,%r15
206	shrq	$2,%r9
207	addq	%r13,%r15
208	adcq	%r14,%r9
209	addq	%r15,%r10
210	adcq	%r9,%r11
211	adcq	$0,%r12
212
213
214L$hash_ad_done:
215	ret
216
217
218
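// chacha20_poly1305_open: one-pass ChaCha20 decryption with Poly1305
// authentication. %rdi = output, %rsi = input ciphertext, %rdx = its length,
// %rcx = additional data, %r8 = AD length, %r9 = key/counter/nonce block;
// the computed 16-byte tag is written back through the pointer saved from %r9.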
219.globl	_chacha20_poly1305_open
220.private_extern _chacha20_poly1305_open
221
222.p2align	6
223_chacha20_poly1305_open:
224
225_CET_ENDBR
226	pushq	%rbp
227
228	pushq	%rbx
229
230	pushq	%r12
231
232	pushq	%r13
233
234	pushq	%r14
235
236	pushq	%r15
237
238
239
240	pushq	%r9
241
242	subq	$288 + 0 + 32,%rsp
243
244
245	leaq	32(%rsp),%rbp
246	andq	$-32,%rbp
247
248	movq	%rdx,%rbx
249	movq	%r8,0+0+32(%rbp)
250	movq	%rbx,8+0+32(%rbp)
251
252	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
253	andl	$288,%eax
254	xorl	$288,%eax
255	jz	chacha20_poly1305_open_avx2
256
257	cmpq	$128,%rbx
258	jbe	L$open_sse_128
259
260	movdqa	L$chacha20_consts(%rip),%xmm0
261	movdqu	0(%r9),%xmm4
262	movdqu	16(%r9),%xmm8
263	movdqu	32(%r9),%xmm12
264
265	movdqa	%xmm12,%xmm7
266
267	movdqa	%xmm4,0+48(%rbp)
268	movdqa	%xmm8,0+64(%rbp)
269	movdqa	%xmm12,0+96(%rbp)
270	movq	$10,%r10
271L$open_sse_init_rounds:
272	paddd	%xmm4,%xmm0
273	pxor	%xmm0,%xmm12
274	pshufb	L$rol16(%rip),%xmm12
275	paddd	%xmm12,%xmm8
276	pxor	%xmm8,%xmm4
277	movdqa	%xmm4,%xmm3
278	pslld	$12,%xmm3
279	psrld	$20,%xmm4
280	pxor	%xmm3,%xmm4
281	paddd	%xmm4,%xmm0
282	pxor	%xmm0,%xmm12
283	pshufb	L$rol8(%rip),%xmm12
284	paddd	%xmm12,%xmm8
285	pxor	%xmm8,%xmm4
286	movdqa	%xmm4,%xmm3
287	pslld	$7,%xmm3
288	psrld	$25,%xmm4
289	pxor	%xmm3,%xmm4
290.byte	102,15,58,15,228,4
291.byte	102,69,15,58,15,192,8
292.byte	102,69,15,58,15,228,12
293	paddd	%xmm4,%xmm0
294	pxor	%xmm0,%xmm12
295	pshufb	L$rol16(%rip),%xmm12
296	paddd	%xmm12,%xmm8
297	pxor	%xmm8,%xmm4
298	movdqa	%xmm4,%xmm3
299	pslld	$12,%xmm3
300	psrld	$20,%xmm4
301	pxor	%xmm3,%xmm4
302	paddd	%xmm4,%xmm0
303	pxor	%xmm0,%xmm12
304	pshufb	L$rol8(%rip),%xmm12
305	paddd	%xmm12,%xmm8
306	pxor	%xmm8,%xmm4
307	movdqa	%xmm4,%xmm3
308	pslld	$7,%xmm3
309	psrld	$25,%xmm4
310	pxor	%xmm3,%xmm4
311.byte	102,15,58,15,228,12
312.byte	102,69,15,58,15,192,8
313.byte	102,69,15,58,15,228,4
314
315	decq	%r10
316	jne	L$open_sse_init_rounds
317
318	paddd	L$chacha20_consts(%rip),%xmm0
319	paddd	0+48(%rbp),%xmm4
320
321	pand	L$clamp(%rip),%xmm0
322	movdqa	%xmm0,0+0(%rbp)
323	movdqa	%xmm4,0+16(%rbp)
324
325	movq	%r8,%r8
326	call	poly_hash_ad_internal
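// Main SSE open loop: each iteration generates four 64-byte ChaCha20 blocks
// (256 bytes of keystream) while interleaving the Poly1305 update over the
// ciphertext being consumed.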
327L$open_sse_main_loop:
328	cmpq	$256,%rbx
329	jb	L$open_sse_tail
330
331	movdqa	L$chacha20_consts(%rip),%xmm0
332	movdqa	0+48(%rbp),%xmm4
333	movdqa	0+64(%rbp),%xmm8
334	movdqa	%xmm0,%xmm1
335	movdqa	%xmm4,%xmm5
336	movdqa	%xmm8,%xmm9
337	movdqa	%xmm0,%xmm2
338	movdqa	%xmm4,%xmm6
339	movdqa	%xmm8,%xmm10
340	movdqa	%xmm0,%xmm3
341	movdqa	%xmm4,%xmm7
342	movdqa	%xmm8,%xmm11
343	movdqa	0+96(%rbp),%xmm15
344	paddd	L$sse_inc(%rip),%xmm15
345	movdqa	%xmm15,%xmm14
346	paddd	L$sse_inc(%rip),%xmm14
347	movdqa	%xmm14,%xmm13
348	paddd	L$sse_inc(%rip),%xmm13
349	movdqa	%xmm13,%xmm12
350	paddd	L$sse_inc(%rip),%xmm12
351	movdqa	%xmm12,0+96(%rbp)
352	movdqa	%xmm13,0+112(%rbp)
353	movdqa	%xmm14,0+128(%rbp)
354	movdqa	%xmm15,0+144(%rbp)
355
356
357
358	movq	$4,%rcx
359	movq	%rsi,%r8
360L$open_sse_main_loop_rounds:
361	movdqa	%xmm8,0+80(%rbp)
362	movdqa	L$rol16(%rip),%xmm8
363	paddd	%xmm7,%xmm3
364	paddd	%xmm6,%xmm2
365	paddd	%xmm5,%xmm1
366	paddd	%xmm4,%xmm0
367	pxor	%xmm3,%xmm15
368	pxor	%xmm2,%xmm14
369	pxor	%xmm1,%xmm13
370	pxor	%xmm0,%xmm12
371.byte	102,69,15,56,0,248
372.byte	102,69,15,56,0,240
373.byte	102,69,15,56,0,232
374.byte	102,69,15,56,0,224
375	movdqa	0+80(%rbp),%xmm8
376	paddd	%xmm15,%xmm11
377	paddd	%xmm14,%xmm10
378	paddd	%xmm13,%xmm9
379	paddd	%xmm12,%xmm8
380	pxor	%xmm11,%xmm7
381	addq	0+0(%r8),%r10
382	adcq	8+0(%r8),%r11
383	adcq	$1,%r12
384
385	leaq	16(%r8),%r8
386	pxor	%xmm10,%xmm6
387	pxor	%xmm9,%xmm5
388	pxor	%xmm8,%xmm4
389	movdqa	%xmm8,0+80(%rbp)
390	movdqa	%xmm7,%xmm8
391	psrld	$20,%xmm8
392	pslld	$32-20,%xmm7
393	pxor	%xmm8,%xmm7
394	movdqa	%xmm6,%xmm8
395	psrld	$20,%xmm8
396	pslld	$32-20,%xmm6
397	pxor	%xmm8,%xmm6
398	movdqa	%xmm5,%xmm8
399	psrld	$20,%xmm8
400	pslld	$32-20,%xmm5
401	pxor	%xmm8,%xmm5
402	movdqa	%xmm4,%xmm8
403	psrld	$20,%xmm8
404	pslld	$32-20,%xmm4
405	pxor	%xmm8,%xmm4
406	movq	0+0+0(%rbp),%rax
407	movq	%rax,%r15
408	mulq	%r10
409	movq	%rax,%r13
410	movq	%rdx,%r14
411	movq	0+0+0(%rbp),%rax
412	mulq	%r11
413	imulq	%r12,%r15
414	addq	%rax,%r14
415	adcq	%rdx,%r15
416	movdqa	L$rol8(%rip),%xmm8
417	paddd	%xmm7,%xmm3
418	paddd	%xmm6,%xmm2
419	paddd	%xmm5,%xmm1
420	paddd	%xmm4,%xmm0
421	pxor	%xmm3,%xmm15
422	pxor	%xmm2,%xmm14
423	pxor	%xmm1,%xmm13
424	pxor	%xmm0,%xmm12
425.byte	102,69,15,56,0,248
426.byte	102,69,15,56,0,240
427.byte	102,69,15,56,0,232
428.byte	102,69,15,56,0,224
429	movdqa	0+80(%rbp),%xmm8
430	paddd	%xmm15,%xmm11
431	paddd	%xmm14,%xmm10
432	paddd	%xmm13,%xmm9
433	paddd	%xmm12,%xmm8
434	pxor	%xmm11,%xmm7
435	pxor	%xmm10,%xmm6
436	movq	8+0+0(%rbp),%rax
437	movq	%rax,%r9
438	mulq	%r10
439	addq	%rax,%r14
440	adcq	$0,%rdx
441	movq	%rdx,%r10
442	movq	8+0+0(%rbp),%rax
443	mulq	%r11
444	addq	%rax,%r15
445	adcq	$0,%rdx
446	pxor	%xmm9,%xmm5
447	pxor	%xmm8,%xmm4
448	movdqa	%xmm8,0+80(%rbp)
449	movdqa	%xmm7,%xmm8
450	psrld	$25,%xmm8
451	pslld	$32-25,%xmm7
452	pxor	%xmm8,%xmm7
453	movdqa	%xmm6,%xmm8
454	psrld	$25,%xmm8
455	pslld	$32-25,%xmm6
456	pxor	%xmm8,%xmm6
457	movdqa	%xmm5,%xmm8
458	psrld	$25,%xmm8
459	pslld	$32-25,%xmm5
460	pxor	%xmm8,%xmm5
461	movdqa	%xmm4,%xmm8
462	psrld	$25,%xmm8
463	pslld	$32-25,%xmm4
464	pxor	%xmm8,%xmm4
465	movdqa	0+80(%rbp),%xmm8
466	imulq	%r12,%r9
467	addq	%r10,%r15
468	adcq	%rdx,%r9
469.byte	102,15,58,15,255,4
470.byte	102,69,15,58,15,219,8
471.byte	102,69,15,58,15,255,12
472.byte	102,15,58,15,246,4
473.byte	102,69,15,58,15,210,8
474.byte	102,69,15,58,15,246,12
475.byte	102,15,58,15,237,4
476.byte	102,69,15,58,15,201,8
477.byte	102,69,15,58,15,237,12
478.byte	102,15,58,15,228,4
479.byte	102,69,15,58,15,192,8
480.byte	102,69,15,58,15,228,12
481	movdqa	%xmm8,0+80(%rbp)
482	movdqa	L$rol16(%rip),%xmm8
483	paddd	%xmm7,%xmm3
484	paddd	%xmm6,%xmm2
485	paddd	%xmm5,%xmm1
486	paddd	%xmm4,%xmm0
487	pxor	%xmm3,%xmm15
488	pxor	%xmm2,%xmm14
489	movq	%r13,%r10
490	movq	%r14,%r11
491	movq	%r15,%r12
492	andq	$3,%r12
493	movq	%r15,%r13
494	andq	$-4,%r13
495	movq	%r9,%r14
496	shrdq	$2,%r9,%r15
497	shrq	$2,%r9
498	addq	%r13,%r15
499	adcq	%r14,%r9
500	addq	%r15,%r10
501	adcq	%r9,%r11
502	adcq	$0,%r12
503	pxor	%xmm1,%xmm13
504	pxor	%xmm0,%xmm12
505.byte	102,69,15,56,0,248
506.byte	102,69,15,56,0,240
507.byte	102,69,15,56,0,232
508.byte	102,69,15,56,0,224
509	movdqa	0+80(%rbp),%xmm8
510	paddd	%xmm15,%xmm11
511	paddd	%xmm14,%xmm10
512	paddd	%xmm13,%xmm9
513	paddd	%xmm12,%xmm8
514	pxor	%xmm11,%xmm7
515	pxor	%xmm10,%xmm6
516	pxor	%xmm9,%xmm5
517	pxor	%xmm8,%xmm4
518	movdqa	%xmm8,0+80(%rbp)
519	movdqa	%xmm7,%xmm8
520	psrld	$20,%xmm8
521	pslld	$32-20,%xmm7
522	pxor	%xmm8,%xmm7
523	movdqa	%xmm6,%xmm8
524	psrld	$20,%xmm8
525	pslld	$32-20,%xmm6
526	pxor	%xmm8,%xmm6
527	movdqa	%xmm5,%xmm8
528	psrld	$20,%xmm8
529	pslld	$32-20,%xmm5
530	pxor	%xmm8,%xmm5
531	movdqa	%xmm4,%xmm8
532	psrld	$20,%xmm8
533	pslld	$32-20,%xmm4
534	pxor	%xmm8,%xmm4
535	movdqa	L$rol8(%rip),%xmm8
536	paddd	%xmm7,%xmm3
537	paddd	%xmm6,%xmm2
538	paddd	%xmm5,%xmm1
539	paddd	%xmm4,%xmm0
540	pxor	%xmm3,%xmm15
541	pxor	%xmm2,%xmm14
542	pxor	%xmm1,%xmm13
543	pxor	%xmm0,%xmm12
544.byte	102,69,15,56,0,248
545.byte	102,69,15,56,0,240
546.byte	102,69,15,56,0,232
547.byte	102,69,15,56,0,224
548	movdqa	0+80(%rbp),%xmm8
549	paddd	%xmm15,%xmm11
550	paddd	%xmm14,%xmm10
551	paddd	%xmm13,%xmm9
552	paddd	%xmm12,%xmm8
553	pxor	%xmm11,%xmm7
554	pxor	%xmm10,%xmm6
555	pxor	%xmm9,%xmm5
556	pxor	%xmm8,%xmm4
557	movdqa	%xmm8,0+80(%rbp)
558	movdqa	%xmm7,%xmm8
559	psrld	$25,%xmm8
560	pslld	$32-25,%xmm7
561	pxor	%xmm8,%xmm7
562	movdqa	%xmm6,%xmm8
563	psrld	$25,%xmm8
564	pslld	$32-25,%xmm6
565	pxor	%xmm8,%xmm6
566	movdqa	%xmm5,%xmm8
567	psrld	$25,%xmm8
568	pslld	$32-25,%xmm5
569	pxor	%xmm8,%xmm5
570	movdqa	%xmm4,%xmm8
571	psrld	$25,%xmm8
572	pslld	$32-25,%xmm4
573	pxor	%xmm8,%xmm4
574	movdqa	0+80(%rbp),%xmm8
575.byte	102,15,58,15,255,12
576.byte	102,69,15,58,15,219,8
577.byte	102,69,15,58,15,255,4
578.byte	102,15,58,15,246,12
579.byte	102,69,15,58,15,210,8
580.byte	102,69,15,58,15,246,4
581.byte	102,15,58,15,237,12
582.byte	102,69,15,58,15,201,8
583.byte	102,69,15,58,15,237,4
584.byte	102,15,58,15,228,12
585.byte	102,69,15,58,15,192,8
586.byte	102,69,15,58,15,228,4
587
588	decq	%rcx
589	jge	L$open_sse_main_loop_rounds
590	addq	0+0(%r8),%r10
591	adcq	8+0(%r8),%r11
592	adcq	$1,%r12
593	movq	0+0+0(%rbp),%rax
594	movq	%rax,%r15
595	mulq	%r10
596	movq	%rax,%r13
597	movq	%rdx,%r14
598	movq	0+0+0(%rbp),%rax
599	mulq	%r11
600	imulq	%r12,%r15
601	addq	%rax,%r14
602	adcq	%rdx,%r15
603	movq	8+0+0(%rbp),%rax
604	movq	%rax,%r9
605	mulq	%r10
606	addq	%rax,%r14
607	adcq	$0,%rdx
608	movq	%rdx,%r10
609	movq	8+0+0(%rbp),%rax
610	mulq	%r11
611	addq	%rax,%r15
612	adcq	$0,%rdx
613	imulq	%r12,%r9
614	addq	%r10,%r15
615	adcq	%rdx,%r9
616	movq	%r13,%r10
617	movq	%r14,%r11
618	movq	%r15,%r12
619	andq	$3,%r12
620	movq	%r15,%r13
621	andq	$-4,%r13
622	movq	%r9,%r14
623	shrdq	$2,%r9,%r15
624	shrq	$2,%r9
625	addq	%r13,%r15
626	adcq	%r14,%r9
627	addq	%r15,%r10
628	adcq	%r9,%r11
629	adcq	$0,%r12
630
631	leaq	16(%r8),%r8
632	cmpq	$-6,%rcx
633	jg	L$open_sse_main_loop_rounds
634	paddd	L$chacha20_consts(%rip),%xmm3
635	paddd	0+48(%rbp),%xmm7
636	paddd	0+64(%rbp),%xmm11
637	paddd	0+144(%rbp),%xmm15
638	paddd	L$chacha20_consts(%rip),%xmm2
639	paddd	0+48(%rbp),%xmm6
640	paddd	0+64(%rbp),%xmm10
641	paddd	0+128(%rbp),%xmm14
642	paddd	L$chacha20_consts(%rip),%xmm1
643	paddd	0+48(%rbp),%xmm5
644	paddd	0+64(%rbp),%xmm9
645	paddd	0+112(%rbp),%xmm13
646	paddd	L$chacha20_consts(%rip),%xmm0
647	paddd	0+48(%rbp),%xmm4
648	paddd	0+64(%rbp),%xmm8
649	paddd	0+96(%rbp),%xmm12
650	movdqa	%xmm12,0+80(%rbp)
651	movdqu	0 + 0(%rsi),%xmm12
652	pxor	%xmm3,%xmm12
653	movdqu	%xmm12,0 + 0(%rdi)
654	movdqu	16 + 0(%rsi),%xmm12
655	pxor	%xmm7,%xmm12
656	movdqu	%xmm12,16 + 0(%rdi)
657	movdqu	32 + 0(%rsi),%xmm12
658	pxor	%xmm11,%xmm12
659	movdqu	%xmm12,32 + 0(%rdi)
660	movdqu	48 + 0(%rsi),%xmm12
661	pxor	%xmm15,%xmm12
662	movdqu	%xmm12,48 + 0(%rdi)
663	movdqu	0 + 64(%rsi),%xmm3
664	movdqu	16 + 64(%rsi),%xmm7
665	movdqu	32 + 64(%rsi),%xmm11
666	movdqu	48 + 64(%rsi),%xmm15
667	pxor	%xmm3,%xmm2
668	pxor	%xmm7,%xmm6
669	pxor	%xmm11,%xmm10
670	pxor	%xmm14,%xmm15
671	movdqu	%xmm2,0 + 64(%rdi)
672	movdqu	%xmm6,16 + 64(%rdi)
673	movdqu	%xmm10,32 + 64(%rdi)
674	movdqu	%xmm15,48 + 64(%rdi)
675	movdqu	0 + 128(%rsi),%xmm3
676	movdqu	16 + 128(%rsi),%xmm7
677	movdqu	32 + 128(%rsi),%xmm11
678	movdqu	48 + 128(%rsi),%xmm15
679	pxor	%xmm3,%xmm1
680	pxor	%xmm7,%xmm5
681	pxor	%xmm11,%xmm9
682	pxor	%xmm13,%xmm15
683	movdqu	%xmm1,0 + 128(%rdi)
684	movdqu	%xmm5,16 + 128(%rdi)
685	movdqu	%xmm9,32 + 128(%rdi)
686	movdqu	%xmm15,48 + 128(%rdi)
687	movdqu	0 + 192(%rsi),%xmm3
688	movdqu	16 + 192(%rsi),%xmm7
689	movdqu	32 + 192(%rsi),%xmm11
690	movdqu	48 + 192(%rsi),%xmm15
691	pxor	%xmm3,%xmm0
692	pxor	%xmm7,%xmm4
693	pxor	%xmm11,%xmm8
694	pxor	0+80(%rbp),%xmm15
695	movdqu	%xmm0,0 + 192(%rdi)
696	movdqu	%xmm4,16 + 192(%rdi)
697	movdqu	%xmm8,32 + 192(%rdi)
698	movdqu	%xmm15,48 + 192(%rdi)
699
700	leaq	256(%rsi),%rsi
701	leaq	256(%rdi),%rdi
702	subq	$256,%rbx
703	jmp	L$open_sse_main_loop
704L$open_sse_tail:
705
706	testq	%rbx,%rbx
707	jz	L$open_sse_finalize
708	cmpq	$192,%rbx
709	ja	L$open_sse_tail_256
710	cmpq	$128,%rbx
711	ja	L$open_sse_tail_192
712	cmpq	$64,%rbx
713	ja	L$open_sse_tail_128
714	movdqa	L$chacha20_consts(%rip),%xmm0
715	movdqa	0+48(%rbp),%xmm4
716	movdqa	0+64(%rbp),%xmm8
717	movdqa	0+96(%rbp),%xmm12
718	paddd	L$sse_inc(%rip),%xmm12
719	movdqa	%xmm12,0+96(%rbp)
720
721	xorq	%r8,%r8
722	movq	%rbx,%rcx
723	cmpq	$16,%rcx
724	jb	L$open_sse_tail_64_rounds
725L$open_sse_tail_64_rounds_and_x1hash:
726	addq	0+0(%rsi,%r8,1),%r10
727	adcq	8+0(%rsi,%r8,1),%r11
728	adcq	$1,%r12
729	movq	0+0+0(%rbp),%rax
730	movq	%rax,%r15
731	mulq	%r10
732	movq	%rax,%r13
733	movq	%rdx,%r14
734	movq	0+0+0(%rbp),%rax
735	mulq	%r11
736	imulq	%r12,%r15
737	addq	%rax,%r14
738	adcq	%rdx,%r15
739	movq	8+0+0(%rbp),%rax
740	movq	%rax,%r9
741	mulq	%r10
742	addq	%rax,%r14
743	adcq	$0,%rdx
744	movq	%rdx,%r10
745	movq	8+0+0(%rbp),%rax
746	mulq	%r11
747	addq	%rax,%r15
748	adcq	$0,%rdx
749	imulq	%r12,%r9
750	addq	%r10,%r15
751	adcq	%rdx,%r9
752	movq	%r13,%r10
753	movq	%r14,%r11
754	movq	%r15,%r12
755	andq	$3,%r12
756	movq	%r15,%r13
757	andq	$-4,%r13
758	movq	%r9,%r14
759	shrdq	$2,%r9,%r15
760	shrq	$2,%r9
761	addq	%r13,%r15
762	adcq	%r14,%r9
763	addq	%r15,%r10
764	adcq	%r9,%r11
765	adcq	$0,%r12
766
767	subq	$16,%rcx
768L$open_sse_tail_64_rounds:
769	addq	$16,%r8
770	paddd	%xmm4,%xmm0
771	pxor	%xmm0,%xmm12
772	pshufb	L$rol16(%rip),%xmm12
773	paddd	%xmm12,%xmm8
774	pxor	%xmm8,%xmm4
775	movdqa	%xmm4,%xmm3
776	pslld	$12,%xmm3
777	psrld	$20,%xmm4
778	pxor	%xmm3,%xmm4
779	paddd	%xmm4,%xmm0
780	pxor	%xmm0,%xmm12
781	pshufb	L$rol8(%rip),%xmm12
782	paddd	%xmm12,%xmm8
783	pxor	%xmm8,%xmm4
784	movdqa	%xmm4,%xmm3
785	pslld	$7,%xmm3
786	psrld	$25,%xmm4
787	pxor	%xmm3,%xmm4
788.byte	102,15,58,15,228,4
789.byte	102,69,15,58,15,192,8
790.byte	102,69,15,58,15,228,12
791	paddd	%xmm4,%xmm0
792	pxor	%xmm0,%xmm12
793	pshufb	L$rol16(%rip),%xmm12
794	paddd	%xmm12,%xmm8
795	pxor	%xmm8,%xmm4
796	movdqa	%xmm4,%xmm3
797	pslld	$12,%xmm3
798	psrld	$20,%xmm4
799	pxor	%xmm3,%xmm4
800	paddd	%xmm4,%xmm0
801	pxor	%xmm0,%xmm12
802	pshufb	L$rol8(%rip),%xmm12
803	paddd	%xmm12,%xmm8
804	pxor	%xmm8,%xmm4
805	movdqa	%xmm4,%xmm3
806	pslld	$7,%xmm3
807	psrld	$25,%xmm4
808	pxor	%xmm3,%xmm4
809.byte	102,15,58,15,228,12
810.byte	102,69,15,58,15,192,8
811.byte	102,69,15,58,15,228,4
812
813	cmpq	$16,%rcx
814	jae	L$open_sse_tail_64_rounds_and_x1hash
815	cmpq	$160,%r8
816	jne	L$open_sse_tail_64_rounds
817	paddd	L$chacha20_consts(%rip),%xmm0
818	paddd	0+48(%rbp),%xmm4
819	paddd	0+64(%rbp),%xmm8
820	paddd	0+96(%rbp),%xmm12
821
822	jmp	L$open_sse_tail_64_dec_loop
823
824L$open_sse_tail_128:
825	movdqa	L$chacha20_consts(%rip),%xmm0
826	movdqa	0+48(%rbp),%xmm4
827	movdqa	0+64(%rbp),%xmm8
828	movdqa	%xmm0,%xmm1
829	movdqa	%xmm4,%xmm5
830	movdqa	%xmm8,%xmm9
831	movdqa	0+96(%rbp),%xmm13
832	paddd	L$sse_inc(%rip),%xmm13
833	movdqa	%xmm13,%xmm12
834	paddd	L$sse_inc(%rip),%xmm12
835	movdqa	%xmm12,0+96(%rbp)
836	movdqa	%xmm13,0+112(%rbp)
837
838	movq	%rbx,%rcx
839	andq	$-16,%rcx
840	xorq	%r8,%r8
841L$open_sse_tail_128_rounds_and_x1hash:
842	addq	0+0(%rsi,%r8,1),%r10
843	adcq	8+0(%rsi,%r8,1),%r11
844	adcq	$1,%r12
845	movq	0+0+0(%rbp),%rax
846	movq	%rax,%r15
847	mulq	%r10
848	movq	%rax,%r13
849	movq	%rdx,%r14
850	movq	0+0+0(%rbp),%rax
851	mulq	%r11
852	imulq	%r12,%r15
853	addq	%rax,%r14
854	adcq	%rdx,%r15
855	movq	8+0+0(%rbp),%rax
856	movq	%rax,%r9
857	mulq	%r10
858	addq	%rax,%r14
859	adcq	$0,%rdx
860	movq	%rdx,%r10
861	movq	8+0+0(%rbp),%rax
862	mulq	%r11
863	addq	%rax,%r15
864	adcq	$0,%rdx
865	imulq	%r12,%r9
866	addq	%r10,%r15
867	adcq	%rdx,%r9
868	movq	%r13,%r10
869	movq	%r14,%r11
870	movq	%r15,%r12
871	andq	$3,%r12
872	movq	%r15,%r13
873	andq	$-4,%r13
874	movq	%r9,%r14
875	shrdq	$2,%r9,%r15
876	shrq	$2,%r9
877	addq	%r13,%r15
878	adcq	%r14,%r9
879	addq	%r15,%r10
880	adcq	%r9,%r11
881	adcq	$0,%r12
882
883L$open_sse_tail_128_rounds:
884	addq	$16,%r8
885	paddd	%xmm4,%xmm0
886	pxor	%xmm0,%xmm12
887	pshufb	L$rol16(%rip),%xmm12
888	paddd	%xmm12,%xmm8
889	pxor	%xmm8,%xmm4
890	movdqa	%xmm4,%xmm3
891	pslld	$12,%xmm3
892	psrld	$20,%xmm4
893	pxor	%xmm3,%xmm4
894	paddd	%xmm4,%xmm0
895	pxor	%xmm0,%xmm12
896	pshufb	L$rol8(%rip),%xmm12
897	paddd	%xmm12,%xmm8
898	pxor	%xmm8,%xmm4
899	movdqa	%xmm4,%xmm3
900	pslld	$7,%xmm3
901	psrld	$25,%xmm4
902	pxor	%xmm3,%xmm4
903.byte	102,15,58,15,228,4
904.byte	102,69,15,58,15,192,8
905.byte	102,69,15,58,15,228,12
906	paddd	%xmm5,%xmm1
907	pxor	%xmm1,%xmm13
908	pshufb	L$rol16(%rip),%xmm13
909	paddd	%xmm13,%xmm9
910	pxor	%xmm9,%xmm5
911	movdqa	%xmm5,%xmm3
912	pslld	$12,%xmm3
913	psrld	$20,%xmm5
914	pxor	%xmm3,%xmm5
915	paddd	%xmm5,%xmm1
916	pxor	%xmm1,%xmm13
917	pshufb	L$rol8(%rip),%xmm13
918	paddd	%xmm13,%xmm9
919	pxor	%xmm9,%xmm5
920	movdqa	%xmm5,%xmm3
921	pslld	$7,%xmm3
922	psrld	$25,%xmm5
923	pxor	%xmm3,%xmm5
924.byte	102,15,58,15,237,4
925.byte	102,69,15,58,15,201,8
926.byte	102,69,15,58,15,237,12
927	paddd	%xmm4,%xmm0
928	pxor	%xmm0,%xmm12
929	pshufb	L$rol16(%rip),%xmm12
930	paddd	%xmm12,%xmm8
931	pxor	%xmm8,%xmm4
932	movdqa	%xmm4,%xmm3
933	pslld	$12,%xmm3
934	psrld	$20,%xmm4
935	pxor	%xmm3,%xmm4
936	paddd	%xmm4,%xmm0
937	pxor	%xmm0,%xmm12
938	pshufb	L$rol8(%rip),%xmm12
939	paddd	%xmm12,%xmm8
940	pxor	%xmm8,%xmm4
941	movdqa	%xmm4,%xmm3
942	pslld	$7,%xmm3
943	psrld	$25,%xmm4
944	pxor	%xmm3,%xmm4
945.byte	102,15,58,15,228,12
946.byte	102,69,15,58,15,192,8
947.byte	102,69,15,58,15,228,4
948	paddd	%xmm5,%xmm1
949	pxor	%xmm1,%xmm13
950	pshufb	L$rol16(%rip),%xmm13
951	paddd	%xmm13,%xmm9
952	pxor	%xmm9,%xmm5
953	movdqa	%xmm5,%xmm3
954	pslld	$12,%xmm3
955	psrld	$20,%xmm5
956	pxor	%xmm3,%xmm5
957	paddd	%xmm5,%xmm1
958	pxor	%xmm1,%xmm13
959	pshufb	L$rol8(%rip),%xmm13
960	paddd	%xmm13,%xmm9
961	pxor	%xmm9,%xmm5
962	movdqa	%xmm5,%xmm3
963	pslld	$7,%xmm3
964	psrld	$25,%xmm5
965	pxor	%xmm3,%xmm5
966.byte	102,15,58,15,237,12
967.byte	102,69,15,58,15,201,8
968.byte	102,69,15,58,15,237,4
969
970	cmpq	%rcx,%r8
971	jb	L$open_sse_tail_128_rounds_and_x1hash
972	cmpq	$160,%r8
973	jne	L$open_sse_tail_128_rounds
974	paddd	L$chacha20_consts(%rip),%xmm1
975	paddd	0+48(%rbp),%xmm5
976	paddd	0+64(%rbp),%xmm9
977	paddd	0+112(%rbp),%xmm13
978	paddd	L$chacha20_consts(%rip),%xmm0
979	paddd	0+48(%rbp),%xmm4
980	paddd	0+64(%rbp),%xmm8
981	paddd	0+96(%rbp),%xmm12
982	movdqu	0 + 0(%rsi),%xmm3
983	movdqu	16 + 0(%rsi),%xmm7
984	movdqu	32 + 0(%rsi),%xmm11
985	movdqu	48 + 0(%rsi),%xmm15
986	pxor	%xmm3,%xmm1
987	pxor	%xmm7,%xmm5
988	pxor	%xmm11,%xmm9
989	pxor	%xmm13,%xmm15
990	movdqu	%xmm1,0 + 0(%rdi)
991	movdqu	%xmm5,16 + 0(%rdi)
992	movdqu	%xmm9,32 + 0(%rdi)
993	movdqu	%xmm15,48 + 0(%rdi)
994
995	subq	$64,%rbx
996	leaq	64(%rsi),%rsi
997	leaq	64(%rdi),%rdi
998	jmp	L$open_sse_tail_64_dec_loop
999
1000L$open_sse_tail_192:
1001	movdqa	L$chacha20_consts(%rip),%xmm0
1002	movdqa	0+48(%rbp),%xmm4
1003	movdqa	0+64(%rbp),%xmm8
1004	movdqa	%xmm0,%xmm1
1005	movdqa	%xmm4,%xmm5
1006	movdqa	%xmm8,%xmm9
1007	movdqa	%xmm0,%xmm2
1008	movdqa	%xmm4,%xmm6
1009	movdqa	%xmm8,%xmm10
1010	movdqa	0+96(%rbp),%xmm14
1011	paddd	L$sse_inc(%rip),%xmm14
1012	movdqa	%xmm14,%xmm13
1013	paddd	L$sse_inc(%rip),%xmm13
1014	movdqa	%xmm13,%xmm12
1015	paddd	L$sse_inc(%rip),%xmm12
1016	movdqa	%xmm12,0+96(%rbp)
1017	movdqa	%xmm13,0+112(%rbp)
1018	movdqa	%xmm14,0+128(%rbp)
1019
1020	movq	%rbx,%rcx
1021	movq	$160,%r8
1022	cmpq	$160,%rcx
1023	cmovgq	%r8,%rcx
1024	andq	$-16,%rcx
1025	xorq	%r8,%r8
1026L$open_sse_tail_192_rounds_and_x1hash:
1027	addq	0+0(%rsi,%r8,1),%r10
1028	adcq	8+0(%rsi,%r8,1),%r11
1029	adcq	$1,%r12
1030	movq	0+0+0(%rbp),%rax
1031	movq	%rax,%r15
1032	mulq	%r10
1033	movq	%rax,%r13
1034	movq	%rdx,%r14
1035	movq	0+0+0(%rbp),%rax
1036	mulq	%r11
1037	imulq	%r12,%r15
1038	addq	%rax,%r14
1039	adcq	%rdx,%r15
1040	movq	8+0+0(%rbp),%rax
1041	movq	%rax,%r9
1042	mulq	%r10
1043	addq	%rax,%r14
1044	adcq	$0,%rdx
1045	movq	%rdx,%r10
1046	movq	8+0+0(%rbp),%rax
1047	mulq	%r11
1048	addq	%rax,%r15
1049	adcq	$0,%rdx
1050	imulq	%r12,%r9
1051	addq	%r10,%r15
1052	adcq	%rdx,%r9
1053	movq	%r13,%r10
1054	movq	%r14,%r11
1055	movq	%r15,%r12
1056	andq	$3,%r12
1057	movq	%r15,%r13
1058	andq	$-4,%r13
1059	movq	%r9,%r14
1060	shrdq	$2,%r9,%r15
1061	shrq	$2,%r9
1062	addq	%r13,%r15
1063	adcq	%r14,%r9
1064	addq	%r15,%r10
1065	adcq	%r9,%r11
1066	adcq	$0,%r12
1067
1068L$open_sse_tail_192_rounds:
1069	addq	$16,%r8
1070	paddd	%xmm4,%xmm0
1071	pxor	%xmm0,%xmm12
1072	pshufb	L$rol16(%rip),%xmm12
1073	paddd	%xmm12,%xmm8
1074	pxor	%xmm8,%xmm4
1075	movdqa	%xmm4,%xmm3
1076	pslld	$12,%xmm3
1077	psrld	$20,%xmm4
1078	pxor	%xmm3,%xmm4
1079	paddd	%xmm4,%xmm0
1080	pxor	%xmm0,%xmm12
1081	pshufb	L$rol8(%rip),%xmm12
1082	paddd	%xmm12,%xmm8
1083	pxor	%xmm8,%xmm4
1084	movdqa	%xmm4,%xmm3
1085	pslld	$7,%xmm3
1086	psrld	$25,%xmm4
1087	pxor	%xmm3,%xmm4
1088.byte	102,15,58,15,228,4
1089.byte	102,69,15,58,15,192,8
1090.byte	102,69,15,58,15,228,12
1091	paddd	%xmm5,%xmm1
1092	pxor	%xmm1,%xmm13
1093	pshufb	L$rol16(%rip),%xmm13
1094	paddd	%xmm13,%xmm9
1095	pxor	%xmm9,%xmm5
1096	movdqa	%xmm5,%xmm3
1097	pslld	$12,%xmm3
1098	psrld	$20,%xmm5
1099	pxor	%xmm3,%xmm5
1100	paddd	%xmm5,%xmm1
1101	pxor	%xmm1,%xmm13
1102	pshufb	L$rol8(%rip),%xmm13
1103	paddd	%xmm13,%xmm9
1104	pxor	%xmm9,%xmm5
1105	movdqa	%xmm5,%xmm3
1106	pslld	$7,%xmm3
1107	psrld	$25,%xmm5
1108	pxor	%xmm3,%xmm5
1109.byte	102,15,58,15,237,4
1110.byte	102,69,15,58,15,201,8
1111.byte	102,69,15,58,15,237,12
1112	paddd	%xmm6,%xmm2
1113	pxor	%xmm2,%xmm14
1114	pshufb	L$rol16(%rip),%xmm14
1115	paddd	%xmm14,%xmm10
1116	pxor	%xmm10,%xmm6
1117	movdqa	%xmm6,%xmm3
1118	pslld	$12,%xmm3
1119	psrld	$20,%xmm6
1120	pxor	%xmm3,%xmm6
1121	paddd	%xmm6,%xmm2
1122	pxor	%xmm2,%xmm14
1123	pshufb	L$rol8(%rip),%xmm14
1124	paddd	%xmm14,%xmm10
1125	pxor	%xmm10,%xmm6
1126	movdqa	%xmm6,%xmm3
1127	pslld	$7,%xmm3
1128	psrld	$25,%xmm6
1129	pxor	%xmm3,%xmm6
1130.byte	102,15,58,15,246,4
1131.byte	102,69,15,58,15,210,8
1132.byte	102,69,15,58,15,246,12
1133	paddd	%xmm4,%xmm0
1134	pxor	%xmm0,%xmm12
1135	pshufb	L$rol16(%rip),%xmm12
1136	paddd	%xmm12,%xmm8
1137	pxor	%xmm8,%xmm4
1138	movdqa	%xmm4,%xmm3
1139	pslld	$12,%xmm3
1140	psrld	$20,%xmm4
1141	pxor	%xmm3,%xmm4
1142	paddd	%xmm4,%xmm0
1143	pxor	%xmm0,%xmm12
1144	pshufb	L$rol8(%rip),%xmm12
1145	paddd	%xmm12,%xmm8
1146	pxor	%xmm8,%xmm4
1147	movdqa	%xmm4,%xmm3
1148	pslld	$7,%xmm3
1149	psrld	$25,%xmm4
1150	pxor	%xmm3,%xmm4
1151.byte	102,15,58,15,228,12
1152.byte	102,69,15,58,15,192,8
1153.byte	102,69,15,58,15,228,4
1154	paddd	%xmm5,%xmm1
1155	pxor	%xmm1,%xmm13
1156	pshufb	L$rol16(%rip),%xmm13
1157	paddd	%xmm13,%xmm9
1158	pxor	%xmm9,%xmm5
1159	movdqa	%xmm5,%xmm3
1160	pslld	$12,%xmm3
1161	psrld	$20,%xmm5
1162	pxor	%xmm3,%xmm5
1163	paddd	%xmm5,%xmm1
1164	pxor	%xmm1,%xmm13
1165	pshufb	L$rol8(%rip),%xmm13
1166	paddd	%xmm13,%xmm9
1167	pxor	%xmm9,%xmm5
1168	movdqa	%xmm5,%xmm3
1169	pslld	$7,%xmm3
1170	psrld	$25,%xmm5
1171	pxor	%xmm3,%xmm5
1172.byte	102,15,58,15,237,12
1173.byte	102,69,15,58,15,201,8
1174.byte	102,69,15,58,15,237,4
1175	paddd	%xmm6,%xmm2
1176	pxor	%xmm2,%xmm14
1177	pshufb	L$rol16(%rip),%xmm14
1178	paddd	%xmm14,%xmm10
1179	pxor	%xmm10,%xmm6
1180	movdqa	%xmm6,%xmm3
1181	pslld	$12,%xmm3
1182	psrld	$20,%xmm6
1183	pxor	%xmm3,%xmm6
1184	paddd	%xmm6,%xmm2
1185	pxor	%xmm2,%xmm14
1186	pshufb	L$rol8(%rip),%xmm14
1187	paddd	%xmm14,%xmm10
1188	pxor	%xmm10,%xmm6
1189	movdqa	%xmm6,%xmm3
1190	pslld	$7,%xmm3
1191	psrld	$25,%xmm6
1192	pxor	%xmm3,%xmm6
1193.byte	102,15,58,15,246,12
1194.byte	102,69,15,58,15,210,8
1195.byte	102,69,15,58,15,246,4
1196
1197	cmpq	%rcx,%r8
1198	jb	L$open_sse_tail_192_rounds_and_x1hash
1199	cmpq	$160,%r8
1200	jne	L$open_sse_tail_192_rounds
1201	cmpq	$176,%rbx
1202	jb	L$open_sse_tail_192_finish
1203	addq	0+160(%rsi),%r10
1204	adcq	8+160(%rsi),%r11
1205	adcq	$1,%r12
1206	movq	0+0+0(%rbp),%rax
1207	movq	%rax,%r15
1208	mulq	%r10
1209	movq	%rax,%r13
1210	movq	%rdx,%r14
1211	movq	0+0+0(%rbp),%rax
1212	mulq	%r11
1213	imulq	%r12,%r15
1214	addq	%rax,%r14
1215	adcq	%rdx,%r15
1216	movq	8+0+0(%rbp),%rax
1217	movq	%rax,%r9
1218	mulq	%r10
1219	addq	%rax,%r14
1220	adcq	$0,%rdx
1221	movq	%rdx,%r10
1222	movq	8+0+0(%rbp),%rax
1223	mulq	%r11
1224	addq	%rax,%r15
1225	adcq	$0,%rdx
1226	imulq	%r12,%r9
1227	addq	%r10,%r15
1228	adcq	%rdx,%r9
1229	movq	%r13,%r10
1230	movq	%r14,%r11
1231	movq	%r15,%r12
1232	andq	$3,%r12
1233	movq	%r15,%r13
1234	andq	$-4,%r13
1235	movq	%r9,%r14
1236	shrdq	$2,%r9,%r15
1237	shrq	$2,%r9
1238	addq	%r13,%r15
1239	adcq	%r14,%r9
1240	addq	%r15,%r10
1241	adcq	%r9,%r11
1242	adcq	$0,%r12
1243
1244	cmpq	$192,%rbx
1245	jb	L$open_sse_tail_192_finish
1246	addq	0+176(%rsi),%r10
1247	adcq	8+176(%rsi),%r11
1248	adcq	$1,%r12
1249	movq	0+0+0(%rbp),%rax
1250	movq	%rax,%r15
1251	mulq	%r10
1252	movq	%rax,%r13
1253	movq	%rdx,%r14
1254	movq	0+0+0(%rbp),%rax
1255	mulq	%r11
1256	imulq	%r12,%r15
1257	addq	%rax,%r14
1258	adcq	%rdx,%r15
1259	movq	8+0+0(%rbp),%rax
1260	movq	%rax,%r9
1261	mulq	%r10
1262	addq	%rax,%r14
1263	adcq	$0,%rdx
1264	movq	%rdx,%r10
1265	movq	8+0+0(%rbp),%rax
1266	mulq	%r11
1267	addq	%rax,%r15
1268	adcq	$0,%rdx
1269	imulq	%r12,%r9
1270	addq	%r10,%r15
1271	adcq	%rdx,%r9
1272	movq	%r13,%r10
1273	movq	%r14,%r11
1274	movq	%r15,%r12
1275	andq	$3,%r12
1276	movq	%r15,%r13
1277	andq	$-4,%r13
1278	movq	%r9,%r14
1279	shrdq	$2,%r9,%r15
1280	shrq	$2,%r9
1281	addq	%r13,%r15
1282	adcq	%r14,%r9
1283	addq	%r15,%r10
1284	adcq	%r9,%r11
1285	adcq	$0,%r12
1286
1287L$open_sse_tail_192_finish:
1288	paddd	L$chacha20_consts(%rip),%xmm2
1289	paddd	0+48(%rbp),%xmm6
1290	paddd	0+64(%rbp),%xmm10
1291	paddd	0+128(%rbp),%xmm14
1292	paddd	L$chacha20_consts(%rip),%xmm1
1293	paddd	0+48(%rbp),%xmm5
1294	paddd	0+64(%rbp),%xmm9
1295	paddd	0+112(%rbp),%xmm13
1296	paddd	L$chacha20_consts(%rip),%xmm0
1297	paddd	0+48(%rbp),%xmm4
1298	paddd	0+64(%rbp),%xmm8
1299	paddd	0+96(%rbp),%xmm12
1300	movdqu	0 + 0(%rsi),%xmm3
1301	movdqu	16 + 0(%rsi),%xmm7
1302	movdqu	32 + 0(%rsi),%xmm11
1303	movdqu	48 + 0(%rsi),%xmm15
1304	pxor	%xmm3,%xmm2
1305	pxor	%xmm7,%xmm6
1306	pxor	%xmm11,%xmm10
1307	pxor	%xmm14,%xmm15
1308	movdqu	%xmm2,0 + 0(%rdi)
1309	movdqu	%xmm6,16 + 0(%rdi)
1310	movdqu	%xmm10,32 + 0(%rdi)
1311	movdqu	%xmm15,48 + 0(%rdi)
1312	movdqu	0 + 64(%rsi),%xmm3
1313	movdqu	16 + 64(%rsi),%xmm7
1314	movdqu	32 + 64(%rsi),%xmm11
1315	movdqu	48 + 64(%rsi),%xmm15
1316	pxor	%xmm3,%xmm1
1317	pxor	%xmm7,%xmm5
1318	pxor	%xmm11,%xmm9
1319	pxor	%xmm13,%xmm15
1320	movdqu	%xmm1,0 + 64(%rdi)
1321	movdqu	%xmm5,16 + 64(%rdi)
1322	movdqu	%xmm9,32 + 64(%rdi)
1323	movdqu	%xmm15,48 + 64(%rdi)
1324
1325	subq	$128,%rbx
1326	leaq	128(%rsi),%rsi
1327	leaq	128(%rdi),%rdi
1328	jmp	L$open_sse_tail_64_dec_loop
1329
1330L$open_sse_tail_256:
1331	movdqa	L$chacha20_consts(%rip),%xmm0
1332	movdqa	0+48(%rbp),%xmm4
1333	movdqa	0+64(%rbp),%xmm8
1334	movdqa	%xmm0,%xmm1
1335	movdqa	%xmm4,%xmm5
1336	movdqa	%xmm8,%xmm9
1337	movdqa	%xmm0,%xmm2
1338	movdqa	%xmm4,%xmm6
1339	movdqa	%xmm8,%xmm10
1340	movdqa	%xmm0,%xmm3
1341	movdqa	%xmm4,%xmm7
1342	movdqa	%xmm8,%xmm11
1343	movdqa	0+96(%rbp),%xmm15
1344	paddd	L$sse_inc(%rip),%xmm15
1345	movdqa	%xmm15,%xmm14
1346	paddd	L$sse_inc(%rip),%xmm14
1347	movdqa	%xmm14,%xmm13
1348	paddd	L$sse_inc(%rip),%xmm13
1349	movdqa	%xmm13,%xmm12
1350	paddd	L$sse_inc(%rip),%xmm12
1351	movdqa	%xmm12,0+96(%rbp)
1352	movdqa	%xmm13,0+112(%rbp)
1353	movdqa	%xmm14,0+128(%rbp)
1354	movdqa	%xmm15,0+144(%rbp)
1355
1356	xorq	%r8,%r8
1357L$open_sse_tail_256_rounds_and_x1hash:
1358	addq	0+0(%rsi,%r8,1),%r10
1359	adcq	8+0(%rsi,%r8,1),%r11
1360	adcq	$1,%r12
1361	movdqa	%xmm11,0+80(%rbp)
1362	paddd	%xmm4,%xmm0
1363	pxor	%xmm0,%xmm12
1364	pshufb	L$rol16(%rip),%xmm12
1365	paddd	%xmm12,%xmm8
1366	pxor	%xmm8,%xmm4
1367	movdqa	%xmm4,%xmm11
1368	pslld	$12,%xmm11
1369	psrld	$20,%xmm4
1370	pxor	%xmm11,%xmm4
1371	paddd	%xmm4,%xmm0
1372	pxor	%xmm0,%xmm12
1373	pshufb	L$rol8(%rip),%xmm12
1374	paddd	%xmm12,%xmm8
1375	pxor	%xmm8,%xmm4
1376	movdqa	%xmm4,%xmm11
1377	pslld	$7,%xmm11
1378	psrld	$25,%xmm4
1379	pxor	%xmm11,%xmm4
1380.byte	102,15,58,15,228,4
1381.byte	102,69,15,58,15,192,8
1382.byte	102,69,15,58,15,228,12
1383	paddd	%xmm5,%xmm1
1384	pxor	%xmm1,%xmm13
1385	pshufb	L$rol16(%rip),%xmm13
1386	paddd	%xmm13,%xmm9
1387	pxor	%xmm9,%xmm5
1388	movdqa	%xmm5,%xmm11
1389	pslld	$12,%xmm11
1390	psrld	$20,%xmm5
1391	pxor	%xmm11,%xmm5
1392	paddd	%xmm5,%xmm1
1393	pxor	%xmm1,%xmm13
1394	pshufb	L$rol8(%rip),%xmm13
1395	paddd	%xmm13,%xmm9
1396	pxor	%xmm9,%xmm5
1397	movdqa	%xmm5,%xmm11
1398	pslld	$7,%xmm11
1399	psrld	$25,%xmm5
1400	pxor	%xmm11,%xmm5
1401.byte	102,15,58,15,237,4
1402.byte	102,69,15,58,15,201,8
1403.byte	102,69,15,58,15,237,12
1404	paddd	%xmm6,%xmm2
1405	pxor	%xmm2,%xmm14
1406	pshufb	L$rol16(%rip),%xmm14
1407	paddd	%xmm14,%xmm10
1408	pxor	%xmm10,%xmm6
1409	movdqa	%xmm6,%xmm11
1410	pslld	$12,%xmm11
1411	psrld	$20,%xmm6
1412	pxor	%xmm11,%xmm6
1413	paddd	%xmm6,%xmm2
1414	pxor	%xmm2,%xmm14
1415	pshufb	L$rol8(%rip),%xmm14
1416	paddd	%xmm14,%xmm10
1417	pxor	%xmm10,%xmm6
1418	movdqa	%xmm6,%xmm11
1419	pslld	$7,%xmm11
1420	psrld	$25,%xmm6
1421	pxor	%xmm11,%xmm6
1422.byte	102,15,58,15,246,4
1423.byte	102,69,15,58,15,210,8
1424.byte	102,69,15,58,15,246,12
1425	movdqa	0+80(%rbp),%xmm11
1426	movq	0+0+0(%rbp),%rax
1427	movq	%rax,%r15
1428	mulq	%r10
1429	movq	%rax,%r13
1430	movq	%rdx,%r14
1431	movq	0+0+0(%rbp),%rax
1432	mulq	%r11
1433	imulq	%r12,%r15
1434	addq	%rax,%r14
1435	adcq	%rdx,%r15
1436	movdqa	%xmm9,0+80(%rbp)
1437	paddd	%xmm7,%xmm3
1438	pxor	%xmm3,%xmm15
1439	pshufb	L$rol16(%rip),%xmm15
1440	paddd	%xmm15,%xmm11
1441	pxor	%xmm11,%xmm7
1442	movdqa	%xmm7,%xmm9
1443	pslld	$12,%xmm9
1444	psrld	$20,%xmm7
1445	pxor	%xmm9,%xmm7
1446	paddd	%xmm7,%xmm3
1447	pxor	%xmm3,%xmm15
1448	pshufb	L$rol8(%rip),%xmm15
1449	paddd	%xmm15,%xmm11
1450	pxor	%xmm11,%xmm7
1451	movdqa	%xmm7,%xmm9
1452	pslld	$7,%xmm9
1453	psrld	$25,%xmm7
1454	pxor	%xmm9,%xmm7
1455.byte	102,15,58,15,255,4
1456.byte	102,69,15,58,15,219,8
1457.byte	102,69,15,58,15,255,12
1458	movdqa	0+80(%rbp),%xmm9
1459	movq	8+0+0(%rbp),%rax
1460	movq	%rax,%r9
1461	mulq	%r10
1462	addq	%rax,%r14
1463	adcq	$0,%rdx
1464	movq	%rdx,%r10
1465	movq	8+0+0(%rbp),%rax
1466	mulq	%r11
1467	addq	%rax,%r15
1468	adcq	$0,%rdx
1469	movdqa	%xmm11,0+80(%rbp)
1470	paddd	%xmm4,%xmm0
1471	pxor	%xmm0,%xmm12
1472	pshufb	L$rol16(%rip),%xmm12
1473	paddd	%xmm12,%xmm8
1474	pxor	%xmm8,%xmm4
1475	movdqa	%xmm4,%xmm11
1476	pslld	$12,%xmm11
1477	psrld	$20,%xmm4
1478	pxor	%xmm11,%xmm4
1479	paddd	%xmm4,%xmm0
1480	pxor	%xmm0,%xmm12
1481	pshufb	L$rol8(%rip),%xmm12
1482	paddd	%xmm12,%xmm8
1483	pxor	%xmm8,%xmm4
1484	movdqa	%xmm4,%xmm11
1485	pslld	$7,%xmm11
1486	psrld	$25,%xmm4
1487	pxor	%xmm11,%xmm4
1488.byte	102,15,58,15,228,12
1489.byte	102,69,15,58,15,192,8
1490.byte	102,69,15,58,15,228,4
1491	paddd	%xmm5,%xmm1
1492	pxor	%xmm1,%xmm13
1493	pshufb	L$rol16(%rip),%xmm13
1494	paddd	%xmm13,%xmm9
1495	pxor	%xmm9,%xmm5
1496	movdqa	%xmm5,%xmm11
1497	pslld	$12,%xmm11
1498	psrld	$20,%xmm5
1499	pxor	%xmm11,%xmm5
1500	paddd	%xmm5,%xmm1
1501	pxor	%xmm1,%xmm13
1502	pshufb	L$rol8(%rip),%xmm13
1503	paddd	%xmm13,%xmm9
1504	pxor	%xmm9,%xmm5
1505	movdqa	%xmm5,%xmm11
1506	pslld	$7,%xmm11
1507	psrld	$25,%xmm5
1508	pxor	%xmm11,%xmm5
1509.byte	102,15,58,15,237,12
1510.byte	102,69,15,58,15,201,8
1511.byte	102,69,15,58,15,237,4
1512	imulq	%r12,%r9
1513	addq	%r10,%r15
1514	adcq	%rdx,%r9
1515	paddd	%xmm6,%xmm2
1516	pxor	%xmm2,%xmm14
1517	pshufb	L$rol16(%rip),%xmm14
1518	paddd	%xmm14,%xmm10
1519	pxor	%xmm10,%xmm6
1520	movdqa	%xmm6,%xmm11
1521	pslld	$12,%xmm11
1522	psrld	$20,%xmm6
1523	pxor	%xmm11,%xmm6
1524	paddd	%xmm6,%xmm2
1525	pxor	%xmm2,%xmm14
1526	pshufb	L$rol8(%rip),%xmm14
1527	paddd	%xmm14,%xmm10
1528	pxor	%xmm10,%xmm6
1529	movdqa	%xmm6,%xmm11
1530	pslld	$7,%xmm11
1531	psrld	$25,%xmm6
1532	pxor	%xmm11,%xmm6
1533.byte	102,15,58,15,246,12
1534.byte	102,69,15,58,15,210,8
1535.byte	102,69,15,58,15,246,4
1536	movdqa	0+80(%rbp),%xmm11
1537	movq	%r13,%r10
1538	movq	%r14,%r11
1539	movq	%r15,%r12
1540	andq	$3,%r12
1541	movq	%r15,%r13
1542	andq	$-4,%r13
1543	movq	%r9,%r14
1544	shrdq	$2,%r9,%r15
1545	shrq	$2,%r9
1546	addq	%r13,%r15
1547	adcq	%r14,%r9
1548	addq	%r15,%r10
1549	adcq	%r9,%r11
1550	adcq	$0,%r12
1551	movdqa	%xmm9,0+80(%rbp)
1552	paddd	%xmm7,%xmm3
1553	pxor	%xmm3,%xmm15
1554	pshufb	L$rol16(%rip),%xmm15
1555	paddd	%xmm15,%xmm11
1556	pxor	%xmm11,%xmm7
1557	movdqa	%xmm7,%xmm9
1558	pslld	$12,%xmm9
1559	psrld	$20,%xmm7
1560	pxor	%xmm9,%xmm7
1561	paddd	%xmm7,%xmm3
1562	pxor	%xmm3,%xmm15
1563	pshufb	L$rol8(%rip),%xmm15
1564	paddd	%xmm15,%xmm11
1565	pxor	%xmm11,%xmm7
1566	movdqa	%xmm7,%xmm9
1567	pslld	$7,%xmm9
1568	psrld	$25,%xmm7
1569	pxor	%xmm9,%xmm7
1570.byte	102,15,58,15,255,12
1571.byte	102,69,15,58,15,219,8
1572.byte	102,69,15,58,15,255,4
1573	movdqa	0+80(%rbp),%xmm9
1574
1575	addq	$16,%r8
1576	cmpq	$160,%r8
1577	jb	L$open_sse_tail_256_rounds_and_x1hash
1578
1579	movq	%rbx,%rcx
1580	andq	$-16,%rcx
1581L$open_sse_tail_256_hash:
1582	addq	0+0(%rsi,%r8,1),%r10
1583	adcq	8+0(%rsi,%r8,1),%r11
1584	adcq	$1,%r12
1585	movq	0+0+0(%rbp),%rax
1586	movq	%rax,%r15
1587	mulq	%r10
1588	movq	%rax,%r13
1589	movq	%rdx,%r14
1590	movq	0+0+0(%rbp),%rax
1591	mulq	%r11
1592	imulq	%r12,%r15
1593	addq	%rax,%r14
1594	adcq	%rdx,%r15
1595	movq	8+0+0(%rbp),%rax
1596	movq	%rax,%r9
1597	mulq	%r10
1598	addq	%rax,%r14
1599	adcq	$0,%rdx
1600	movq	%rdx,%r10
1601	movq	8+0+0(%rbp),%rax
1602	mulq	%r11
1603	addq	%rax,%r15
1604	adcq	$0,%rdx
1605	imulq	%r12,%r9
1606	addq	%r10,%r15
1607	adcq	%rdx,%r9
1608	movq	%r13,%r10
1609	movq	%r14,%r11
1610	movq	%r15,%r12
1611	andq	$3,%r12
1612	movq	%r15,%r13
1613	andq	$-4,%r13
1614	movq	%r9,%r14
1615	shrdq	$2,%r9,%r15
1616	shrq	$2,%r9
1617	addq	%r13,%r15
1618	adcq	%r14,%r9
1619	addq	%r15,%r10
1620	adcq	%r9,%r11
1621	adcq	$0,%r12
1622
1623	addq	$16,%r8
1624	cmpq	%rcx,%r8
1625	jb	L$open_sse_tail_256_hash
1626	paddd	L$chacha20_consts(%rip),%xmm3
1627	paddd	0+48(%rbp),%xmm7
1628	paddd	0+64(%rbp),%xmm11
1629	paddd	0+144(%rbp),%xmm15
1630	paddd	L$chacha20_consts(%rip),%xmm2
1631	paddd	0+48(%rbp),%xmm6
1632	paddd	0+64(%rbp),%xmm10
1633	paddd	0+128(%rbp),%xmm14
1634	paddd	L$chacha20_consts(%rip),%xmm1
1635	paddd	0+48(%rbp),%xmm5
1636	paddd	0+64(%rbp),%xmm9
1637	paddd	0+112(%rbp),%xmm13
1638	paddd	L$chacha20_consts(%rip),%xmm0
1639	paddd	0+48(%rbp),%xmm4
1640	paddd	0+64(%rbp),%xmm8
1641	paddd	0+96(%rbp),%xmm12
1642	movdqa	%xmm12,0+80(%rbp)
1643	movdqu	0 + 0(%rsi),%xmm12
1644	pxor	%xmm3,%xmm12
1645	movdqu	%xmm12,0 + 0(%rdi)
1646	movdqu	16 + 0(%rsi),%xmm12
1647	pxor	%xmm7,%xmm12
1648	movdqu	%xmm12,16 + 0(%rdi)
1649	movdqu	32 + 0(%rsi),%xmm12
1650	pxor	%xmm11,%xmm12
1651	movdqu	%xmm12,32 + 0(%rdi)
1652	movdqu	48 + 0(%rsi),%xmm12
1653	pxor	%xmm15,%xmm12
1654	movdqu	%xmm12,48 + 0(%rdi)
1655	movdqu	0 + 64(%rsi),%xmm3
1656	movdqu	16 + 64(%rsi),%xmm7
1657	movdqu	32 + 64(%rsi),%xmm11
1658	movdqu	48 + 64(%rsi),%xmm15
1659	pxor	%xmm3,%xmm2
1660	pxor	%xmm7,%xmm6
1661	pxor	%xmm11,%xmm10
1662	pxor	%xmm14,%xmm15
1663	movdqu	%xmm2,0 + 64(%rdi)
1664	movdqu	%xmm6,16 + 64(%rdi)
1665	movdqu	%xmm10,32 + 64(%rdi)
1666	movdqu	%xmm15,48 + 64(%rdi)
1667	movdqu	0 + 128(%rsi),%xmm3
1668	movdqu	16 + 128(%rsi),%xmm7
1669	movdqu	32 + 128(%rsi),%xmm11
1670	movdqu	48 + 128(%rsi),%xmm15
1671	pxor	%xmm3,%xmm1
1672	pxor	%xmm7,%xmm5
1673	pxor	%xmm11,%xmm9
1674	pxor	%xmm13,%xmm15
1675	movdqu	%xmm1,0 + 128(%rdi)
1676	movdqu	%xmm5,16 + 128(%rdi)
1677	movdqu	%xmm9,32 + 128(%rdi)
1678	movdqu	%xmm15,48 + 128(%rdi)
1679
1680	movdqa	0+80(%rbp),%xmm12
1681	subq	$192,%rbx
1682	leaq	192(%rsi),%rsi
1683	leaq	192(%rdi),%rdi
1684
1685
1686L$open_sse_tail_64_dec_loop:
1687	cmpq	$16,%rbx
1688	jb	L$open_sse_tail_16_init
1689	subq	$16,%rbx
1690	movdqu	(%rsi),%xmm3
1691	pxor	%xmm3,%xmm0
1692	movdqu	%xmm0,(%rdi)
1693	leaq	16(%rsi),%rsi
1694	leaq	16(%rdi),%rdi
1695	movdqa	%xmm4,%xmm0
1696	movdqa	%xmm8,%xmm4
1697	movdqa	%xmm12,%xmm8
1698	jmp	L$open_sse_tail_64_dec_loop
1699L$open_sse_tail_16_init:
1700	movdqa	%xmm0,%xmm1
1701
1702
1703L$open_sse_tail_16:
1704	testq	%rbx,%rbx
1705	jz	L$open_sse_finalize
1706
1707
1708
1709	pxor	%xmm3,%xmm3
1710	leaq	-1(%rsi,%rbx,1),%rsi
1711	movq	%rbx,%r8
1712L$open_sse_tail_16_compose:
1713	pslldq	$1,%xmm3
1714	pinsrb	$0,(%rsi),%xmm3
1715	subq	$1,%rsi
1716	subq	$1,%r8
1717	jnz	L$open_sse_tail_16_compose
1718
1719.byte	102,73,15,126,221
1720	pextrq	$1,%xmm3,%r14
1721
1722	pxor	%xmm1,%xmm3
1723
1724
1725L$open_sse_tail_16_extract:
1726	pextrb	$0,%xmm3,(%rdi)
1727	psrldq	$1,%xmm3
1728	addq	$1,%rdi
1729	subq	$1,%rbx
1730	jne	L$open_sse_tail_16_extract
1731
1732	addq	%r13,%r10
1733	adcq	%r14,%r11
1734	adcq	$1,%r12
1735	movq	0+0+0(%rbp),%rax
1736	movq	%rax,%r15
1737	mulq	%r10
1738	movq	%rax,%r13
1739	movq	%rdx,%r14
1740	movq	0+0+0(%rbp),%rax
1741	mulq	%r11
1742	imulq	%r12,%r15
1743	addq	%rax,%r14
1744	adcq	%rdx,%r15
1745	movq	8+0+0(%rbp),%rax
1746	movq	%rax,%r9
1747	mulq	%r10
1748	addq	%rax,%r14
1749	adcq	$0,%rdx
1750	movq	%rdx,%r10
1751	movq	8+0+0(%rbp),%rax
1752	mulq	%r11
1753	addq	%rax,%r15
1754	adcq	$0,%rdx
1755	imulq	%r12,%r9
1756	addq	%r10,%r15
1757	adcq	%rdx,%r9
1758	movq	%r13,%r10
1759	movq	%r14,%r11
1760	movq	%r15,%r12
1761	andq	$3,%r12
1762	movq	%r15,%r13
1763	andq	$-4,%r13
1764	movq	%r9,%r14
1765	shrdq	$2,%r9,%r15
1766	shrq	$2,%r9
1767	addq	%r13,%r15
1768	adcq	%r14,%r9
1769	addq	%r15,%r10
1770	adcq	%r9,%r11
1771	adcq	$0,%r12
1772
1773
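// Finalize the tag: absorb the AD and ciphertext lengths, perform the final
// reduction modulo 2^130-5, add the "s" half of the Poly1305 key, and store
// the result through the saved output pointer.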
1774L$open_sse_finalize:
1775	addq	0+0+32(%rbp),%r10
1776	adcq	8+0+32(%rbp),%r11
1777	adcq	$1,%r12
1778	movq	0+0+0(%rbp),%rax
1779	movq	%rax,%r15
1780	mulq	%r10
1781	movq	%rax,%r13
1782	movq	%rdx,%r14
1783	movq	0+0+0(%rbp),%rax
1784	mulq	%r11
1785	imulq	%r12,%r15
1786	addq	%rax,%r14
1787	adcq	%rdx,%r15
1788	movq	8+0+0(%rbp),%rax
1789	movq	%rax,%r9
1790	mulq	%r10
1791	addq	%rax,%r14
1792	adcq	$0,%rdx
1793	movq	%rdx,%r10
1794	movq	8+0+0(%rbp),%rax
1795	mulq	%r11
1796	addq	%rax,%r15
1797	adcq	$0,%rdx
1798	imulq	%r12,%r9
1799	addq	%r10,%r15
1800	adcq	%rdx,%r9
1801	movq	%r13,%r10
1802	movq	%r14,%r11
1803	movq	%r15,%r12
1804	andq	$3,%r12
1805	movq	%r15,%r13
1806	andq	$-4,%r13
1807	movq	%r9,%r14
1808	shrdq	$2,%r9,%r15
1809	shrq	$2,%r9
1810	addq	%r13,%r15
1811	adcq	%r14,%r9
1812	addq	%r15,%r10
1813	adcq	%r9,%r11
1814	adcq	$0,%r12
1815
1816
1817	movq	%r10,%r13
1818	movq	%r11,%r14
1819	movq	%r12,%r15
1820	subq	$-5,%r10
1821	sbbq	$-1,%r11
1822	sbbq	$3,%r12
1823	cmovcq	%r13,%r10
1824	cmovcq	%r14,%r11
1825	cmovcq	%r15,%r12
1826
1827	addq	0+0+16(%rbp),%r10
1828	adcq	8+0+16(%rbp),%r11
1829
1830
1831	addq	$288 + 0 + 32,%rsp
1832
1833
1834	popq	%r9
1835
1836	movq	%r10,(%r9)
1837	movq	%r11,8(%r9)
1838	popq	%r15
1839
1840	popq	%r14
1841
1842	popq	%r13
1843
1844	popq	%r12
1845
1846	popq	%rbx
1847
1848	popq	%rbp
1849
1850	ret
1851
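// Fast path for inputs of at most 128 bytes: three ChaCha20 blocks are
// computed up front, one supplying the Poly1305 key and two the keystream.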
1852L$open_sse_128:
1853
1854	movdqu	L$chacha20_consts(%rip),%xmm0
1855	movdqa	%xmm0,%xmm1
1856	movdqa	%xmm0,%xmm2
1857	movdqu	0(%r9),%xmm4
1858	movdqa	%xmm4,%xmm5
1859	movdqa	%xmm4,%xmm6
1860	movdqu	16(%r9),%xmm8
1861	movdqa	%xmm8,%xmm9
1862	movdqa	%xmm8,%xmm10
1863	movdqu	32(%r9),%xmm12
1864	movdqa	%xmm12,%xmm13
1865	paddd	L$sse_inc(%rip),%xmm13
1866	movdqa	%xmm13,%xmm14
1867	paddd	L$sse_inc(%rip),%xmm14
1868	movdqa	%xmm4,%xmm7
1869	movdqa	%xmm8,%xmm11
1870	movdqa	%xmm13,%xmm15
1871	movq	$10,%r10
1872
1873L$open_sse_128_rounds:
1874	paddd	%xmm4,%xmm0
1875	pxor	%xmm0,%xmm12
1876	pshufb	L$rol16(%rip),%xmm12
1877	paddd	%xmm12,%xmm8
1878	pxor	%xmm8,%xmm4
1879	movdqa	%xmm4,%xmm3
1880	pslld	$12,%xmm3
1881	psrld	$20,%xmm4
1882	pxor	%xmm3,%xmm4
1883	paddd	%xmm4,%xmm0
1884	pxor	%xmm0,%xmm12
1885	pshufb	L$rol8(%rip),%xmm12
1886	paddd	%xmm12,%xmm8
1887	pxor	%xmm8,%xmm4
1888	movdqa	%xmm4,%xmm3
1889	pslld	$7,%xmm3
1890	psrld	$25,%xmm4
1891	pxor	%xmm3,%xmm4
1892.byte	102,15,58,15,228,4
1893.byte	102,69,15,58,15,192,8
1894.byte	102,69,15,58,15,228,12
1895	paddd	%xmm5,%xmm1
1896	pxor	%xmm1,%xmm13
1897	pshufb	L$rol16(%rip),%xmm13
1898	paddd	%xmm13,%xmm9
1899	pxor	%xmm9,%xmm5
1900	movdqa	%xmm5,%xmm3
1901	pslld	$12,%xmm3
1902	psrld	$20,%xmm5
1903	pxor	%xmm3,%xmm5
1904	paddd	%xmm5,%xmm1
1905	pxor	%xmm1,%xmm13
1906	pshufb	L$rol8(%rip),%xmm13
1907	paddd	%xmm13,%xmm9
1908	pxor	%xmm9,%xmm5
1909	movdqa	%xmm5,%xmm3
1910	pslld	$7,%xmm3
1911	psrld	$25,%xmm5
1912	pxor	%xmm3,%xmm5
1913.byte	102,15,58,15,237,4
1914.byte	102,69,15,58,15,201,8
1915.byte	102,69,15,58,15,237,12
1916	paddd	%xmm6,%xmm2
1917	pxor	%xmm2,%xmm14
1918	pshufb	L$rol16(%rip),%xmm14
1919	paddd	%xmm14,%xmm10
1920	pxor	%xmm10,%xmm6
1921	movdqa	%xmm6,%xmm3
1922	pslld	$12,%xmm3
1923	psrld	$20,%xmm6
1924	pxor	%xmm3,%xmm6
1925	paddd	%xmm6,%xmm2
1926	pxor	%xmm2,%xmm14
1927	pshufb	L$rol8(%rip),%xmm14
1928	paddd	%xmm14,%xmm10
1929	pxor	%xmm10,%xmm6
1930	movdqa	%xmm6,%xmm3
1931	pslld	$7,%xmm3
1932	psrld	$25,%xmm6
1933	pxor	%xmm3,%xmm6
1934.byte	102,15,58,15,246,4
1935.byte	102,69,15,58,15,210,8
1936.byte	102,69,15,58,15,246,12
1937	paddd	%xmm4,%xmm0
1938	pxor	%xmm0,%xmm12
1939	pshufb	L$rol16(%rip),%xmm12
1940	paddd	%xmm12,%xmm8
1941	pxor	%xmm8,%xmm4
1942	movdqa	%xmm4,%xmm3
1943	pslld	$12,%xmm3
1944	psrld	$20,%xmm4
1945	pxor	%xmm3,%xmm4
1946	paddd	%xmm4,%xmm0
1947	pxor	%xmm0,%xmm12
1948	pshufb	L$rol8(%rip),%xmm12
1949	paddd	%xmm12,%xmm8
1950	pxor	%xmm8,%xmm4
1951	movdqa	%xmm4,%xmm3
1952	pslld	$7,%xmm3
1953	psrld	$25,%xmm4
1954	pxor	%xmm3,%xmm4
1955.byte	102,15,58,15,228,12
1956.byte	102,69,15,58,15,192,8
1957.byte	102,69,15,58,15,228,4
1958	paddd	%xmm5,%xmm1
1959	pxor	%xmm1,%xmm13
1960	pshufb	L$rol16(%rip),%xmm13
1961	paddd	%xmm13,%xmm9
1962	pxor	%xmm9,%xmm5
1963	movdqa	%xmm5,%xmm3
1964	pslld	$12,%xmm3
1965	psrld	$20,%xmm5
1966	pxor	%xmm3,%xmm5
1967	paddd	%xmm5,%xmm1
1968	pxor	%xmm1,%xmm13
1969	pshufb	L$rol8(%rip),%xmm13
1970	paddd	%xmm13,%xmm9
1971	pxor	%xmm9,%xmm5
1972	movdqa	%xmm5,%xmm3
1973	pslld	$7,%xmm3
1974	psrld	$25,%xmm5
1975	pxor	%xmm3,%xmm5
1976.byte	102,15,58,15,237,12
1977.byte	102,69,15,58,15,201,8
1978.byte	102,69,15,58,15,237,4
1979	paddd	%xmm6,%xmm2
1980	pxor	%xmm2,%xmm14
1981	pshufb	L$rol16(%rip),%xmm14
1982	paddd	%xmm14,%xmm10
1983	pxor	%xmm10,%xmm6
1984	movdqa	%xmm6,%xmm3
1985	pslld	$12,%xmm3
1986	psrld	$20,%xmm6
1987	pxor	%xmm3,%xmm6
1988	paddd	%xmm6,%xmm2
1989	pxor	%xmm2,%xmm14
1990	pshufb	L$rol8(%rip),%xmm14
1991	paddd	%xmm14,%xmm10
1992	pxor	%xmm10,%xmm6
1993	movdqa	%xmm6,%xmm3
1994	pslld	$7,%xmm3
1995	psrld	$25,%xmm6
1996	pxor	%xmm3,%xmm6
1997.byte	102,15,58,15,246,12
1998.byte	102,69,15,58,15,210,8
1999.byte	102,69,15,58,15,246,4
2000
2001	decq	%r10
2002	jnz	L$open_sse_128_rounds
2003	paddd	L$chacha20_consts(%rip),%xmm0
2004	paddd	L$chacha20_consts(%rip),%xmm1
2005	paddd	L$chacha20_consts(%rip),%xmm2
2006	paddd	%xmm7,%xmm4
2007	paddd	%xmm7,%xmm5
2008	paddd	%xmm7,%xmm6
2009	paddd	%xmm11,%xmm9
2010	paddd	%xmm11,%xmm10
2011	paddd	%xmm15,%xmm13
2012	paddd	L$sse_inc(%rip),%xmm15
2013	paddd	%xmm15,%xmm14
2014
2015	pand	L$clamp(%rip),%xmm0
2016	movdqa	%xmm0,0+0(%rbp)
2017	movdqa	%xmm4,0+16(%rbp)
2018
2019	movq	%r8,%r8
2020	call	poly_hash_ad_internal
2021L$open_sse_128_xor_hash:
2022	cmpq	$16,%rbx
2023	jb	L$open_sse_tail_16
2024	subq	$16,%rbx
2025	addq	0+0(%rsi),%r10
2026	adcq	8+0(%rsi),%r11
2027	adcq	$1,%r12
2028
2029
2030	movdqu	0(%rsi),%xmm3
2031	pxor	%xmm3,%xmm1
2032	movdqu	%xmm1,0(%rdi)
2033	leaq	16(%rsi),%rsi
2034	leaq	16(%rdi),%rdi
2035	movq	0+0+0(%rbp),%rax
2036	movq	%rax,%r15
2037	mulq	%r10
2038	movq	%rax,%r13
2039	movq	%rdx,%r14
2040	movq	0+0+0(%rbp),%rax
2041	mulq	%r11
2042	imulq	%r12,%r15
2043	addq	%rax,%r14
2044	adcq	%rdx,%r15
2045	movq	8+0+0(%rbp),%rax
2046	movq	%rax,%r9
2047	mulq	%r10
2048	addq	%rax,%r14
2049	adcq	$0,%rdx
2050	movq	%rdx,%r10
2051	movq	8+0+0(%rbp),%rax
2052	mulq	%r11
2053	addq	%rax,%r15
2054	adcq	$0,%rdx
2055	imulq	%r12,%r9
2056	addq	%r10,%r15
2057	adcq	%rdx,%r9
2058	movq	%r13,%r10
2059	movq	%r14,%r11
2060	movq	%r15,%r12
2061	andq	$3,%r12
2062	movq	%r15,%r13
2063	andq	$-4,%r13
2064	movq	%r9,%r14
2065	shrdq	$2,%r9,%r15
2066	shrq	$2,%r9
2067	addq	%r13,%r15
2068	adcq	%r14,%r9
2069	addq	%r15,%r10
2070	adcq	%r9,%r11
2071	adcq	$0,%r12
2072
2073
2074	movdqa	%xmm5,%xmm1
2075	movdqa	%xmm9,%xmm5
2076	movdqa	%xmm13,%xmm9
2077	movdqa	%xmm2,%xmm13
2078	movdqa	%xmm6,%xmm2
2079	movdqa	%xmm10,%xmm6
2080	movdqa	%xmm14,%xmm10
2081	jmp	L$open_sse_128_xor_hash
2082
2083
2084
2085
2086
2087
2088
2089
2090
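// chacha20_poly1305_seal: one-pass ChaCha20 encryption with Poly1305
// authentication. Register usage mirrors chacha20_poly1305_open: %rdi =
// output, %rsi = input plaintext, %rdx = its length, %rcx = additional data,
// %r8 = AD length, %r9 = key/counter/nonce block that also receives the tag;
// an extra length stored at 56(%r9) is folded into the authenticated total.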
2091.globl	_chacha20_poly1305_seal
2092.private_extern _chacha20_poly1305_seal
2093
2094.p2align	6
2095_chacha20_poly1305_seal:
2096
2097_CET_ENDBR
2098	pushq	%rbp
2099
2100	pushq	%rbx
2101
2102	pushq	%r12
2103
2104	pushq	%r13
2105
2106	pushq	%r14
2107
2108	pushq	%r15
2109
2110
2111
2112	pushq	%r9
2113
2114	subq	$288 + 0 + 32,%rsp
2115
2116	leaq	32(%rsp),%rbp
2117	andq	$-32,%rbp
2118
2119	movq	56(%r9),%rbx
2120	addq	%rdx,%rbx
2121	movq	%r8,0+0+32(%rbp)
2122	movq	%rbx,8+0+32(%rbp)
2123	movq	%rdx,%rbx
2124
2125	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
2126	andl	$288,%eax
2127	xorl	$288,%eax
2128	jz	chacha20_poly1305_seal_avx2
2129
2130	cmpq	$128,%rbx
2131	jbe	L$seal_sse_128
2132
2133	movdqa	L$chacha20_consts(%rip),%xmm0
2134	movdqu	0(%r9),%xmm4
2135	movdqu	16(%r9),%xmm8
2136	movdqu	32(%r9),%xmm12
2137
2138	movdqa	%xmm0,%xmm1
2139	movdqa	%xmm0,%xmm2
2140	movdqa	%xmm0,%xmm3
2141	movdqa	%xmm4,%xmm5
2142	movdqa	%xmm4,%xmm6
2143	movdqa	%xmm4,%xmm7
2144	movdqa	%xmm8,%xmm9
2145	movdqa	%xmm8,%xmm10
2146	movdqa	%xmm8,%xmm11
2147	movdqa	%xmm12,%xmm15
2148	paddd	L$sse_inc(%rip),%xmm12
2149	movdqa	%xmm12,%xmm14
2150	paddd	L$sse_inc(%rip),%xmm12
2151	movdqa	%xmm12,%xmm13
2152	paddd	L$sse_inc(%rip),%xmm12
2153
2154	movdqa	%xmm4,0+48(%rbp)
2155	movdqa	%xmm8,0+64(%rbp)
2156	movdqa	%xmm12,0+96(%rbp)
2157	movdqa	%xmm13,0+112(%rbp)
2158	movdqa	%xmm14,0+128(%rbp)
2159	movdqa	%xmm15,0+144(%rbp)
2160	movq	$10,%r10
2161L$seal_sse_init_rounds:
2162	movdqa	%xmm8,0+80(%rbp)
2163	movdqa	L$rol16(%rip),%xmm8
2164	paddd	%xmm7,%xmm3
2165	paddd	%xmm6,%xmm2
2166	paddd	%xmm5,%xmm1
2167	paddd	%xmm4,%xmm0
2168	pxor	%xmm3,%xmm15
2169	pxor	%xmm2,%xmm14
2170	pxor	%xmm1,%xmm13
2171	pxor	%xmm0,%xmm12
2172.byte	102,69,15,56,0,248
2173.byte	102,69,15,56,0,240
2174.byte	102,69,15,56,0,232
2175.byte	102,69,15,56,0,224
2176	movdqa	0+80(%rbp),%xmm8
2177	paddd	%xmm15,%xmm11
2178	paddd	%xmm14,%xmm10
2179	paddd	%xmm13,%xmm9
2180	paddd	%xmm12,%xmm8
2181	pxor	%xmm11,%xmm7
2182	pxor	%xmm10,%xmm6
2183	pxor	%xmm9,%xmm5
2184	pxor	%xmm8,%xmm4
2185	movdqa	%xmm8,0+80(%rbp)
2186	movdqa	%xmm7,%xmm8
2187	psrld	$20,%xmm8
2188	pslld	$32-20,%xmm7
2189	pxor	%xmm8,%xmm7
2190	movdqa	%xmm6,%xmm8
2191	psrld	$20,%xmm8
2192	pslld	$32-20,%xmm6
2193	pxor	%xmm8,%xmm6
2194	movdqa	%xmm5,%xmm8
2195	psrld	$20,%xmm8
2196	pslld	$32-20,%xmm5
2197	pxor	%xmm8,%xmm5
2198	movdqa	%xmm4,%xmm8
2199	psrld	$20,%xmm8
2200	pslld	$32-20,%xmm4
2201	pxor	%xmm8,%xmm4
2202	movdqa	L$rol8(%rip),%xmm8
2203	paddd	%xmm7,%xmm3
2204	paddd	%xmm6,%xmm2
2205	paddd	%xmm5,%xmm1
2206	paddd	%xmm4,%xmm0
2207	pxor	%xmm3,%xmm15
2208	pxor	%xmm2,%xmm14
2209	pxor	%xmm1,%xmm13
2210	pxor	%xmm0,%xmm12
2211.byte	102,69,15,56,0,248
2212.byte	102,69,15,56,0,240
2213.byte	102,69,15,56,0,232
2214.byte	102,69,15,56,0,224
2215	movdqa	0+80(%rbp),%xmm8
2216	paddd	%xmm15,%xmm11
2217	paddd	%xmm14,%xmm10
2218	paddd	%xmm13,%xmm9
2219	paddd	%xmm12,%xmm8
2220	pxor	%xmm11,%xmm7
2221	pxor	%xmm10,%xmm6
2222	pxor	%xmm9,%xmm5
2223	pxor	%xmm8,%xmm4
2224	movdqa	%xmm8,0+80(%rbp)
2225	movdqa	%xmm7,%xmm8
2226	psrld	$25,%xmm8
2227	pslld	$32-25,%xmm7
2228	pxor	%xmm8,%xmm7
2229	movdqa	%xmm6,%xmm8
2230	psrld	$25,%xmm8
2231	pslld	$32-25,%xmm6
2232	pxor	%xmm8,%xmm6
2233	movdqa	%xmm5,%xmm8
2234	psrld	$25,%xmm8
2235	pslld	$32-25,%xmm5
2236	pxor	%xmm8,%xmm5
2237	movdqa	%xmm4,%xmm8
2238	psrld	$25,%xmm8
2239	pslld	$32-25,%xmm4
2240	pxor	%xmm8,%xmm4
2241	movdqa	0+80(%rbp),%xmm8
2242.byte	102,15,58,15,255,4
2243.byte	102,69,15,58,15,219,8
2244.byte	102,69,15,58,15,255,12
2245.byte	102,15,58,15,246,4
2246.byte	102,69,15,58,15,210,8
2247.byte	102,69,15,58,15,246,12
2248.byte	102,15,58,15,237,4
2249.byte	102,69,15,58,15,201,8
2250.byte	102,69,15,58,15,237,12
2251.byte	102,15,58,15,228,4
2252.byte	102,69,15,58,15,192,8
2253.byte	102,69,15,58,15,228,12
2254	movdqa	%xmm8,0+80(%rbp)
2255	movdqa	L$rol16(%rip),%xmm8
2256	paddd	%xmm7,%xmm3
2257	paddd	%xmm6,%xmm2
2258	paddd	%xmm5,%xmm1
2259	paddd	%xmm4,%xmm0
2260	pxor	%xmm3,%xmm15
2261	pxor	%xmm2,%xmm14
2262	pxor	%xmm1,%xmm13
2263	pxor	%xmm0,%xmm12
2264.byte	102,69,15,56,0,248
2265.byte	102,69,15,56,0,240
2266.byte	102,69,15,56,0,232
2267.byte	102,69,15,56,0,224
2268	movdqa	0+80(%rbp),%xmm8
2269	paddd	%xmm15,%xmm11
2270	paddd	%xmm14,%xmm10
2271	paddd	%xmm13,%xmm9
2272	paddd	%xmm12,%xmm8
2273	pxor	%xmm11,%xmm7
2274	pxor	%xmm10,%xmm6
2275	pxor	%xmm9,%xmm5
2276	pxor	%xmm8,%xmm4
2277	movdqa	%xmm8,0+80(%rbp)
2278	movdqa	%xmm7,%xmm8
2279	psrld	$20,%xmm8
2280	pslld	$32-20,%xmm7
2281	pxor	%xmm8,%xmm7
2282	movdqa	%xmm6,%xmm8
2283	psrld	$20,%xmm8
2284	pslld	$32-20,%xmm6
2285	pxor	%xmm8,%xmm6
2286	movdqa	%xmm5,%xmm8
2287	psrld	$20,%xmm8
2288	pslld	$32-20,%xmm5
2289	pxor	%xmm8,%xmm5
2290	movdqa	%xmm4,%xmm8
2291	psrld	$20,%xmm8
2292	pslld	$32-20,%xmm4
2293	pxor	%xmm8,%xmm4
2294	movdqa	L$rol8(%rip),%xmm8
2295	paddd	%xmm7,%xmm3
2296	paddd	%xmm6,%xmm2
2297	paddd	%xmm5,%xmm1
2298	paddd	%xmm4,%xmm0
2299	pxor	%xmm3,%xmm15
2300	pxor	%xmm2,%xmm14
2301	pxor	%xmm1,%xmm13
2302	pxor	%xmm0,%xmm12
2303.byte	102,69,15,56,0,248
2304.byte	102,69,15,56,0,240
2305.byte	102,69,15,56,0,232
2306.byte	102,69,15,56,0,224
2307	movdqa	0+80(%rbp),%xmm8
2308	paddd	%xmm15,%xmm11
2309	paddd	%xmm14,%xmm10
2310	paddd	%xmm13,%xmm9
2311	paddd	%xmm12,%xmm8
2312	pxor	%xmm11,%xmm7
2313	pxor	%xmm10,%xmm6
2314	pxor	%xmm9,%xmm5
2315	pxor	%xmm8,%xmm4
2316	movdqa	%xmm8,0+80(%rbp)
2317	movdqa	%xmm7,%xmm8
2318	psrld	$25,%xmm8
2319	pslld	$32-25,%xmm7
2320	pxor	%xmm8,%xmm7
2321	movdqa	%xmm6,%xmm8
2322	psrld	$25,%xmm8
2323	pslld	$32-25,%xmm6
2324	pxor	%xmm8,%xmm6
2325	movdqa	%xmm5,%xmm8
2326	psrld	$25,%xmm8
2327	pslld	$32-25,%xmm5
2328	pxor	%xmm8,%xmm5
2329	movdqa	%xmm4,%xmm8
2330	psrld	$25,%xmm8
2331	pslld	$32-25,%xmm4
2332	pxor	%xmm8,%xmm4
2333	movdqa	0+80(%rbp),%xmm8
2334.byte	102,15,58,15,255,12
2335.byte	102,69,15,58,15,219,8
2336.byte	102,69,15,58,15,255,4
2337.byte	102,15,58,15,246,12
2338.byte	102,69,15,58,15,210,8
2339.byte	102,69,15,58,15,246,4
2340.byte	102,15,58,15,237,12
2341.byte	102,69,15,58,15,201,8
2342.byte	102,69,15,58,15,237,4
2343.byte	102,15,58,15,228,12
2344.byte	102,69,15,58,15,192,8
2345.byte	102,69,15,58,15,228,4
2346
2347	decq	%r10
2348	jnz	L$seal_sse_init_rounds
2349	paddd	L$chacha20_consts(%rip),%xmm3
2350	paddd	0+48(%rbp),%xmm7
2351	paddd	0+64(%rbp),%xmm11
2352	paddd	0+144(%rbp),%xmm15
2353	paddd	L$chacha20_consts(%rip),%xmm2
2354	paddd	0+48(%rbp),%xmm6
2355	paddd	0+64(%rbp),%xmm10
2356	paddd	0+128(%rbp),%xmm14
2357	paddd	L$chacha20_consts(%rip),%xmm1
2358	paddd	0+48(%rbp),%xmm5
2359	paddd	0+64(%rbp),%xmm9
2360	paddd	0+112(%rbp),%xmm13
2361	paddd	L$chacha20_consts(%rip),%xmm0
2362	paddd	0+48(%rbp),%xmm4
2363	paddd	0+64(%rbp),%xmm8
2364	paddd	0+96(%rbp),%xmm12
2365
2366
2367	pand	L$clamp(%rip),%xmm3
2368	movdqa	%xmm3,0+0(%rbp)
2369	movdqa	%xmm7,0+16(%rbp)
2370
2371	movq	%r8,%r8
2372	call	poly_hash_ad_internal
2373	movdqu	0 + 0(%rsi),%xmm3
2374	movdqu	16 + 0(%rsi),%xmm7
2375	movdqu	32 + 0(%rsi),%xmm11
2376	movdqu	48 + 0(%rsi),%xmm15
2377	pxor	%xmm3,%xmm2
2378	pxor	%xmm7,%xmm6
2379	pxor	%xmm11,%xmm10
2380	pxor	%xmm14,%xmm15
2381	movdqu	%xmm2,0 + 0(%rdi)
2382	movdqu	%xmm6,16 + 0(%rdi)
2383	movdqu	%xmm10,32 + 0(%rdi)
2384	movdqu	%xmm15,48 + 0(%rdi)
2385	movdqu	0 + 64(%rsi),%xmm3
2386	movdqu	16 + 64(%rsi),%xmm7
2387	movdqu	32 + 64(%rsi),%xmm11
2388	movdqu	48 + 64(%rsi),%xmm15
2389	pxor	%xmm3,%xmm1
2390	pxor	%xmm7,%xmm5
2391	pxor	%xmm11,%xmm9
2392	pxor	%xmm13,%xmm15
2393	movdqu	%xmm1,0 + 64(%rdi)
2394	movdqu	%xmm5,16 + 64(%rdi)
2395	movdqu	%xmm9,32 + 64(%rdi)
2396	movdqu	%xmm15,48 + 64(%rdi)
2397
2398	cmpq	$192,%rbx
2399	ja	L$seal_sse_main_init
2400	movq	$128,%rcx
2401	subq	$128,%rbx
2402	leaq	128(%rsi),%rsi
2403	jmp	L$seal_sse_128_tail_hash
2404L$seal_sse_main_init:
2405	movdqu	0 + 128(%rsi),%xmm3
2406	movdqu	16 + 128(%rsi),%xmm7
2407	movdqu	32 + 128(%rsi),%xmm11
2408	movdqu	48 + 128(%rsi),%xmm15
2409	pxor	%xmm3,%xmm0
2410	pxor	%xmm7,%xmm4
2411	pxor	%xmm11,%xmm8
2412	pxor	%xmm12,%xmm15
2413	movdqu	%xmm0,0 + 128(%rdi)
2414	movdqu	%xmm4,16 + 128(%rdi)
2415	movdqu	%xmm8,32 + 128(%rdi)
2416	movdqu	%xmm15,48 + 128(%rdi)
2417
2418	movq	$192,%rcx
2419	subq	$192,%rbx
2420	leaq	192(%rsi),%rsi
2421	movq	$2,%rcx
2422	movq	$8,%r8
2423	cmpq	$64,%rbx
2424	jbe	L$seal_sse_tail_64
2425	cmpq	$128,%rbx
2426	jbe	L$seal_sse_tail_128
2427	cmpq	$192,%rbx
2428	jbe	L$seal_sse_tail_192
2429
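// Main SSE seal loop: four ChaCha20 blocks per iteration, with the Poly1305
// update over ciphertext already written to the output buffer interleaved
// into the rounds.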
2430L$seal_sse_main_loop:
2431	movdqa	L$chacha20_consts(%rip),%xmm0
2432	movdqa	0+48(%rbp),%xmm4
2433	movdqa	0+64(%rbp),%xmm8
2434	movdqa	%xmm0,%xmm1
2435	movdqa	%xmm4,%xmm5
2436	movdqa	%xmm8,%xmm9
2437	movdqa	%xmm0,%xmm2
2438	movdqa	%xmm4,%xmm6
2439	movdqa	%xmm8,%xmm10
2440	movdqa	%xmm0,%xmm3
2441	movdqa	%xmm4,%xmm7
2442	movdqa	%xmm8,%xmm11
2443	movdqa	0+96(%rbp),%xmm15
2444	paddd	L$sse_inc(%rip),%xmm15
2445	movdqa	%xmm15,%xmm14
2446	paddd	L$sse_inc(%rip),%xmm14
2447	movdqa	%xmm14,%xmm13
2448	paddd	L$sse_inc(%rip),%xmm13
2449	movdqa	%xmm13,%xmm12
2450	paddd	L$sse_inc(%rip),%xmm12
2451	movdqa	%xmm12,0+96(%rbp)
2452	movdqa	%xmm13,0+112(%rbp)
2453	movdqa	%xmm14,0+128(%rbp)
2454	movdqa	%xmm15,0+144(%rbp)
2455
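// One iteration of L$seal_sse_main_rounds performs a full ChaCha20 double round
// (column round, then diagonal round via the palignr byte rotations) across all
// four blocks, interleaved with one Poly1305 step: 16 bytes of ciphertext
// already written at (%rdi) are added to the accumulator and multiplied by r
// (the key limbs at 0(%rbp) and 8(%rbp)), followed by the partial reduction
// modulo 2^130-5.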
2456.p2align	5
2457L$seal_sse_main_rounds:
2458	movdqa	%xmm8,0+80(%rbp)
2459	movdqa	L$rol16(%rip),%xmm8
2460	paddd	%xmm7,%xmm3
2461	paddd	%xmm6,%xmm2
2462	paddd	%xmm5,%xmm1
2463	paddd	%xmm4,%xmm0
2464	pxor	%xmm3,%xmm15
2465	pxor	%xmm2,%xmm14
2466	pxor	%xmm1,%xmm13
2467	pxor	%xmm0,%xmm12
2468.byte	102,69,15,56,0,248
2469.byte	102,69,15,56,0,240
2470.byte	102,69,15,56,0,232
2471.byte	102,69,15,56,0,224
2472	movdqa	0+80(%rbp),%xmm8
2473	paddd	%xmm15,%xmm11
2474	paddd	%xmm14,%xmm10
2475	paddd	%xmm13,%xmm9
2476	paddd	%xmm12,%xmm8
2477	pxor	%xmm11,%xmm7
2478	addq	0+0(%rdi),%r10
2479	adcq	8+0(%rdi),%r11
2480	adcq	$1,%r12
2481	pxor	%xmm10,%xmm6
2482	pxor	%xmm9,%xmm5
2483	pxor	%xmm8,%xmm4
2484	movdqa	%xmm8,0+80(%rbp)
2485	movdqa	%xmm7,%xmm8
2486	psrld	$20,%xmm8
2487	pslld	$32-20,%xmm7
2488	pxor	%xmm8,%xmm7
2489	movdqa	%xmm6,%xmm8
2490	psrld	$20,%xmm8
2491	pslld	$32-20,%xmm6
2492	pxor	%xmm8,%xmm6
2493	movdqa	%xmm5,%xmm8
2494	psrld	$20,%xmm8
2495	pslld	$32-20,%xmm5
2496	pxor	%xmm8,%xmm5
2497	movdqa	%xmm4,%xmm8
2498	psrld	$20,%xmm8
2499	pslld	$32-20,%xmm4
2500	pxor	%xmm8,%xmm4
2501	movq	0+0+0(%rbp),%rax
2502	movq	%rax,%r15
2503	mulq	%r10
2504	movq	%rax,%r13
2505	movq	%rdx,%r14
2506	movq	0+0+0(%rbp),%rax
2507	mulq	%r11
2508	imulq	%r12,%r15
2509	addq	%rax,%r14
2510	adcq	%rdx,%r15
2511	movdqa	L$rol8(%rip),%xmm8
2512	paddd	%xmm7,%xmm3
2513	paddd	%xmm6,%xmm2
2514	paddd	%xmm5,%xmm1
2515	paddd	%xmm4,%xmm0
2516	pxor	%xmm3,%xmm15
2517	pxor	%xmm2,%xmm14
2518	pxor	%xmm1,%xmm13
2519	pxor	%xmm0,%xmm12
2520.byte	102,69,15,56,0,248
2521.byte	102,69,15,56,0,240
2522.byte	102,69,15,56,0,232
2523.byte	102,69,15,56,0,224
2524	movdqa	0+80(%rbp),%xmm8
2525	paddd	%xmm15,%xmm11
2526	paddd	%xmm14,%xmm10
2527	paddd	%xmm13,%xmm9
2528	paddd	%xmm12,%xmm8
2529	pxor	%xmm11,%xmm7
2530	pxor	%xmm10,%xmm6
2531	movq	8+0+0(%rbp),%rax
2532	movq	%rax,%r9
2533	mulq	%r10
2534	addq	%rax,%r14
2535	adcq	$0,%rdx
2536	movq	%rdx,%r10
2537	movq	8+0+0(%rbp),%rax
2538	mulq	%r11
2539	addq	%rax,%r15
2540	adcq	$0,%rdx
2541	pxor	%xmm9,%xmm5
2542	pxor	%xmm8,%xmm4
2543	movdqa	%xmm8,0+80(%rbp)
2544	movdqa	%xmm7,%xmm8
2545	psrld	$25,%xmm8
2546	pslld	$32-25,%xmm7
2547	pxor	%xmm8,%xmm7
2548	movdqa	%xmm6,%xmm8
2549	psrld	$25,%xmm8
2550	pslld	$32-25,%xmm6
2551	pxor	%xmm8,%xmm6
2552	movdqa	%xmm5,%xmm8
2553	psrld	$25,%xmm8
2554	pslld	$32-25,%xmm5
2555	pxor	%xmm8,%xmm5
2556	movdqa	%xmm4,%xmm8
2557	psrld	$25,%xmm8
2558	pslld	$32-25,%xmm4
2559	pxor	%xmm8,%xmm4
2560	movdqa	0+80(%rbp),%xmm8
2561	imulq	%r12,%r9
2562	addq	%r10,%r15
2563	adcq	%rdx,%r9
2564.byte	102,15,58,15,255,4
2565.byte	102,69,15,58,15,219,8
2566.byte	102,69,15,58,15,255,12
2567.byte	102,15,58,15,246,4
2568.byte	102,69,15,58,15,210,8
2569.byte	102,69,15,58,15,246,12
2570.byte	102,15,58,15,237,4
2571.byte	102,69,15,58,15,201,8
2572.byte	102,69,15,58,15,237,12
2573.byte	102,15,58,15,228,4
2574.byte	102,69,15,58,15,192,8
2575.byte	102,69,15,58,15,228,12
2576	movdqa	%xmm8,0+80(%rbp)
2577	movdqa	L$rol16(%rip),%xmm8
2578	paddd	%xmm7,%xmm3
2579	paddd	%xmm6,%xmm2
2580	paddd	%xmm5,%xmm1
2581	paddd	%xmm4,%xmm0
2582	pxor	%xmm3,%xmm15
2583	pxor	%xmm2,%xmm14
2584	movq	%r13,%r10
2585	movq	%r14,%r11
2586	movq	%r15,%r12
2587	andq	$3,%r12
2588	movq	%r15,%r13
2589	andq	$-4,%r13
2590	movq	%r9,%r14
2591	shrdq	$2,%r9,%r15
2592	shrq	$2,%r9
2593	addq	%r13,%r15
2594	adcq	%r14,%r9
2595	addq	%r15,%r10
2596	adcq	%r9,%r11
2597	adcq	$0,%r12
2598	pxor	%xmm1,%xmm13
2599	pxor	%xmm0,%xmm12
2600.byte	102,69,15,56,0,248
2601.byte	102,69,15,56,0,240
2602.byte	102,69,15,56,0,232
2603.byte	102,69,15,56,0,224
2604	movdqa	0+80(%rbp),%xmm8
2605	paddd	%xmm15,%xmm11
2606	paddd	%xmm14,%xmm10
2607	paddd	%xmm13,%xmm9
2608	paddd	%xmm12,%xmm8
2609	pxor	%xmm11,%xmm7
2610	pxor	%xmm10,%xmm6
2611	pxor	%xmm9,%xmm5
2612	pxor	%xmm8,%xmm4
2613	movdqa	%xmm8,0+80(%rbp)
2614	movdqa	%xmm7,%xmm8
2615	psrld	$20,%xmm8
2616	pslld	$32-20,%xmm7
2617	pxor	%xmm8,%xmm7
2618	movdqa	%xmm6,%xmm8
2619	psrld	$20,%xmm8
2620	pslld	$32-20,%xmm6
2621	pxor	%xmm8,%xmm6
2622	movdqa	%xmm5,%xmm8
2623	psrld	$20,%xmm8
2624	pslld	$32-20,%xmm5
2625	pxor	%xmm8,%xmm5
2626	movdqa	%xmm4,%xmm8
2627	psrld	$20,%xmm8
2628	pslld	$32-20,%xmm4
2629	pxor	%xmm8,%xmm4
2630	movdqa	L$rol8(%rip),%xmm8
2631	paddd	%xmm7,%xmm3
2632	paddd	%xmm6,%xmm2
2633	paddd	%xmm5,%xmm1
2634	paddd	%xmm4,%xmm0
2635	pxor	%xmm3,%xmm15
2636	pxor	%xmm2,%xmm14
2637	pxor	%xmm1,%xmm13
2638	pxor	%xmm0,%xmm12
2639.byte	102,69,15,56,0,248
2640.byte	102,69,15,56,0,240
2641.byte	102,69,15,56,0,232
2642.byte	102,69,15,56,0,224
2643	movdqa	0+80(%rbp),%xmm8
2644	paddd	%xmm15,%xmm11
2645	paddd	%xmm14,%xmm10
2646	paddd	%xmm13,%xmm9
2647	paddd	%xmm12,%xmm8
2648	pxor	%xmm11,%xmm7
2649	pxor	%xmm10,%xmm6
2650	pxor	%xmm9,%xmm5
2651	pxor	%xmm8,%xmm4
2652	movdqa	%xmm8,0+80(%rbp)
2653	movdqa	%xmm7,%xmm8
2654	psrld	$25,%xmm8
2655	pslld	$32-25,%xmm7
2656	pxor	%xmm8,%xmm7
2657	movdqa	%xmm6,%xmm8
2658	psrld	$25,%xmm8
2659	pslld	$32-25,%xmm6
2660	pxor	%xmm8,%xmm6
2661	movdqa	%xmm5,%xmm8
2662	psrld	$25,%xmm8
2663	pslld	$32-25,%xmm5
2664	pxor	%xmm8,%xmm5
2665	movdqa	%xmm4,%xmm8
2666	psrld	$25,%xmm8
2667	pslld	$32-25,%xmm4
2668	pxor	%xmm8,%xmm4
2669	movdqa	0+80(%rbp),%xmm8
2670.byte	102,15,58,15,255,12
2671.byte	102,69,15,58,15,219,8
2672.byte	102,69,15,58,15,255,4
2673.byte	102,15,58,15,246,12
2674.byte	102,69,15,58,15,210,8
2675.byte	102,69,15,58,15,246,4
2676.byte	102,15,58,15,237,12
2677.byte	102,69,15,58,15,201,8
2678.byte	102,69,15,58,15,237,4
2679.byte	102,15,58,15,228,12
2680.byte	102,69,15,58,15,192,8
2681.byte	102,69,15,58,15,228,4
2682
2683	leaq	16(%rdi),%rdi
2684	decq	%r8
2685	jge	L$seal_sse_main_rounds
2686	addq	0+0(%rdi),%r10
2687	adcq	8+0(%rdi),%r11
2688	adcq	$1,%r12
2689	movq	0+0+0(%rbp),%rax
2690	movq	%rax,%r15
2691	mulq	%r10
2692	movq	%rax,%r13
2693	movq	%rdx,%r14
2694	movq	0+0+0(%rbp),%rax
2695	mulq	%r11
2696	imulq	%r12,%r15
2697	addq	%rax,%r14
2698	adcq	%rdx,%r15
2699	movq	8+0+0(%rbp),%rax
2700	movq	%rax,%r9
2701	mulq	%r10
2702	addq	%rax,%r14
2703	adcq	$0,%rdx
2704	movq	%rdx,%r10
2705	movq	8+0+0(%rbp),%rax
2706	mulq	%r11
2707	addq	%rax,%r15
2708	adcq	$0,%rdx
2709	imulq	%r12,%r9
2710	addq	%r10,%r15
2711	adcq	%rdx,%r9
2712	movq	%r13,%r10
2713	movq	%r14,%r11
2714	movq	%r15,%r12
2715	andq	$3,%r12
2716	movq	%r15,%r13
2717	andq	$-4,%r13
2718	movq	%r9,%r14
2719	shrdq	$2,%r9,%r15
2720	shrq	$2,%r9
2721	addq	%r13,%r15
2722	adcq	%r14,%r9
2723	addq	%r15,%r10
2724	adcq	%r9,%r11
2725	adcq	$0,%r12
2726
2727	leaq	16(%rdi),%rdi
2728	decq	%rcx
2729	jg	L$seal_sse_main_rounds
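// ChaCha20 rounds finished: add the saved initial state back into all four
// blocks. The three blocks with counters at 144/128/112(%rbp) encrypt the next
// 192 bytes (%xmm14 is spilled to 80(%rbp), harmlessly emitted twice, so it can
// double as scratch for the first 64 bytes); the fourth block, with the counter
// at 96(%rbp), is only consumed below if more than 256 bytes remain.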
2730	paddd	L$chacha20_consts(%rip),%xmm3
2731	paddd	0+48(%rbp),%xmm7
2732	paddd	0+64(%rbp),%xmm11
2733	paddd	0+144(%rbp),%xmm15
2734	paddd	L$chacha20_consts(%rip),%xmm2
2735	paddd	0+48(%rbp),%xmm6
2736	paddd	0+64(%rbp),%xmm10
2737	paddd	0+128(%rbp),%xmm14
2738	paddd	L$chacha20_consts(%rip),%xmm1
2739	paddd	0+48(%rbp),%xmm5
2740	paddd	0+64(%rbp),%xmm9
2741	paddd	0+112(%rbp),%xmm13
2742	paddd	L$chacha20_consts(%rip),%xmm0
2743	paddd	0+48(%rbp),%xmm4
2744	paddd	0+64(%rbp),%xmm8
2745	paddd	0+96(%rbp),%xmm12
2746
2747	movdqa	%xmm14,0+80(%rbp)
2748	movdqa	%xmm14,0+80(%rbp)
2749	movdqu	0 + 0(%rsi),%xmm14
2750	pxor	%xmm3,%xmm14
2751	movdqu	%xmm14,0 + 0(%rdi)
2752	movdqu	16 + 0(%rsi),%xmm14
2753	pxor	%xmm7,%xmm14
2754	movdqu	%xmm14,16 + 0(%rdi)
2755	movdqu	32 + 0(%rsi),%xmm14
2756	pxor	%xmm11,%xmm14
2757	movdqu	%xmm14,32 + 0(%rdi)
2758	movdqu	48 + 0(%rsi),%xmm14
2759	pxor	%xmm15,%xmm14
2760	movdqu	%xmm14,48 + 0(%rdi)
2761
2762	movdqa	0+80(%rbp),%xmm14
2763	movdqu	0 + 64(%rsi),%xmm3
2764	movdqu	16 + 64(%rsi),%xmm7
2765	movdqu	32 + 64(%rsi),%xmm11
2766	movdqu	48 + 64(%rsi),%xmm15
2767	pxor	%xmm3,%xmm2
2768	pxor	%xmm7,%xmm6
2769	pxor	%xmm11,%xmm10
2770	pxor	%xmm14,%xmm15
2771	movdqu	%xmm2,0 + 64(%rdi)
2772	movdqu	%xmm6,16 + 64(%rdi)
2773	movdqu	%xmm10,32 + 64(%rdi)
2774	movdqu	%xmm15,48 + 64(%rdi)
2775	movdqu	0 + 128(%rsi),%xmm3
2776	movdqu	16 + 128(%rsi),%xmm7
2777	movdqu	32 + 128(%rsi),%xmm11
2778	movdqu	48 + 128(%rsi),%xmm15
2779	pxor	%xmm3,%xmm1
2780	pxor	%xmm7,%xmm5
2781	pxor	%xmm11,%xmm9
2782	pxor	%xmm13,%xmm15
2783	movdqu	%xmm1,0 + 128(%rdi)
2784	movdqu	%xmm5,16 + 128(%rdi)
2785	movdqu	%xmm9,32 + 128(%rdi)
2786	movdqu	%xmm15,48 + 128(%rdi)
2787
2788	cmpq	$256,%rbx
2789	ja	L$seal_sse_main_loop_xor
2790
2791	movq	$192,%rcx
2792	subq	$192,%rbx
2793	leaq	192(%rsi),%rsi
2794	jmp	L$seal_sse_128_tail_hash
2795L$seal_sse_main_loop_xor:
2796	movdqu	0 + 192(%rsi),%xmm3
2797	movdqu	16 + 192(%rsi),%xmm7
2798	movdqu	32 + 192(%rsi),%xmm11
2799	movdqu	48 + 192(%rsi),%xmm15
2800	pxor	%xmm3,%xmm0
2801	pxor	%xmm7,%xmm4
2802	pxor	%xmm11,%xmm8
2803	pxor	%xmm12,%xmm15
2804	movdqu	%xmm0,0 + 192(%rdi)
2805	movdqu	%xmm4,16 + 192(%rdi)
2806	movdqu	%xmm8,32 + 192(%rdi)
2807	movdqu	%xmm15,48 + 192(%rdi)
2808
2809	leaq	256(%rsi),%rsi
2810	subq	$256,%rbx
2811	movq	$6,%rcx
2812	movq	$4,%r8
2813	cmpq	$192,%rbx
2814	jg	L$seal_sse_main_loop
2815	movq	%rbx,%rcx
2816	testq	%rbx,%rbx
2817	je	L$seal_sse_128_tail_hash
2818	movq	$6,%rcx
2819	cmpq	$128,%rbx
2820	ja	L$seal_sse_tail_192
2821	cmpq	$64,%rbx
2822	ja	L$seal_sse_tail_128
2823
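// L$seal_sse_tail_64: at most 64 bytes of plaintext are left. One more ChaCha20
// block is computed while Poly1305 catches up on ciphertext already written:
// each of the first %rcx iterations hashes two 16-byte blocks around a double
// round, and the remaining %r8-counted iterations hash one block per double
// round.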
2824L$seal_sse_tail_64:
2825	movdqa	L$chacha20_consts(%rip),%xmm0
2826	movdqa	0+48(%rbp),%xmm4
2827	movdqa	0+64(%rbp),%xmm8
2828	movdqa	0+96(%rbp),%xmm12
2829	paddd	L$sse_inc(%rip),%xmm12
2830	movdqa	%xmm12,0+96(%rbp)
2831
2832L$seal_sse_tail_64_rounds_and_x2hash:
2833	addq	0+0(%rdi),%r10
2834	adcq	8+0(%rdi),%r11
2835	adcq	$1,%r12
2836	movq	0+0+0(%rbp),%rax
2837	movq	%rax,%r15
2838	mulq	%r10
2839	movq	%rax,%r13
2840	movq	%rdx,%r14
2841	movq	0+0+0(%rbp),%rax
2842	mulq	%r11
2843	imulq	%r12,%r15
2844	addq	%rax,%r14
2845	adcq	%rdx,%r15
2846	movq	8+0+0(%rbp),%rax
2847	movq	%rax,%r9
2848	mulq	%r10
2849	addq	%rax,%r14
2850	adcq	$0,%rdx
2851	movq	%rdx,%r10
2852	movq	8+0+0(%rbp),%rax
2853	mulq	%r11
2854	addq	%rax,%r15
2855	adcq	$0,%rdx
2856	imulq	%r12,%r9
2857	addq	%r10,%r15
2858	adcq	%rdx,%r9
2859	movq	%r13,%r10
2860	movq	%r14,%r11
2861	movq	%r15,%r12
2862	andq	$3,%r12
2863	movq	%r15,%r13
2864	andq	$-4,%r13
2865	movq	%r9,%r14
2866	shrdq	$2,%r9,%r15
2867	shrq	$2,%r9
2868	addq	%r13,%r15
2869	adcq	%r14,%r9
2870	addq	%r15,%r10
2871	adcq	%r9,%r11
2872	adcq	$0,%r12
2873
2874	leaq	16(%rdi),%rdi
2875L$seal_sse_tail_64_rounds_and_x1hash:
2876	paddd	%xmm4,%xmm0
2877	pxor	%xmm0,%xmm12
2878	pshufb	L$rol16(%rip),%xmm12
2879	paddd	%xmm12,%xmm8
2880	pxor	%xmm8,%xmm4
2881	movdqa	%xmm4,%xmm3
2882	pslld	$12,%xmm3
2883	psrld	$20,%xmm4
2884	pxor	%xmm3,%xmm4
2885	paddd	%xmm4,%xmm0
2886	pxor	%xmm0,%xmm12
2887	pshufb	L$rol8(%rip),%xmm12
2888	paddd	%xmm12,%xmm8
2889	pxor	%xmm8,%xmm4
2890	movdqa	%xmm4,%xmm3
2891	pslld	$7,%xmm3
2892	psrld	$25,%xmm4
2893	pxor	%xmm3,%xmm4
2894.byte	102,15,58,15,228,4
2895.byte	102,69,15,58,15,192,8
2896.byte	102,69,15,58,15,228,12
2897	paddd	%xmm4,%xmm0
2898	pxor	%xmm0,%xmm12
2899	pshufb	L$rol16(%rip),%xmm12
2900	paddd	%xmm12,%xmm8
2901	pxor	%xmm8,%xmm4
2902	movdqa	%xmm4,%xmm3
2903	pslld	$12,%xmm3
2904	psrld	$20,%xmm4
2905	pxor	%xmm3,%xmm4
2906	paddd	%xmm4,%xmm0
2907	pxor	%xmm0,%xmm12
2908	pshufb	L$rol8(%rip),%xmm12
2909	paddd	%xmm12,%xmm8
2910	pxor	%xmm8,%xmm4
2911	movdqa	%xmm4,%xmm3
2912	pslld	$7,%xmm3
2913	psrld	$25,%xmm4
2914	pxor	%xmm3,%xmm4
2915.byte	102,15,58,15,228,12
2916.byte	102,69,15,58,15,192,8
2917.byte	102,69,15,58,15,228,4
2918	addq	0+0(%rdi),%r10
2919	adcq	8+0(%rdi),%r11
2920	adcq	$1,%r12
2921	movq	0+0+0(%rbp),%rax
2922	movq	%rax,%r15
2923	mulq	%r10
2924	movq	%rax,%r13
2925	movq	%rdx,%r14
2926	movq	0+0+0(%rbp),%rax
2927	mulq	%r11
2928	imulq	%r12,%r15
2929	addq	%rax,%r14
2930	adcq	%rdx,%r15
2931	movq	8+0+0(%rbp),%rax
2932	movq	%rax,%r9
2933	mulq	%r10
2934	addq	%rax,%r14
2935	adcq	$0,%rdx
2936	movq	%rdx,%r10
2937	movq	8+0+0(%rbp),%rax
2938	mulq	%r11
2939	addq	%rax,%r15
2940	adcq	$0,%rdx
2941	imulq	%r12,%r9
2942	addq	%r10,%r15
2943	adcq	%rdx,%r9
2944	movq	%r13,%r10
2945	movq	%r14,%r11
2946	movq	%r15,%r12
2947	andq	$3,%r12
2948	movq	%r15,%r13
2949	andq	$-4,%r13
2950	movq	%r9,%r14
2951	shrdq	$2,%r9,%r15
2952	shrq	$2,%r9
2953	addq	%r13,%r15
2954	adcq	%r14,%r9
2955	addq	%r15,%r10
2956	adcq	%r9,%r11
2957	adcq	$0,%r12
2958
2959	leaq	16(%rdi),%rdi
2960	decq	%rcx
2961	jg	L$seal_sse_tail_64_rounds_and_x2hash
2962	decq	%r8
2963	jge	L$seal_sse_tail_64_rounds_and_x1hash
2964	paddd	L$chacha20_consts(%rip),%xmm0
2965	paddd	0+48(%rbp),%xmm4
2966	paddd	0+64(%rbp),%xmm8
2967	paddd	0+96(%rbp),%xmm12
2968
2969	jmp	L$seal_sse_128_tail_xor
2970
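// L$seal_sse_tail_128: 65..128 bytes left. Two blocks are generated with the
// same hash interleaving; the block with the lower counter encrypts 64 bytes
// here and the other stays in xmm0/xmm4/xmm8/xmm12 for L$seal_sse_128_tail_xor.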
2971L$seal_sse_tail_128:
2972	movdqa	L$chacha20_consts(%rip),%xmm0
2973	movdqa	0+48(%rbp),%xmm4
2974	movdqa	0+64(%rbp),%xmm8
2975	movdqa	%xmm0,%xmm1
2976	movdqa	%xmm4,%xmm5
2977	movdqa	%xmm8,%xmm9
2978	movdqa	0+96(%rbp),%xmm13
2979	paddd	L$sse_inc(%rip),%xmm13
2980	movdqa	%xmm13,%xmm12
2981	paddd	L$sse_inc(%rip),%xmm12
2982	movdqa	%xmm12,0+96(%rbp)
2983	movdqa	%xmm13,0+112(%rbp)
2984
2985L$seal_sse_tail_128_rounds_and_x2hash:
2986	addq	0+0(%rdi),%r10
2987	adcq	8+0(%rdi),%r11
2988	adcq	$1,%r12
2989	movq	0+0+0(%rbp),%rax
2990	movq	%rax,%r15
2991	mulq	%r10
2992	movq	%rax,%r13
2993	movq	%rdx,%r14
2994	movq	0+0+0(%rbp),%rax
2995	mulq	%r11
2996	imulq	%r12,%r15
2997	addq	%rax,%r14
2998	adcq	%rdx,%r15
2999	movq	8+0+0(%rbp),%rax
3000	movq	%rax,%r9
3001	mulq	%r10
3002	addq	%rax,%r14
3003	adcq	$0,%rdx
3004	movq	%rdx,%r10
3005	movq	8+0+0(%rbp),%rax
3006	mulq	%r11
3007	addq	%rax,%r15
3008	adcq	$0,%rdx
3009	imulq	%r12,%r9
3010	addq	%r10,%r15
3011	adcq	%rdx,%r9
3012	movq	%r13,%r10
3013	movq	%r14,%r11
3014	movq	%r15,%r12
3015	andq	$3,%r12
3016	movq	%r15,%r13
3017	andq	$-4,%r13
3018	movq	%r9,%r14
3019	shrdq	$2,%r9,%r15
3020	shrq	$2,%r9
3021	addq	%r13,%r15
3022	adcq	%r14,%r9
3023	addq	%r15,%r10
3024	adcq	%r9,%r11
3025	adcq	$0,%r12
3026
3027	leaq	16(%rdi),%rdi
3028L$seal_sse_tail_128_rounds_and_x1hash:
3029	paddd	%xmm4,%xmm0
3030	pxor	%xmm0,%xmm12
3031	pshufb	L$rol16(%rip),%xmm12
3032	paddd	%xmm12,%xmm8
3033	pxor	%xmm8,%xmm4
3034	movdqa	%xmm4,%xmm3
3035	pslld	$12,%xmm3
3036	psrld	$20,%xmm4
3037	pxor	%xmm3,%xmm4
3038	paddd	%xmm4,%xmm0
3039	pxor	%xmm0,%xmm12
3040	pshufb	L$rol8(%rip),%xmm12
3041	paddd	%xmm12,%xmm8
3042	pxor	%xmm8,%xmm4
3043	movdqa	%xmm4,%xmm3
3044	pslld	$7,%xmm3
3045	psrld	$25,%xmm4
3046	pxor	%xmm3,%xmm4
3047.byte	102,15,58,15,228,4
3048.byte	102,69,15,58,15,192,8
3049.byte	102,69,15,58,15,228,12
3050	paddd	%xmm5,%xmm1
3051	pxor	%xmm1,%xmm13
3052	pshufb	L$rol16(%rip),%xmm13
3053	paddd	%xmm13,%xmm9
3054	pxor	%xmm9,%xmm5
3055	movdqa	%xmm5,%xmm3
3056	pslld	$12,%xmm3
3057	psrld	$20,%xmm5
3058	pxor	%xmm3,%xmm5
3059	paddd	%xmm5,%xmm1
3060	pxor	%xmm1,%xmm13
3061	pshufb	L$rol8(%rip),%xmm13
3062	paddd	%xmm13,%xmm9
3063	pxor	%xmm9,%xmm5
3064	movdqa	%xmm5,%xmm3
3065	pslld	$7,%xmm3
3066	psrld	$25,%xmm5
3067	pxor	%xmm3,%xmm5
3068.byte	102,15,58,15,237,4
3069.byte	102,69,15,58,15,201,8
3070.byte	102,69,15,58,15,237,12
3071	addq	0+0(%rdi),%r10
3072	adcq	8+0(%rdi),%r11
3073	adcq	$1,%r12
3074	movq	0+0+0(%rbp),%rax
3075	movq	%rax,%r15
3076	mulq	%r10
3077	movq	%rax,%r13
3078	movq	%rdx,%r14
3079	movq	0+0+0(%rbp),%rax
3080	mulq	%r11
3081	imulq	%r12,%r15
3082	addq	%rax,%r14
3083	adcq	%rdx,%r15
3084	movq	8+0+0(%rbp),%rax
3085	movq	%rax,%r9
3086	mulq	%r10
3087	addq	%rax,%r14
3088	adcq	$0,%rdx
3089	movq	%rdx,%r10
3090	movq	8+0+0(%rbp),%rax
3091	mulq	%r11
3092	addq	%rax,%r15
3093	adcq	$0,%rdx
3094	imulq	%r12,%r9
3095	addq	%r10,%r15
3096	adcq	%rdx,%r9
3097	movq	%r13,%r10
3098	movq	%r14,%r11
3099	movq	%r15,%r12
3100	andq	$3,%r12
3101	movq	%r15,%r13
3102	andq	$-4,%r13
3103	movq	%r9,%r14
3104	shrdq	$2,%r9,%r15
3105	shrq	$2,%r9
3106	addq	%r13,%r15
3107	adcq	%r14,%r9
3108	addq	%r15,%r10
3109	adcq	%r9,%r11
3110	adcq	$0,%r12
3111	paddd	%xmm4,%xmm0
3112	pxor	%xmm0,%xmm12
3113	pshufb	L$rol16(%rip),%xmm12
3114	paddd	%xmm12,%xmm8
3115	pxor	%xmm8,%xmm4
3116	movdqa	%xmm4,%xmm3
3117	pslld	$12,%xmm3
3118	psrld	$20,%xmm4
3119	pxor	%xmm3,%xmm4
3120	paddd	%xmm4,%xmm0
3121	pxor	%xmm0,%xmm12
3122	pshufb	L$rol8(%rip),%xmm12
3123	paddd	%xmm12,%xmm8
3124	pxor	%xmm8,%xmm4
3125	movdqa	%xmm4,%xmm3
3126	pslld	$7,%xmm3
3127	psrld	$25,%xmm4
3128	pxor	%xmm3,%xmm4
3129.byte	102,15,58,15,228,12
3130.byte	102,69,15,58,15,192,8
3131.byte	102,69,15,58,15,228,4
3132	paddd	%xmm5,%xmm1
3133	pxor	%xmm1,%xmm13
3134	pshufb	L$rol16(%rip),%xmm13
3135	paddd	%xmm13,%xmm9
3136	pxor	%xmm9,%xmm5
3137	movdqa	%xmm5,%xmm3
3138	pslld	$12,%xmm3
3139	psrld	$20,%xmm5
3140	pxor	%xmm3,%xmm5
3141	paddd	%xmm5,%xmm1
3142	pxor	%xmm1,%xmm13
3143	pshufb	L$rol8(%rip),%xmm13
3144	paddd	%xmm13,%xmm9
3145	pxor	%xmm9,%xmm5
3146	movdqa	%xmm5,%xmm3
3147	pslld	$7,%xmm3
3148	psrld	$25,%xmm5
3149	pxor	%xmm3,%xmm5
3150.byte	102,15,58,15,237,12
3151.byte	102,69,15,58,15,201,8
3152.byte	102,69,15,58,15,237,4
3153
3154	leaq	16(%rdi),%rdi
3155	decq	%rcx
3156	jg	L$seal_sse_tail_128_rounds_and_x2hash
3157	decq	%r8
3158	jge	L$seal_sse_tail_128_rounds_and_x1hash
3159	paddd	L$chacha20_consts(%rip),%xmm1
3160	paddd	0+48(%rbp),%xmm5
3161	paddd	0+64(%rbp),%xmm9
3162	paddd	0+112(%rbp),%xmm13
3163	paddd	L$chacha20_consts(%rip),%xmm0
3164	paddd	0+48(%rbp),%xmm4
3165	paddd	0+64(%rbp),%xmm8
3166	paddd	0+96(%rbp),%xmm12
3167	movdqu	0 + 0(%rsi),%xmm3
3168	movdqu	16 + 0(%rsi),%xmm7
3169	movdqu	32 + 0(%rsi),%xmm11
3170	movdqu	48 + 0(%rsi),%xmm15
3171	pxor	%xmm3,%xmm1
3172	pxor	%xmm7,%xmm5
3173	pxor	%xmm11,%xmm9
3174	pxor	%xmm13,%xmm15
3175	movdqu	%xmm1,0 + 0(%rdi)
3176	movdqu	%xmm5,16 + 0(%rdi)
3177	movdqu	%xmm9,32 + 0(%rdi)
3178	movdqu	%xmm15,48 + 0(%rdi)
3179
3180	movq	$64,%rcx
3181	subq	$64,%rbx
3182	leaq	64(%rsi),%rsi
3183	jmp	L$seal_sse_128_tail_hash
3184
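// L$seal_sse_tail_192: 129..192 bytes left. Three blocks are generated; the two
// with the lower counters encrypt 128 bytes here, and the third is kept in
// xmm0/xmm4/xmm8/xmm12 for the final-block code.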
3185L$seal_sse_tail_192:
3186	movdqa	L$chacha20_consts(%rip),%xmm0
3187	movdqa	0+48(%rbp),%xmm4
3188	movdqa	0+64(%rbp),%xmm8
3189	movdqa	%xmm0,%xmm1
3190	movdqa	%xmm4,%xmm5
3191	movdqa	%xmm8,%xmm9
3192	movdqa	%xmm0,%xmm2
3193	movdqa	%xmm4,%xmm6
3194	movdqa	%xmm8,%xmm10
3195	movdqa	0+96(%rbp),%xmm14
3196	paddd	L$sse_inc(%rip),%xmm14
3197	movdqa	%xmm14,%xmm13
3198	paddd	L$sse_inc(%rip),%xmm13
3199	movdqa	%xmm13,%xmm12
3200	paddd	L$sse_inc(%rip),%xmm12
3201	movdqa	%xmm12,0+96(%rbp)
3202	movdqa	%xmm13,0+112(%rbp)
3203	movdqa	%xmm14,0+128(%rbp)
3204
3205L$seal_sse_tail_192_rounds_and_x2hash:
3206	addq	0+0(%rdi),%r10
3207	adcq	8+0(%rdi),%r11
3208	adcq	$1,%r12
3209	movq	0+0+0(%rbp),%rax
3210	movq	%rax,%r15
3211	mulq	%r10
3212	movq	%rax,%r13
3213	movq	%rdx,%r14
3214	movq	0+0+0(%rbp),%rax
3215	mulq	%r11
3216	imulq	%r12,%r15
3217	addq	%rax,%r14
3218	adcq	%rdx,%r15
3219	movq	8+0+0(%rbp),%rax
3220	movq	%rax,%r9
3221	mulq	%r10
3222	addq	%rax,%r14
3223	adcq	$0,%rdx
3224	movq	%rdx,%r10
3225	movq	8+0+0(%rbp),%rax
3226	mulq	%r11
3227	addq	%rax,%r15
3228	adcq	$0,%rdx
3229	imulq	%r12,%r9
3230	addq	%r10,%r15
3231	adcq	%rdx,%r9
3232	movq	%r13,%r10
3233	movq	%r14,%r11
3234	movq	%r15,%r12
3235	andq	$3,%r12
3236	movq	%r15,%r13
3237	andq	$-4,%r13
3238	movq	%r9,%r14
3239	shrdq	$2,%r9,%r15
3240	shrq	$2,%r9
3241	addq	%r13,%r15
3242	adcq	%r14,%r9
3243	addq	%r15,%r10
3244	adcq	%r9,%r11
3245	adcq	$0,%r12
3246
3247	leaq	16(%rdi),%rdi
3248L$seal_sse_tail_192_rounds_and_x1hash:
3249	paddd	%xmm4,%xmm0
3250	pxor	%xmm0,%xmm12
3251	pshufb	L$rol16(%rip),%xmm12
3252	paddd	%xmm12,%xmm8
3253	pxor	%xmm8,%xmm4
3254	movdqa	%xmm4,%xmm3
3255	pslld	$12,%xmm3
3256	psrld	$20,%xmm4
3257	pxor	%xmm3,%xmm4
3258	paddd	%xmm4,%xmm0
3259	pxor	%xmm0,%xmm12
3260	pshufb	L$rol8(%rip),%xmm12
3261	paddd	%xmm12,%xmm8
3262	pxor	%xmm8,%xmm4
3263	movdqa	%xmm4,%xmm3
3264	pslld	$7,%xmm3
3265	psrld	$25,%xmm4
3266	pxor	%xmm3,%xmm4
3267.byte	102,15,58,15,228,4
3268.byte	102,69,15,58,15,192,8
3269.byte	102,69,15,58,15,228,12
3270	paddd	%xmm5,%xmm1
3271	pxor	%xmm1,%xmm13
3272	pshufb	L$rol16(%rip),%xmm13
3273	paddd	%xmm13,%xmm9
3274	pxor	%xmm9,%xmm5
3275	movdqa	%xmm5,%xmm3
3276	pslld	$12,%xmm3
3277	psrld	$20,%xmm5
3278	pxor	%xmm3,%xmm5
3279	paddd	%xmm5,%xmm1
3280	pxor	%xmm1,%xmm13
3281	pshufb	L$rol8(%rip),%xmm13
3282	paddd	%xmm13,%xmm9
3283	pxor	%xmm9,%xmm5
3284	movdqa	%xmm5,%xmm3
3285	pslld	$7,%xmm3
3286	psrld	$25,%xmm5
3287	pxor	%xmm3,%xmm5
3288.byte	102,15,58,15,237,4
3289.byte	102,69,15,58,15,201,8
3290.byte	102,69,15,58,15,237,12
3291	paddd	%xmm6,%xmm2
3292	pxor	%xmm2,%xmm14
3293	pshufb	L$rol16(%rip),%xmm14
3294	paddd	%xmm14,%xmm10
3295	pxor	%xmm10,%xmm6
3296	movdqa	%xmm6,%xmm3
3297	pslld	$12,%xmm3
3298	psrld	$20,%xmm6
3299	pxor	%xmm3,%xmm6
3300	paddd	%xmm6,%xmm2
3301	pxor	%xmm2,%xmm14
3302	pshufb	L$rol8(%rip),%xmm14
3303	paddd	%xmm14,%xmm10
3304	pxor	%xmm10,%xmm6
3305	movdqa	%xmm6,%xmm3
3306	pslld	$7,%xmm3
3307	psrld	$25,%xmm6
3308	pxor	%xmm3,%xmm6
3309.byte	102,15,58,15,246,4
3310.byte	102,69,15,58,15,210,8
3311.byte	102,69,15,58,15,246,12
3312	addq	0+0(%rdi),%r10
3313	adcq	8+0(%rdi),%r11
3314	adcq	$1,%r12
3315	movq	0+0+0(%rbp),%rax
3316	movq	%rax,%r15
3317	mulq	%r10
3318	movq	%rax,%r13
3319	movq	%rdx,%r14
3320	movq	0+0+0(%rbp),%rax
3321	mulq	%r11
3322	imulq	%r12,%r15
3323	addq	%rax,%r14
3324	adcq	%rdx,%r15
3325	movq	8+0+0(%rbp),%rax
3326	movq	%rax,%r9
3327	mulq	%r10
3328	addq	%rax,%r14
3329	adcq	$0,%rdx
3330	movq	%rdx,%r10
3331	movq	8+0+0(%rbp),%rax
3332	mulq	%r11
3333	addq	%rax,%r15
3334	adcq	$0,%rdx
3335	imulq	%r12,%r9
3336	addq	%r10,%r15
3337	adcq	%rdx,%r9
3338	movq	%r13,%r10
3339	movq	%r14,%r11
3340	movq	%r15,%r12
3341	andq	$3,%r12
3342	movq	%r15,%r13
3343	andq	$-4,%r13
3344	movq	%r9,%r14
3345	shrdq	$2,%r9,%r15
3346	shrq	$2,%r9
3347	addq	%r13,%r15
3348	adcq	%r14,%r9
3349	addq	%r15,%r10
3350	adcq	%r9,%r11
3351	adcq	$0,%r12
3352	paddd	%xmm4,%xmm0
3353	pxor	%xmm0,%xmm12
3354	pshufb	L$rol16(%rip),%xmm12
3355	paddd	%xmm12,%xmm8
3356	pxor	%xmm8,%xmm4
3357	movdqa	%xmm4,%xmm3
3358	pslld	$12,%xmm3
3359	psrld	$20,%xmm4
3360	pxor	%xmm3,%xmm4
3361	paddd	%xmm4,%xmm0
3362	pxor	%xmm0,%xmm12
3363	pshufb	L$rol8(%rip),%xmm12
3364	paddd	%xmm12,%xmm8
3365	pxor	%xmm8,%xmm4
3366	movdqa	%xmm4,%xmm3
3367	pslld	$7,%xmm3
3368	psrld	$25,%xmm4
3369	pxor	%xmm3,%xmm4
3370.byte	102,15,58,15,228,12
3371.byte	102,69,15,58,15,192,8
3372.byte	102,69,15,58,15,228,4
3373	paddd	%xmm5,%xmm1
3374	pxor	%xmm1,%xmm13
3375	pshufb	L$rol16(%rip),%xmm13
3376	paddd	%xmm13,%xmm9
3377	pxor	%xmm9,%xmm5
3378	movdqa	%xmm5,%xmm3
3379	pslld	$12,%xmm3
3380	psrld	$20,%xmm5
3381	pxor	%xmm3,%xmm5
3382	paddd	%xmm5,%xmm1
3383	pxor	%xmm1,%xmm13
3384	pshufb	L$rol8(%rip),%xmm13
3385	paddd	%xmm13,%xmm9
3386	pxor	%xmm9,%xmm5
3387	movdqa	%xmm5,%xmm3
3388	pslld	$7,%xmm3
3389	psrld	$25,%xmm5
3390	pxor	%xmm3,%xmm5
3391.byte	102,15,58,15,237,12
3392.byte	102,69,15,58,15,201,8
3393.byte	102,69,15,58,15,237,4
3394	paddd	%xmm6,%xmm2
3395	pxor	%xmm2,%xmm14
3396	pshufb	L$rol16(%rip),%xmm14
3397	paddd	%xmm14,%xmm10
3398	pxor	%xmm10,%xmm6
3399	movdqa	%xmm6,%xmm3
3400	pslld	$12,%xmm3
3401	psrld	$20,%xmm6
3402	pxor	%xmm3,%xmm6
3403	paddd	%xmm6,%xmm2
3404	pxor	%xmm2,%xmm14
3405	pshufb	L$rol8(%rip),%xmm14
3406	paddd	%xmm14,%xmm10
3407	pxor	%xmm10,%xmm6
3408	movdqa	%xmm6,%xmm3
3409	pslld	$7,%xmm3
3410	psrld	$25,%xmm6
3411	pxor	%xmm3,%xmm6
3412.byte	102,15,58,15,246,12
3413.byte	102,69,15,58,15,210,8
3414.byte	102,69,15,58,15,246,4
3415
3416	leaq	16(%rdi),%rdi
3417	decq	%rcx
3418	jg	L$seal_sse_tail_192_rounds_and_x2hash
3419	decq	%r8
3420	jge	L$seal_sse_tail_192_rounds_and_x1hash
3421	paddd	L$chacha20_consts(%rip),%xmm2
3422	paddd	0+48(%rbp),%xmm6
3423	paddd	0+64(%rbp),%xmm10
3424	paddd	0+128(%rbp),%xmm14
3425	paddd	L$chacha20_consts(%rip),%xmm1
3426	paddd	0+48(%rbp),%xmm5
3427	paddd	0+64(%rbp),%xmm9
3428	paddd	0+112(%rbp),%xmm13
3429	paddd	L$chacha20_consts(%rip),%xmm0
3430	paddd	0+48(%rbp),%xmm4
3431	paddd	0+64(%rbp),%xmm8
3432	paddd	0+96(%rbp),%xmm12
3433	movdqu	0 + 0(%rsi),%xmm3
3434	movdqu	16 + 0(%rsi),%xmm7
3435	movdqu	32 + 0(%rsi),%xmm11
3436	movdqu	48 + 0(%rsi),%xmm15
3437	pxor	%xmm3,%xmm2
3438	pxor	%xmm7,%xmm6
3439	pxor	%xmm11,%xmm10
3440	pxor	%xmm14,%xmm15
3441	movdqu	%xmm2,0 + 0(%rdi)
3442	movdqu	%xmm6,16 + 0(%rdi)
3443	movdqu	%xmm10,32 + 0(%rdi)
3444	movdqu	%xmm15,48 + 0(%rdi)
3445	movdqu	0 + 64(%rsi),%xmm3
3446	movdqu	16 + 64(%rsi),%xmm7
3447	movdqu	32 + 64(%rsi),%xmm11
3448	movdqu	48 + 64(%rsi),%xmm15
3449	pxor	%xmm3,%xmm1
3450	pxor	%xmm7,%xmm5
3451	pxor	%xmm11,%xmm9
3452	pxor	%xmm13,%xmm15
3453	movdqu	%xmm1,0 + 64(%rdi)
3454	movdqu	%xmm5,16 + 64(%rdi)
3455	movdqu	%xmm9,32 + 64(%rdi)
3456	movdqu	%xmm15,48 + 64(%rdi)
3457
3458	movq	$128,%rcx
3459	subq	$128,%rbx
3460	leaq	128(%rsi),%rsi
3461
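// L$seal_sse_128_tail_hash: %rcx bytes of ciphertext written above still need
// to be authenticated; absorb them into the Poly1305 state 16 bytes at a time.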
3462L$seal_sse_128_tail_hash:
3463	cmpq	$16,%rcx
3464	jb	L$seal_sse_128_tail_xor
3465	addq	0+0(%rdi),%r10
3466	adcq	8+0(%rdi),%r11
3467	adcq	$1,%r12
3468	movq	0+0+0(%rbp),%rax
3469	movq	%rax,%r15
3470	mulq	%r10
3471	movq	%rax,%r13
3472	movq	%rdx,%r14
3473	movq	0+0+0(%rbp),%rax
3474	mulq	%r11
3475	imulq	%r12,%r15
3476	addq	%rax,%r14
3477	adcq	%rdx,%r15
3478	movq	8+0+0(%rbp),%rax
3479	movq	%rax,%r9
3480	mulq	%r10
3481	addq	%rax,%r14
3482	adcq	$0,%rdx
3483	movq	%rdx,%r10
3484	movq	8+0+0(%rbp),%rax
3485	mulq	%r11
3486	addq	%rax,%r15
3487	adcq	$0,%rdx
3488	imulq	%r12,%r9
3489	addq	%r10,%r15
3490	adcq	%rdx,%r9
3491	movq	%r13,%r10
3492	movq	%r14,%r11
3493	movq	%r15,%r12
3494	andq	$3,%r12
3495	movq	%r15,%r13
3496	andq	$-4,%r13
3497	movq	%r9,%r14
3498	shrdq	$2,%r9,%r15
3499	shrq	$2,%r9
3500	addq	%r13,%r15
3501	adcq	%r14,%r9
3502	addq	%r15,%r10
3503	adcq	%r9,%r11
3504	adcq	$0,%r12
3505
3506	subq	$16,%rcx
3507	leaq	16(%rdi),%rdi
3508	jmp	L$seal_sse_128_tail_hash
3509
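// L$seal_sse_128_tail_xor: encrypt and hash the remaining plaintext 16 bytes at
// a time with the keystream still held in registers, rotating
// xmm0 <- xmm4 <- xmm8 <- xmm12 <- xmm1 ... after each block; anything shorter
// than 16 bytes falls through to L$seal_sse_tail_16.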
3510L$seal_sse_128_tail_xor:
3511	cmpq	$16,%rbx
3512	jb	L$seal_sse_tail_16
3513	subq	$16,%rbx
3514
3515	movdqu	0(%rsi),%xmm3
3516	pxor	%xmm3,%xmm0
3517	movdqu	%xmm0,0(%rdi)
3518
3519	addq	0(%rdi),%r10
3520	adcq	8(%rdi),%r11
3521	adcq	$1,%r12
3522	leaq	16(%rsi),%rsi
3523	leaq	16(%rdi),%rdi
3524	movq	0+0+0(%rbp),%rax
3525	movq	%rax,%r15
3526	mulq	%r10
3527	movq	%rax,%r13
3528	movq	%rdx,%r14
3529	movq	0+0+0(%rbp),%rax
3530	mulq	%r11
3531	imulq	%r12,%r15
3532	addq	%rax,%r14
3533	adcq	%rdx,%r15
3534	movq	8+0+0(%rbp),%rax
3535	movq	%rax,%r9
3536	mulq	%r10
3537	addq	%rax,%r14
3538	adcq	$0,%rdx
3539	movq	%rdx,%r10
3540	movq	8+0+0(%rbp),%rax
3541	mulq	%r11
3542	addq	%rax,%r15
3543	adcq	$0,%rdx
3544	imulq	%r12,%r9
3545	addq	%r10,%r15
3546	adcq	%rdx,%r9
3547	movq	%r13,%r10
3548	movq	%r14,%r11
3549	movq	%r15,%r12
3550	andq	$3,%r12
3551	movq	%r15,%r13
3552	andq	$-4,%r13
3553	movq	%r9,%r14
3554	shrdq	$2,%r9,%r15
3555	shrq	$2,%r9
3556	addq	%r13,%r15
3557	adcq	%r14,%r9
3558	addq	%r15,%r10
3559	adcq	%r9,%r11
3560	adcq	$0,%r12
3561
3562
3563	movdqa	%xmm4,%xmm0
3564	movdqa	%xmm8,%xmm4
3565	movdqa	%xmm12,%xmm8
3566	movdqa	%xmm1,%xmm12
3567	movdqa	%xmm5,%xmm1
3568	movdqa	%xmm9,%xmm5
3569	movdqa	%xmm13,%xmm9
3570	jmp	L$seal_sse_128_tail_xor
3571
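// L$seal_sse_tail_16: fewer than 16 bytes remain. They are gathered back to
// front into %xmm15 a byte at a time, XORed with the leftover keystream in
// %xmm0 and written out byte by byte. Hashing of this partial block is deferred
// until any extra input supplied by the caller has been folded in below.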
3572L$seal_sse_tail_16:
3573	testq	%rbx,%rbx
3574	jz	L$process_blocks_of_extra_in
3575
3576	movq	%rbx,%r8
3577	movq	%rbx,%rcx
3578	leaq	-1(%rsi,%rbx,1),%rsi
3579	pxor	%xmm15,%xmm15
3580L$seal_sse_tail_16_compose:
3581	pslldq	$1,%xmm15
3582	pinsrb	$0,(%rsi),%xmm15
3583	leaq	-1(%rsi),%rsi
3584	decq	%rcx
3585	jne	L$seal_sse_tail_16_compose
3586
3587
3588	pxor	%xmm0,%xmm15
3589
3590
3591	movq	%rbx,%rcx
3592	movdqu	%xmm15,%xmm0
3593L$seal_sse_tail_16_extract:
3594	pextrb	$0,%xmm0,(%rdi)
3595	psrldq	$1,%xmm0
3596	addq	$1,%rdi
3597	subq	$1,%rcx
3598	jnz	L$seal_sse_tail_16_extract
3599
3600
3601
3602
3603
3604
3605
3606
3607	movq	288 + 0 + 32(%rsp),%r9
3608	movq	56(%r9),%r14
3609	movq	48(%r9),%r13
3610	testq	%r14,%r14
3611	jz	L$process_partial_block
3612
3613	movq	$16,%r15
3614	subq	%rbx,%r15
3615	cmpq	%r15,%r14
3616
3617	jge	L$load_extra_in
3618	movq	%r14,%r15
3619
3620L$load_extra_in:
3621
3622
3623	leaq	-1(%r13,%r15,1),%rsi
3624
3625
3626	addq	%r15,%r13
3627	subq	%r15,%r14
3628	movq	%r13,48(%r9)
3629	movq	%r14,56(%r9)
3630
3631
3632
3633	addq	%r15,%r8
3634
3635
3636	pxor	%xmm11,%xmm11
3637L$load_extra_load_loop:
3638	pslldq	$1,%xmm11
3639	pinsrb	$0,(%rsi),%xmm11
3640	leaq	-1(%rsi),%rsi
3641	subq	$1,%r15
3642	jnz	L$load_extra_load_loop
3643
3644
3645
3646
3647	movq	%rbx,%r15
3648
3649L$load_extra_shift_loop:
3650	pslldq	$1,%xmm11
3651	subq	$1,%r15
3652	jnz	L$load_extra_shift_loop
3653
3654
3655
3656
3657	leaq	L$and_masks(%rip),%r15
3658	shlq	$4,%rbx
3659	pand	-16(%r15,%rbx,1),%xmm15
3660
3661
3662	por	%xmm11,%xmm15
3663
3664
3665
3666.byte	102,77,15,126,253
3667	pextrq	$1,%xmm15,%r14
3668	addq	%r13,%r10
3669	adcq	%r14,%r11
3670	adcq	$1,%r12
3671	movq	0+0+0(%rbp),%rax
3672	movq	%rax,%r15
3673	mulq	%r10
3674	movq	%rax,%r13
3675	movq	%rdx,%r14
3676	movq	0+0+0(%rbp),%rax
3677	mulq	%r11
3678	imulq	%r12,%r15
3679	addq	%rax,%r14
3680	adcq	%rdx,%r15
3681	movq	8+0+0(%rbp),%rax
3682	movq	%rax,%r9
3683	mulq	%r10
3684	addq	%rax,%r14
3685	adcq	$0,%rdx
3686	movq	%rdx,%r10
3687	movq	8+0+0(%rbp),%rax
3688	mulq	%r11
3689	addq	%rax,%r15
3690	adcq	$0,%rdx
3691	imulq	%r12,%r9
3692	addq	%r10,%r15
3693	adcq	%rdx,%r9
3694	movq	%r13,%r10
3695	movq	%r14,%r11
3696	movq	%r15,%r12
3697	andq	$3,%r12
3698	movq	%r15,%r13
3699	andq	$-4,%r13
3700	movq	%r9,%r14
3701	shrdq	$2,%r9,%r15
3702	shrq	$2,%r9
3703	addq	%r13,%r15
3704	adcq	%r14,%r9
3705	addq	%r15,%r10
3706	adcq	%r9,%r11
3707	adcq	$0,%r12
3708
3709
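// L$process_blocks_of_extra_in: the caller may pass extra input (pointer at
// 48(%r9), length at 56(%r9) in the parameter block on the stack) that must be
// covered by the tag but is not encrypted or copied here. Hash its full 16-byte
// blocks, then gather any trailing bytes into %xmm15 for the partial-block
// path.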
3710L$process_blocks_of_extra_in:
3711
3712	movq	288+32+0 (%rsp),%r9
3713	movq	48(%r9),%rsi
3714	movq	56(%r9),%r8
3715	movq	%r8,%rcx
3716	shrq	$4,%r8
3717
3718L$process_extra_hash_loop:
3719	jz	process_extra_in_trailer
3720	addq	0+0(%rsi),%r10
3721	adcq	8+0(%rsi),%r11
3722	adcq	$1,%r12
3723	movq	0+0+0(%rbp),%rax
3724	movq	%rax,%r15
3725	mulq	%r10
3726	movq	%rax,%r13
3727	movq	%rdx,%r14
3728	movq	0+0+0(%rbp),%rax
3729	mulq	%r11
3730	imulq	%r12,%r15
3731	addq	%rax,%r14
3732	adcq	%rdx,%r15
3733	movq	8+0+0(%rbp),%rax
3734	movq	%rax,%r9
3735	mulq	%r10
3736	addq	%rax,%r14
3737	adcq	$0,%rdx
3738	movq	%rdx,%r10
3739	movq	8+0+0(%rbp),%rax
3740	mulq	%r11
3741	addq	%rax,%r15
3742	adcq	$0,%rdx
3743	imulq	%r12,%r9
3744	addq	%r10,%r15
3745	adcq	%rdx,%r9
3746	movq	%r13,%r10
3747	movq	%r14,%r11
3748	movq	%r15,%r12
3749	andq	$3,%r12
3750	movq	%r15,%r13
3751	andq	$-4,%r13
3752	movq	%r9,%r14
3753	shrdq	$2,%r9,%r15
3754	shrq	$2,%r9
3755	addq	%r13,%r15
3756	adcq	%r14,%r9
3757	addq	%r15,%r10
3758	adcq	%r9,%r11
3759	adcq	$0,%r12
3760
3761	leaq	16(%rsi),%rsi
3762	subq	$1,%r8
3763	jmp	L$process_extra_hash_loop
3764process_extra_in_trailer:
3765	andq	$15,%rcx
3766	movq	%rcx,%rbx
3767	jz	L$do_length_block
3768	leaq	-1(%rsi,%rcx,1),%rsi
3769
3770L$process_extra_in_trailer_load:
3771	pslldq	$1,%xmm15
3772	pinsrb	$0,(%rsi),%xmm15
3773	leaq	-1(%rsi),%rsi
3774	subq	$1,%rcx
3775	jnz	L$process_extra_in_trailer_load
3776
3777L$process_partial_block:
3778
3779	leaq	L$and_masks(%rip),%r15
3780	shlq	$4,%rbx
3781	pand	-16(%r15,%rbx,1),%xmm15
3782.byte	102,77,15,126,253
3783	pextrq	$1,%xmm15,%r14
3784	addq	%r13,%r10
3785	adcq	%r14,%r11
3786	adcq	$1,%r12
3787	movq	0+0+0(%rbp),%rax
3788	movq	%rax,%r15
3789	mulq	%r10
3790	movq	%rax,%r13
3791	movq	%rdx,%r14
3792	movq	0+0+0(%rbp),%rax
3793	mulq	%r11
3794	imulq	%r12,%r15
3795	addq	%rax,%r14
3796	adcq	%rdx,%r15
3797	movq	8+0+0(%rbp),%rax
3798	movq	%rax,%r9
3799	mulq	%r10
3800	addq	%rax,%r14
3801	adcq	$0,%rdx
3802	movq	%rdx,%r10
3803	movq	8+0+0(%rbp),%rax
3804	mulq	%r11
3805	addq	%rax,%r15
3806	adcq	$0,%rdx
3807	imulq	%r12,%r9
3808	addq	%r10,%r15
3809	adcq	%rdx,%r9
3810	movq	%r13,%r10
3811	movq	%r14,%r11
3812	movq	%r15,%r12
3813	andq	$3,%r12
3814	movq	%r15,%r13
3815	andq	$-4,%r13
3816	movq	%r9,%r14
3817	shrdq	$2,%r9,%r15
3818	shrq	$2,%r9
3819	addq	%r13,%r15
3820	adcq	%r14,%r9
3821	addq	%r15,%r10
3822	adcq	%r9,%r11
3823	adcq	$0,%r12
3824
3825
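// L$do_length_block: hash the final Poly1305 block holding the AD and
// ciphertext lengths (kept at 32(%rbp)), reduce the accumulator fully modulo
// 2^130-5 (the add/sbb/cmovc sequence conditionally subtracts the prime), and
// add the second key half s from 16(%rbp). The 16-byte tag is stored through
// the tag pointer popped from the stack, callee-saved registers are restored,
// and the function returns.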
3826L$do_length_block:
3827	addq	0+0+32(%rbp),%r10
3828	adcq	8+0+32(%rbp),%r11
3829	adcq	$1,%r12
3830	movq	0+0+0(%rbp),%rax
3831	movq	%rax,%r15
3832	mulq	%r10
3833	movq	%rax,%r13
3834	movq	%rdx,%r14
3835	movq	0+0+0(%rbp),%rax
3836	mulq	%r11
3837	imulq	%r12,%r15
3838	addq	%rax,%r14
3839	adcq	%rdx,%r15
3840	movq	8+0+0(%rbp),%rax
3841	movq	%rax,%r9
3842	mulq	%r10
3843	addq	%rax,%r14
3844	adcq	$0,%rdx
3845	movq	%rdx,%r10
3846	movq	8+0+0(%rbp),%rax
3847	mulq	%r11
3848	addq	%rax,%r15
3849	adcq	$0,%rdx
3850	imulq	%r12,%r9
3851	addq	%r10,%r15
3852	adcq	%rdx,%r9
3853	movq	%r13,%r10
3854	movq	%r14,%r11
3855	movq	%r15,%r12
3856	andq	$3,%r12
3857	movq	%r15,%r13
3858	andq	$-4,%r13
3859	movq	%r9,%r14
3860	shrdq	$2,%r9,%r15
3861	shrq	$2,%r9
3862	addq	%r13,%r15
3863	adcq	%r14,%r9
3864	addq	%r15,%r10
3865	adcq	%r9,%r11
3866	adcq	$0,%r12
3867
3868
3869	movq	%r10,%r13
3870	movq	%r11,%r14
3871	movq	%r12,%r15
3872	subq	$-5,%r10
3873	sbbq	$-1,%r11
3874	sbbq	$3,%r12
3875	cmovcq	%r13,%r10
3876	cmovcq	%r14,%r11
3877	cmovcq	%r15,%r12
3878
3879	addq	0+0+16(%rbp),%r10
3880	adcq	8+0+16(%rbp),%r11
3881
3882
3883	addq	$288 + 0 + 32,%rsp
3884
3885
3886	popq	%r9
3887
3888	movq	%r10,(%r9)
3889	movq	%r11,8(%r9)
3890	popq	%r15
3891
3892	popq	%r14
3893
3894	popq	%r13
3895
3896	popq	%r12
3897
3898	popq	%rbx
3899
3900	popq	%rbp
3901
3902	ret
3903
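// L$seal_sse_128: plaintexts of at most 128 bytes take this shortcut. A single
// three-wide SSE ChaCha20 pass produces the Poly1305 key (from the state with
// the lowest counter, clamped below) plus up to 128 bytes of keystream, the AD
// is hashed, and control joins L$seal_sse_128_tail_xor.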
3904L$seal_sse_128:
3905
3906	movdqu	L$chacha20_consts(%rip),%xmm0
3907	movdqa	%xmm0,%xmm1
3908	movdqa	%xmm0,%xmm2
3909	movdqu	0(%r9),%xmm4
3910	movdqa	%xmm4,%xmm5
3911	movdqa	%xmm4,%xmm6
3912	movdqu	16(%r9),%xmm8
3913	movdqa	%xmm8,%xmm9
3914	movdqa	%xmm8,%xmm10
3915	movdqu	32(%r9),%xmm14
3916	movdqa	%xmm14,%xmm12
3917	paddd	L$sse_inc(%rip),%xmm12
3918	movdqa	%xmm12,%xmm13
3919	paddd	L$sse_inc(%rip),%xmm13
3920	movdqa	%xmm4,%xmm7
3921	movdqa	%xmm8,%xmm11
3922	movdqa	%xmm12,%xmm15
3923	movq	$10,%r10
3924
3925L$seal_sse_128_rounds:
3926	paddd	%xmm4,%xmm0
3927	pxor	%xmm0,%xmm12
3928	pshufb	L$rol16(%rip),%xmm12
3929	paddd	%xmm12,%xmm8
3930	pxor	%xmm8,%xmm4
3931	movdqa	%xmm4,%xmm3
3932	pslld	$12,%xmm3
3933	psrld	$20,%xmm4
3934	pxor	%xmm3,%xmm4
3935	paddd	%xmm4,%xmm0
3936	pxor	%xmm0,%xmm12
3937	pshufb	L$rol8(%rip),%xmm12
3938	paddd	%xmm12,%xmm8
3939	pxor	%xmm8,%xmm4
3940	movdqa	%xmm4,%xmm3
3941	pslld	$7,%xmm3
3942	psrld	$25,%xmm4
3943	pxor	%xmm3,%xmm4
3944.byte	102,15,58,15,228,4
3945.byte	102,69,15,58,15,192,8
3946.byte	102,69,15,58,15,228,12
3947	paddd	%xmm5,%xmm1
3948	pxor	%xmm1,%xmm13
3949	pshufb	L$rol16(%rip),%xmm13
3950	paddd	%xmm13,%xmm9
3951	pxor	%xmm9,%xmm5
3952	movdqa	%xmm5,%xmm3
3953	pslld	$12,%xmm3
3954	psrld	$20,%xmm5
3955	pxor	%xmm3,%xmm5
3956	paddd	%xmm5,%xmm1
3957	pxor	%xmm1,%xmm13
3958	pshufb	L$rol8(%rip),%xmm13
3959	paddd	%xmm13,%xmm9
3960	pxor	%xmm9,%xmm5
3961	movdqa	%xmm5,%xmm3
3962	pslld	$7,%xmm3
3963	psrld	$25,%xmm5
3964	pxor	%xmm3,%xmm5
3965.byte	102,15,58,15,237,4
3966.byte	102,69,15,58,15,201,8
3967.byte	102,69,15,58,15,237,12
3968	paddd	%xmm6,%xmm2
3969	pxor	%xmm2,%xmm14
3970	pshufb	L$rol16(%rip),%xmm14
3971	paddd	%xmm14,%xmm10
3972	pxor	%xmm10,%xmm6
3973	movdqa	%xmm6,%xmm3
3974	pslld	$12,%xmm3
3975	psrld	$20,%xmm6
3976	pxor	%xmm3,%xmm6
3977	paddd	%xmm6,%xmm2
3978	pxor	%xmm2,%xmm14
3979	pshufb	L$rol8(%rip),%xmm14
3980	paddd	%xmm14,%xmm10
3981	pxor	%xmm10,%xmm6
3982	movdqa	%xmm6,%xmm3
3983	pslld	$7,%xmm3
3984	psrld	$25,%xmm6
3985	pxor	%xmm3,%xmm6
3986.byte	102,15,58,15,246,4
3987.byte	102,69,15,58,15,210,8
3988.byte	102,69,15,58,15,246,12
3989	paddd	%xmm4,%xmm0
3990	pxor	%xmm0,%xmm12
3991	pshufb	L$rol16(%rip),%xmm12
3992	paddd	%xmm12,%xmm8
3993	pxor	%xmm8,%xmm4
3994	movdqa	%xmm4,%xmm3
3995	pslld	$12,%xmm3
3996	psrld	$20,%xmm4
3997	pxor	%xmm3,%xmm4
3998	paddd	%xmm4,%xmm0
3999	pxor	%xmm0,%xmm12
4000	pshufb	L$rol8(%rip),%xmm12
4001	paddd	%xmm12,%xmm8
4002	pxor	%xmm8,%xmm4
4003	movdqa	%xmm4,%xmm3
4004	pslld	$7,%xmm3
4005	psrld	$25,%xmm4
4006	pxor	%xmm3,%xmm4
4007.byte	102,15,58,15,228,12
4008.byte	102,69,15,58,15,192,8
4009.byte	102,69,15,58,15,228,4
4010	paddd	%xmm5,%xmm1
4011	pxor	%xmm1,%xmm13
4012	pshufb	L$rol16(%rip),%xmm13
4013	paddd	%xmm13,%xmm9
4014	pxor	%xmm9,%xmm5
4015	movdqa	%xmm5,%xmm3
4016	pslld	$12,%xmm3
4017	psrld	$20,%xmm5
4018	pxor	%xmm3,%xmm5
4019	paddd	%xmm5,%xmm1
4020	pxor	%xmm1,%xmm13
4021	pshufb	L$rol8(%rip),%xmm13
4022	paddd	%xmm13,%xmm9
4023	pxor	%xmm9,%xmm5
4024	movdqa	%xmm5,%xmm3
4025	pslld	$7,%xmm3
4026	psrld	$25,%xmm5
4027	pxor	%xmm3,%xmm5
4028.byte	102,15,58,15,237,12
4029.byte	102,69,15,58,15,201,8
4030.byte	102,69,15,58,15,237,4
4031	paddd	%xmm6,%xmm2
4032	pxor	%xmm2,%xmm14
4033	pshufb	L$rol16(%rip),%xmm14
4034	paddd	%xmm14,%xmm10
4035	pxor	%xmm10,%xmm6
4036	movdqa	%xmm6,%xmm3
4037	pslld	$12,%xmm3
4038	psrld	$20,%xmm6
4039	pxor	%xmm3,%xmm6
4040	paddd	%xmm6,%xmm2
4041	pxor	%xmm2,%xmm14
4042	pshufb	L$rol8(%rip),%xmm14
4043	paddd	%xmm14,%xmm10
4044	pxor	%xmm10,%xmm6
4045	movdqa	%xmm6,%xmm3
4046	pslld	$7,%xmm3
4047	psrld	$25,%xmm6
4048	pxor	%xmm3,%xmm6
4049.byte	102,15,58,15,246,12
4050.byte	102,69,15,58,15,210,8
4051.byte	102,69,15,58,15,246,4
4052
4053	decq	%r10
4054	jnz	L$seal_sse_128_rounds
4055	paddd	L$chacha20_consts(%rip),%xmm0
4056	paddd	L$chacha20_consts(%rip),%xmm1
4057	paddd	L$chacha20_consts(%rip),%xmm2
4058	paddd	%xmm7,%xmm4
4059	paddd	%xmm7,%xmm5
4060	paddd	%xmm7,%xmm6
4061	paddd	%xmm11,%xmm8
4062	paddd	%xmm11,%xmm9
4063	paddd	%xmm15,%xmm12
4064	paddd	L$sse_inc(%rip),%xmm15
4065	paddd	%xmm15,%xmm13
4066
4067	pand	L$clamp(%rip),%xmm2
4068	movdqa	%xmm2,0+0(%rbp)
4069	movdqa	%xmm6,0+16(%rbp)
4070
4071	movq	%r8,%r8
4072	call	poly_hash_ad_internal
4073	jmp	L$seal_sse_128_tail_xor
4074
4075
4076
4077
4078
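// chacha20_poly1305_open_avx2: AVX2 variant of the open (decrypt) path. Every
// ymm register carries two ChaCha20 blocks side by side. The first pass derives
// the Poly1305 key from the lowest-counter block plus 64 bytes of keystream,
// the AD is hashed, and from then on ciphertext is hashed before it is
// decrypted (the reverse of the sealing order).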
4079.p2align	6
4080chacha20_poly1305_open_avx2:
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093	vzeroupper
4094	vmovdqa	L$chacha20_consts(%rip),%ymm0
4095	vbroadcasti128	0(%r9),%ymm4
4096	vbroadcasti128	16(%r9),%ymm8
4097	vbroadcasti128	32(%r9),%ymm12
4098	vpaddd	L$avx2_init(%rip),%ymm12,%ymm12
4099	cmpq	$192,%rbx
4100	jbe	L$open_avx2_192
4101	cmpq	$320,%rbx
4102	jbe	L$open_avx2_320
4103
4104	vmovdqa	%ymm4,0+64(%rbp)
4105	vmovdqa	%ymm8,0+96(%rbp)
4106	vmovdqa	%ymm12,0+160(%rbp)
4107	movq	$10,%r10
4108L$open_avx2_init_rounds:
4109	vpaddd	%ymm4,%ymm0,%ymm0
4110	vpxor	%ymm0,%ymm12,%ymm12
4111	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4112	vpaddd	%ymm12,%ymm8,%ymm8
4113	vpxor	%ymm8,%ymm4,%ymm4
4114	vpsrld	$20,%ymm4,%ymm3
4115	vpslld	$12,%ymm4,%ymm4
4116	vpxor	%ymm3,%ymm4,%ymm4
4117	vpaddd	%ymm4,%ymm0,%ymm0
4118	vpxor	%ymm0,%ymm12,%ymm12
4119	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4120	vpaddd	%ymm12,%ymm8,%ymm8
4121	vpxor	%ymm8,%ymm4,%ymm4
4122	vpslld	$7,%ymm4,%ymm3
4123	vpsrld	$25,%ymm4,%ymm4
4124	vpxor	%ymm3,%ymm4,%ymm4
4125	vpalignr	$12,%ymm12,%ymm12,%ymm12
4126	vpalignr	$8,%ymm8,%ymm8,%ymm8
4127	vpalignr	$4,%ymm4,%ymm4,%ymm4
4128	vpaddd	%ymm4,%ymm0,%ymm0
4129	vpxor	%ymm0,%ymm12,%ymm12
4130	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4131	vpaddd	%ymm12,%ymm8,%ymm8
4132	vpxor	%ymm8,%ymm4,%ymm4
4133	vpsrld	$20,%ymm4,%ymm3
4134	vpslld	$12,%ymm4,%ymm4
4135	vpxor	%ymm3,%ymm4,%ymm4
4136	vpaddd	%ymm4,%ymm0,%ymm0
4137	vpxor	%ymm0,%ymm12,%ymm12
4138	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4139	vpaddd	%ymm12,%ymm8,%ymm8
4140	vpxor	%ymm8,%ymm4,%ymm4
4141	vpslld	$7,%ymm4,%ymm3
4142	vpsrld	$25,%ymm4,%ymm4
4143	vpxor	%ymm3,%ymm4,%ymm4
4144	vpalignr	$4,%ymm12,%ymm12,%ymm12
4145	vpalignr	$8,%ymm8,%ymm8,%ymm8
4146	vpalignr	$12,%ymm4,%ymm4,%ymm4
4147
4148	decq	%r10
4149	jne	L$open_avx2_init_rounds
4150	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4151	vpaddd	0+64(%rbp),%ymm4,%ymm4
4152	vpaddd	0+96(%rbp),%ymm8,%ymm8
4153	vpaddd	0+160(%rbp),%ymm12,%ymm12
4154
4155	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4156
4157	vpand	L$clamp(%rip),%ymm3,%ymm3
4158	vmovdqa	%ymm3,0+0(%rbp)
4159
4160	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
4161	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
4162
4163	movq	%r8,%r8
4164	call	poly_hash_ad_internal
4165
4166	xorq	%rcx,%rcx
4167L$open_avx2_init_hash:
4168	addq	0+0(%rsi,%rcx,1),%r10
4169	adcq	8+0(%rsi,%rcx,1),%r11
4170	adcq	$1,%r12
4171	movq	0+0+0(%rbp),%rax
4172	movq	%rax,%r15
4173	mulq	%r10
4174	movq	%rax,%r13
4175	movq	%rdx,%r14
4176	movq	0+0+0(%rbp),%rax
4177	mulq	%r11
4178	imulq	%r12,%r15
4179	addq	%rax,%r14
4180	adcq	%rdx,%r15
4181	movq	8+0+0(%rbp),%rax
4182	movq	%rax,%r9
4183	mulq	%r10
4184	addq	%rax,%r14
4185	adcq	$0,%rdx
4186	movq	%rdx,%r10
4187	movq	8+0+0(%rbp),%rax
4188	mulq	%r11
4189	addq	%rax,%r15
4190	adcq	$0,%rdx
4191	imulq	%r12,%r9
4192	addq	%r10,%r15
4193	adcq	%rdx,%r9
4194	movq	%r13,%r10
4195	movq	%r14,%r11
4196	movq	%r15,%r12
4197	andq	$3,%r12
4198	movq	%r15,%r13
4199	andq	$-4,%r13
4200	movq	%r9,%r14
4201	shrdq	$2,%r9,%r15
4202	shrq	$2,%r9
4203	addq	%r13,%r15
4204	adcq	%r14,%r9
4205	addq	%r15,%r10
4206	adcq	%r9,%r11
4207	adcq	$0,%r12
4208
4209	addq	$16,%rcx
4210	cmpq	$64,%rcx
4211	jne	L$open_avx2_init_hash
4212
4213	vpxor	0(%rsi),%ymm0,%ymm0
4214	vpxor	32(%rsi),%ymm4,%ymm4
4215
4216	vmovdqu	%ymm0,0(%rdi)
4217	vmovdqu	%ymm4,32(%rdi)
4218	leaq	64(%rsi),%rsi
4219	leaq	64(%rdi),%rdi
4220	subq	$64,%rbx
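// L$open_avx2_main_loop: while at least 512 bytes of ciphertext remain, compute
// eight ChaCha20 blocks (four two-block ymm state groups) and hash the same 512
// bytes as they are decrypted. The Poly1305 steps here use mulx, which does not
// touch the flags, so the add/adc carry chains interleave freely with the
// vector code.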
4221L$open_avx2_main_loop:
4222
4223	cmpq	$512,%rbx
4224	jb	L$open_avx2_main_loop_done
4225	vmovdqa	L$chacha20_consts(%rip),%ymm0
4226	vmovdqa	0+64(%rbp),%ymm4
4227	vmovdqa	0+96(%rbp),%ymm8
4228	vmovdqa	%ymm0,%ymm1
4229	vmovdqa	%ymm4,%ymm5
4230	vmovdqa	%ymm8,%ymm9
4231	vmovdqa	%ymm0,%ymm2
4232	vmovdqa	%ymm4,%ymm6
4233	vmovdqa	%ymm8,%ymm10
4234	vmovdqa	%ymm0,%ymm3
4235	vmovdqa	%ymm4,%ymm7
4236	vmovdqa	%ymm8,%ymm11
4237	vmovdqa	L$avx2_inc(%rip),%ymm12
4238	vpaddd	0+160(%rbp),%ymm12,%ymm15
4239	vpaddd	%ymm15,%ymm12,%ymm14
4240	vpaddd	%ymm14,%ymm12,%ymm13
4241	vpaddd	%ymm13,%ymm12,%ymm12
4242	vmovdqa	%ymm15,0+256(%rbp)
4243	vmovdqa	%ymm14,0+224(%rbp)
4244	vmovdqa	%ymm13,0+192(%rbp)
4245	vmovdqa	%ymm12,0+160(%rbp)
4246
4247	xorq	%rcx,%rcx
4248L$open_avx2_main_loop_rounds:
4249	addq	0+0(%rsi,%rcx,1),%r10
4250	adcq	8+0(%rsi,%rcx,1),%r11
4251	adcq	$1,%r12
4252	vmovdqa	%ymm8,0+128(%rbp)
4253	vmovdqa	L$rol16(%rip),%ymm8
4254	vpaddd	%ymm7,%ymm3,%ymm3
4255	vpaddd	%ymm6,%ymm2,%ymm2
4256	vpaddd	%ymm5,%ymm1,%ymm1
4257	vpaddd	%ymm4,%ymm0,%ymm0
4258	vpxor	%ymm3,%ymm15,%ymm15
4259	vpxor	%ymm2,%ymm14,%ymm14
4260	vpxor	%ymm1,%ymm13,%ymm13
4261	vpxor	%ymm0,%ymm12,%ymm12
4262	movq	0+0+0(%rbp),%rdx
4263	movq	%rdx,%r15
4264	mulxq	%r10,%r13,%r14
4265	mulxq	%r11,%rax,%rdx
4266	imulq	%r12,%r15
4267	addq	%rax,%r14
4268	adcq	%rdx,%r15
4269	vpshufb	%ymm8,%ymm15,%ymm15
4270	vpshufb	%ymm8,%ymm14,%ymm14
4271	vpshufb	%ymm8,%ymm13,%ymm13
4272	vpshufb	%ymm8,%ymm12,%ymm12
4273	vpaddd	%ymm15,%ymm11,%ymm11
4274	vpaddd	%ymm14,%ymm10,%ymm10
4275	vpaddd	%ymm13,%ymm9,%ymm9
4276	vpaddd	0+128(%rbp),%ymm12,%ymm8
4277	vpxor	%ymm11,%ymm7,%ymm7
4278	movq	8+0+0(%rbp),%rdx
4279	mulxq	%r10,%r10,%rax
4280	addq	%r10,%r14
4281	mulxq	%r11,%r11,%r9
4282	adcq	%r11,%r15
4283	adcq	$0,%r9
4284	imulq	%r12,%rdx
4285	vpxor	%ymm10,%ymm6,%ymm6
4286	vpxor	%ymm9,%ymm5,%ymm5
4287	vpxor	%ymm8,%ymm4,%ymm4
4288	vmovdqa	%ymm8,0+128(%rbp)
4289	vpsrld	$20,%ymm7,%ymm8
4290	vpslld	$32-20,%ymm7,%ymm7
4291	vpxor	%ymm8,%ymm7,%ymm7
4292	vpsrld	$20,%ymm6,%ymm8
4293	vpslld	$32-20,%ymm6,%ymm6
4294	vpxor	%ymm8,%ymm6,%ymm6
4295	vpsrld	$20,%ymm5,%ymm8
4296	vpslld	$32-20,%ymm5,%ymm5
4297	addq	%rax,%r15
4298	adcq	%rdx,%r9
4299	vpxor	%ymm8,%ymm5,%ymm5
4300	vpsrld	$20,%ymm4,%ymm8
4301	vpslld	$32-20,%ymm4,%ymm4
4302	vpxor	%ymm8,%ymm4,%ymm4
4303	vmovdqa	L$rol8(%rip),%ymm8
4304	vpaddd	%ymm7,%ymm3,%ymm3
4305	vpaddd	%ymm6,%ymm2,%ymm2
4306	vpaddd	%ymm5,%ymm1,%ymm1
4307	vpaddd	%ymm4,%ymm0,%ymm0
4308	vpxor	%ymm3,%ymm15,%ymm15
4309	movq	%r13,%r10
4310	movq	%r14,%r11
4311	movq	%r15,%r12
4312	andq	$3,%r12
4313	movq	%r15,%r13
4314	andq	$-4,%r13
4315	movq	%r9,%r14
4316	shrdq	$2,%r9,%r15
4317	shrq	$2,%r9
4318	addq	%r13,%r15
4319	adcq	%r14,%r9
4320	addq	%r15,%r10
4321	adcq	%r9,%r11
4322	adcq	$0,%r12
4323	vpxor	%ymm2,%ymm14,%ymm14
4324	vpxor	%ymm1,%ymm13,%ymm13
4325	vpxor	%ymm0,%ymm12,%ymm12
4326	vpshufb	%ymm8,%ymm15,%ymm15
4327	vpshufb	%ymm8,%ymm14,%ymm14
4328	vpshufb	%ymm8,%ymm13,%ymm13
4329	vpshufb	%ymm8,%ymm12,%ymm12
4330	vpaddd	%ymm15,%ymm11,%ymm11
4331	vpaddd	%ymm14,%ymm10,%ymm10
4332	addq	0+16(%rsi,%rcx,1),%r10
4333	adcq	8+16(%rsi,%rcx,1),%r11
4334	adcq	$1,%r12
4335	vpaddd	%ymm13,%ymm9,%ymm9
4336	vpaddd	0+128(%rbp),%ymm12,%ymm8
4337	vpxor	%ymm11,%ymm7,%ymm7
4338	vpxor	%ymm10,%ymm6,%ymm6
4339	vpxor	%ymm9,%ymm5,%ymm5
4340	vpxor	%ymm8,%ymm4,%ymm4
4341	vmovdqa	%ymm8,0+128(%rbp)
4342	vpsrld	$25,%ymm7,%ymm8
4343	movq	0+0+0(%rbp),%rdx
4344	movq	%rdx,%r15
4345	mulxq	%r10,%r13,%r14
4346	mulxq	%r11,%rax,%rdx
4347	imulq	%r12,%r15
4348	addq	%rax,%r14
4349	adcq	%rdx,%r15
4350	vpslld	$32-25,%ymm7,%ymm7
4351	vpxor	%ymm8,%ymm7,%ymm7
4352	vpsrld	$25,%ymm6,%ymm8
4353	vpslld	$32-25,%ymm6,%ymm6
4354	vpxor	%ymm8,%ymm6,%ymm6
4355	vpsrld	$25,%ymm5,%ymm8
4356	vpslld	$32-25,%ymm5,%ymm5
4357	vpxor	%ymm8,%ymm5,%ymm5
4358	vpsrld	$25,%ymm4,%ymm8
4359	vpslld	$32-25,%ymm4,%ymm4
4360	vpxor	%ymm8,%ymm4,%ymm4
4361	vmovdqa	0+128(%rbp),%ymm8
4362	vpalignr	$4,%ymm7,%ymm7,%ymm7
4363	vpalignr	$8,%ymm11,%ymm11,%ymm11
4364	vpalignr	$12,%ymm15,%ymm15,%ymm15
4365	vpalignr	$4,%ymm6,%ymm6,%ymm6
4366	vpalignr	$8,%ymm10,%ymm10,%ymm10
4367	vpalignr	$12,%ymm14,%ymm14,%ymm14
4368	movq	8+0+0(%rbp),%rdx
4369	mulxq	%r10,%r10,%rax
4370	addq	%r10,%r14
4371	mulxq	%r11,%r11,%r9
4372	adcq	%r11,%r15
4373	adcq	$0,%r9
4374	imulq	%r12,%rdx
4375	vpalignr	$4,%ymm5,%ymm5,%ymm5
4376	vpalignr	$8,%ymm9,%ymm9,%ymm9
4377	vpalignr	$12,%ymm13,%ymm13,%ymm13
4378	vpalignr	$4,%ymm4,%ymm4,%ymm4
4379	vpalignr	$8,%ymm8,%ymm8,%ymm8
4380	vpalignr	$12,%ymm12,%ymm12,%ymm12
4381	vmovdqa	%ymm8,0+128(%rbp)
4382	vmovdqa	L$rol16(%rip),%ymm8
4383	vpaddd	%ymm7,%ymm3,%ymm3
4384	vpaddd	%ymm6,%ymm2,%ymm2
4385	vpaddd	%ymm5,%ymm1,%ymm1
4386	vpaddd	%ymm4,%ymm0,%ymm0
4387	vpxor	%ymm3,%ymm15,%ymm15
4388	vpxor	%ymm2,%ymm14,%ymm14
4389	vpxor	%ymm1,%ymm13,%ymm13
4390	vpxor	%ymm0,%ymm12,%ymm12
4391	vpshufb	%ymm8,%ymm15,%ymm15
4392	vpshufb	%ymm8,%ymm14,%ymm14
4393	addq	%rax,%r15
4394	adcq	%rdx,%r9
4395	vpshufb	%ymm8,%ymm13,%ymm13
4396	vpshufb	%ymm8,%ymm12,%ymm12
4397	vpaddd	%ymm15,%ymm11,%ymm11
4398	vpaddd	%ymm14,%ymm10,%ymm10
4399	vpaddd	%ymm13,%ymm9,%ymm9
4400	vpaddd	0+128(%rbp),%ymm12,%ymm8
4401	vpxor	%ymm11,%ymm7,%ymm7
4402	vpxor	%ymm10,%ymm6,%ymm6
4403	vpxor	%ymm9,%ymm5,%ymm5
4404	movq	%r13,%r10
4405	movq	%r14,%r11
4406	movq	%r15,%r12
4407	andq	$3,%r12
4408	movq	%r15,%r13
4409	andq	$-4,%r13
4410	movq	%r9,%r14
4411	shrdq	$2,%r9,%r15
4412	shrq	$2,%r9
4413	addq	%r13,%r15
4414	adcq	%r14,%r9
4415	addq	%r15,%r10
4416	adcq	%r9,%r11
4417	adcq	$0,%r12
4418	vpxor	%ymm8,%ymm4,%ymm4
4419	vmovdqa	%ymm8,0+128(%rbp)
4420	vpsrld	$20,%ymm7,%ymm8
4421	vpslld	$32-20,%ymm7,%ymm7
4422	vpxor	%ymm8,%ymm7,%ymm7
4423	vpsrld	$20,%ymm6,%ymm8
4424	vpslld	$32-20,%ymm6,%ymm6
4425	vpxor	%ymm8,%ymm6,%ymm6
4426	addq	0+32(%rsi,%rcx,1),%r10
4427	adcq	8+32(%rsi,%rcx,1),%r11
4428	adcq	$1,%r12
4429
4430	leaq	48(%rcx),%rcx
4431	vpsrld	$20,%ymm5,%ymm8
4432	vpslld	$32-20,%ymm5,%ymm5
4433	vpxor	%ymm8,%ymm5,%ymm5
4434	vpsrld	$20,%ymm4,%ymm8
4435	vpslld	$32-20,%ymm4,%ymm4
4436	vpxor	%ymm8,%ymm4,%ymm4
4437	vmovdqa	L$rol8(%rip),%ymm8
4438	vpaddd	%ymm7,%ymm3,%ymm3
4439	vpaddd	%ymm6,%ymm2,%ymm2
4440	vpaddd	%ymm5,%ymm1,%ymm1
4441	vpaddd	%ymm4,%ymm0,%ymm0
4442	vpxor	%ymm3,%ymm15,%ymm15
4443	vpxor	%ymm2,%ymm14,%ymm14
4444	vpxor	%ymm1,%ymm13,%ymm13
4445	vpxor	%ymm0,%ymm12,%ymm12
4446	vpshufb	%ymm8,%ymm15,%ymm15
4447	vpshufb	%ymm8,%ymm14,%ymm14
4448	vpshufb	%ymm8,%ymm13,%ymm13
4449	movq	0+0+0(%rbp),%rdx
4450	movq	%rdx,%r15
4451	mulxq	%r10,%r13,%r14
4452	mulxq	%r11,%rax,%rdx
4453	imulq	%r12,%r15
4454	addq	%rax,%r14
4455	adcq	%rdx,%r15
4456	vpshufb	%ymm8,%ymm12,%ymm12
4457	vpaddd	%ymm15,%ymm11,%ymm11
4458	vpaddd	%ymm14,%ymm10,%ymm10
4459	vpaddd	%ymm13,%ymm9,%ymm9
4460	vpaddd	0+128(%rbp),%ymm12,%ymm8
4461	vpxor	%ymm11,%ymm7,%ymm7
4462	vpxor	%ymm10,%ymm6,%ymm6
4463	vpxor	%ymm9,%ymm5,%ymm5
4464	movq	8+0+0(%rbp),%rdx
4465	mulxq	%r10,%r10,%rax
4466	addq	%r10,%r14
4467	mulxq	%r11,%r11,%r9
4468	adcq	%r11,%r15
4469	adcq	$0,%r9
4470	imulq	%r12,%rdx
4471	vpxor	%ymm8,%ymm4,%ymm4
4472	vmovdqa	%ymm8,0+128(%rbp)
4473	vpsrld	$25,%ymm7,%ymm8
4474	vpslld	$32-25,%ymm7,%ymm7
4475	vpxor	%ymm8,%ymm7,%ymm7
4476	vpsrld	$25,%ymm6,%ymm8
4477	vpslld	$32-25,%ymm6,%ymm6
4478	vpxor	%ymm8,%ymm6,%ymm6
4479	addq	%rax,%r15
4480	adcq	%rdx,%r9
4481	vpsrld	$25,%ymm5,%ymm8
4482	vpslld	$32-25,%ymm5,%ymm5
4483	vpxor	%ymm8,%ymm5,%ymm5
4484	vpsrld	$25,%ymm4,%ymm8
4485	vpslld	$32-25,%ymm4,%ymm4
4486	vpxor	%ymm8,%ymm4,%ymm4
4487	vmovdqa	0+128(%rbp),%ymm8
4488	vpalignr	$12,%ymm7,%ymm7,%ymm7
4489	vpalignr	$8,%ymm11,%ymm11,%ymm11
4490	vpalignr	$4,%ymm15,%ymm15,%ymm15
4491	vpalignr	$12,%ymm6,%ymm6,%ymm6
4492	vpalignr	$8,%ymm10,%ymm10,%ymm10
4493	vpalignr	$4,%ymm14,%ymm14,%ymm14
4494	vpalignr	$12,%ymm5,%ymm5,%ymm5
4495	vpalignr	$8,%ymm9,%ymm9,%ymm9
4496	vpalignr	$4,%ymm13,%ymm13,%ymm13
4497	vpalignr	$12,%ymm4,%ymm4,%ymm4
4498	vpalignr	$8,%ymm8,%ymm8,%ymm8
4499	movq	%r13,%r10
4500	movq	%r14,%r11
4501	movq	%r15,%r12
4502	andq	$3,%r12
4503	movq	%r15,%r13
4504	andq	$-4,%r13
4505	movq	%r9,%r14
4506	shrdq	$2,%r9,%r15
4507	shrq	$2,%r9
4508	addq	%r13,%r15
4509	adcq	%r14,%r9
4510	addq	%r15,%r10
4511	adcq	%r9,%r11
4512	adcq	$0,%r12
4513	vpalignr	$4,%ymm12,%ymm12,%ymm12
4514
4515	cmpq	$60*8,%rcx
4516	jne	L$open_avx2_main_loop_rounds
4517	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
4518	vpaddd	0+64(%rbp),%ymm7,%ymm7
4519	vpaddd	0+96(%rbp),%ymm11,%ymm11
4520	vpaddd	0+256(%rbp),%ymm15,%ymm15
4521	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
4522	vpaddd	0+64(%rbp),%ymm6,%ymm6
4523	vpaddd	0+96(%rbp),%ymm10,%ymm10
4524	vpaddd	0+224(%rbp),%ymm14,%ymm14
4525	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
4526	vpaddd	0+64(%rbp),%ymm5,%ymm5
4527	vpaddd	0+96(%rbp),%ymm9,%ymm9
4528	vpaddd	0+192(%rbp),%ymm13,%ymm13
4529	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4530	vpaddd	0+64(%rbp),%ymm4,%ymm4
4531	vpaddd	0+96(%rbp),%ymm8,%ymm8
4532	vpaddd	0+160(%rbp),%ymm12,%ymm12
4533
4534	vmovdqa	%ymm0,0+128(%rbp)
4535	addq	0+60*8(%rsi),%r10
4536	adcq	8+60*8(%rsi),%r11
4537	adcq	$1,%r12
4538	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
4539	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
4540	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
4541	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
4542	vpxor	0+0(%rsi),%ymm0,%ymm0
4543	vpxor	32+0(%rsi),%ymm3,%ymm3
4544	vpxor	64+0(%rsi),%ymm7,%ymm7
4545	vpxor	96+0(%rsi),%ymm11,%ymm11
4546	vmovdqu	%ymm0,0+0(%rdi)
4547	vmovdqu	%ymm3,32+0(%rdi)
4548	vmovdqu	%ymm7,64+0(%rdi)
4549	vmovdqu	%ymm11,96+0(%rdi)
4550
4551	vmovdqa	0+128(%rbp),%ymm0
4552	movq	0+0+0(%rbp),%rax
4553	movq	%rax,%r15
4554	mulq	%r10
4555	movq	%rax,%r13
4556	movq	%rdx,%r14
4557	movq	0+0+0(%rbp),%rax
4558	mulq	%r11
4559	imulq	%r12,%r15
4560	addq	%rax,%r14
4561	adcq	%rdx,%r15
4562	movq	8+0+0(%rbp),%rax
4563	movq	%rax,%r9
4564	mulq	%r10
4565	addq	%rax,%r14
4566	adcq	$0,%rdx
4567	movq	%rdx,%r10
4568	movq	8+0+0(%rbp),%rax
4569	mulq	%r11
4570	addq	%rax,%r15
4571	adcq	$0,%rdx
4572	imulq	%r12,%r9
4573	addq	%r10,%r15
4574	adcq	%rdx,%r9
4575	movq	%r13,%r10
4576	movq	%r14,%r11
4577	movq	%r15,%r12
4578	andq	$3,%r12
4579	movq	%r15,%r13
4580	andq	$-4,%r13
4581	movq	%r9,%r14
4582	shrdq	$2,%r9,%r15
4583	shrq	$2,%r9
4584	addq	%r13,%r15
4585	adcq	%r14,%r9
4586	addq	%r15,%r10
4587	adcq	%r9,%r11
4588	adcq	$0,%r12
4589	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
4590	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
4591	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
4592	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
4593	vpxor	0+128(%rsi),%ymm3,%ymm3
4594	vpxor	32+128(%rsi),%ymm2,%ymm2
4595	vpxor	64+128(%rsi),%ymm6,%ymm6
4596	vpxor	96+128(%rsi),%ymm10,%ymm10
4597	vmovdqu	%ymm3,0+128(%rdi)
4598	vmovdqu	%ymm2,32+128(%rdi)
4599	vmovdqu	%ymm6,64+128(%rdi)
4600	vmovdqu	%ymm10,96+128(%rdi)
4601	addq	0+60*8+16(%rsi),%r10
4602	adcq	8+60*8+16(%rsi),%r11
4603	adcq	$1,%r12
4604	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
4605	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
4606	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
4607	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
4608	vpxor	0+256(%rsi),%ymm3,%ymm3
4609	vpxor	32+256(%rsi),%ymm1,%ymm1
4610	vpxor	64+256(%rsi),%ymm5,%ymm5
4611	vpxor	96+256(%rsi),%ymm9,%ymm9
4612	vmovdqu	%ymm3,0+256(%rdi)
4613	vmovdqu	%ymm1,32+256(%rdi)
4614	vmovdqu	%ymm5,64+256(%rdi)
4615	vmovdqu	%ymm9,96+256(%rdi)
4616	movq	0+0+0(%rbp),%rax
4617	movq	%rax,%r15
4618	mulq	%r10
4619	movq	%rax,%r13
4620	movq	%rdx,%r14
4621	movq	0+0+0(%rbp),%rax
4622	mulq	%r11
4623	imulq	%r12,%r15
4624	addq	%rax,%r14
4625	adcq	%rdx,%r15
4626	movq	8+0+0(%rbp),%rax
4627	movq	%rax,%r9
4628	mulq	%r10
4629	addq	%rax,%r14
4630	adcq	$0,%rdx
4631	movq	%rdx,%r10
4632	movq	8+0+0(%rbp),%rax
4633	mulq	%r11
4634	addq	%rax,%r15
4635	adcq	$0,%rdx
4636	imulq	%r12,%r9
4637	addq	%r10,%r15
4638	adcq	%rdx,%r9
4639	movq	%r13,%r10
4640	movq	%r14,%r11
4641	movq	%r15,%r12
4642	andq	$3,%r12
4643	movq	%r15,%r13
4644	andq	$-4,%r13
4645	movq	%r9,%r14
4646	shrdq	$2,%r9,%r15
4647	shrq	$2,%r9
4648	addq	%r13,%r15
4649	adcq	%r14,%r9
4650	addq	%r15,%r10
4651	adcq	%r9,%r11
4652	adcq	$0,%r12
4653	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4654	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
4655	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
4656	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
4657	vpxor	0+384(%rsi),%ymm3,%ymm3
4658	vpxor	32+384(%rsi),%ymm0,%ymm0
4659	vpxor	64+384(%rsi),%ymm4,%ymm4
4660	vpxor	96+384(%rsi),%ymm8,%ymm8
4661	vmovdqu	%ymm3,0+384(%rdi)
4662	vmovdqu	%ymm0,32+384(%rdi)
4663	vmovdqu	%ymm4,64+384(%rdi)
4664	vmovdqu	%ymm8,96+384(%rdi)
4665
4666	leaq	512(%rsi),%rsi
4667	leaq	512(%rdi),%rdi
4668	subq	$512,%rbx
4669	jmp	L$open_avx2_main_loop
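// Fewer than 512 bytes remain: a zero remainder goes straight to finalization,
// otherwise an AVX2 tail sized for up to 128, 256, 384 or 512 bytes is
// selected; each tail keeps hashing ciphertext ahead of the decryption, just as
// the main loop does.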
4670L$open_avx2_main_loop_done:
4671	testq	%rbx,%rbx
4672	vzeroupper
4673	je	L$open_sse_finalize
4674
4675	cmpq	$384,%rbx
4676	ja	L$open_avx2_tail_512
4677	cmpq	$256,%rbx
4678	ja	L$open_avx2_tail_384
4679	cmpq	$128,%rbx
4680	ja	L$open_avx2_tail_256
4681	vmovdqa	L$chacha20_consts(%rip),%ymm0
4682	vmovdqa	0+64(%rbp),%ymm4
4683	vmovdqa	0+96(%rbp),%ymm8
4684	vmovdqa	L$avx2_inc(%rip),%ymm12
4685	vpaddd	0+160(%rbp),%ymm12,%ymm12
4686	vmovdqa	%ymm12,0+160(%rbp)
4687
4688	xorq	%r8,%r8
4689	movq	%rbx,%rcx
4690	andq	$-16,%rcx
4691	testq	%rcx,%rcx
4692	je	L$open_avx2_tail_128_rounds
4693L$open_avx2_tail_128_rounds_and_x1hash:
4694	addq	0+0(%rsi,%r8,1),%r10
4695	adcq	8+0(%rsi,%r8,1),%r11
4696	adcq	$1,%r12
4697	movq	0+0+0(%rbp),%rax
4698	movq	%rax,%r15
4699	mulq	%r10
4700	movq	%rax,%r13
4701	movq	%rdx,%r14
4702	movq	0+0+0(%rbp),%rax
4703	mulq	%r11
4704	imulq	%r12,%r15
4705	addq	%rax,%r14
4706	adcq	%rdx,%r15
4707	movq	8+0+0(%rbp),%rax
4708	movq	%rax,%r9
4709	mulq	%r10
4710	addq	%rax,%r14
4711	adcq	$0,%rdx
4712	movq	%rdx,%r10
4713	movq	8+0+0(%rbp),%rax
4714	mulq	%r11
4715	addq	%rax,%r15
4716	adcq	$0,%rdx
4717	imulq	%r12,%r9
4718	addq	%r10,%r15
4719	adcq	%rdx,%r9
4720	movq	%r13,%r10
4721	movq	%r14,%r11
4722	movq	%r15,%r12
4723	andq	$3,%r12
4724	movq	%r15,%r13
4725	andq	$-4,%r13
4726	movq	%r9,%r14
4727	shrdq	$2,%r9,%r15
4728	shrq	$2,%r9
4729	addq	%r13,%r15
4730	adcq	%r14,%r9
4731	addq	%r15,%r10
4732	adcq	%r9,%r11
4733	adcq	$0,%r12
4734
4735L$open_avx2_tail_128_rounds:
4736	addq	$16,%r8
4737	vpaddd	%ymm4,%ymm0,%ymm0
4738	vpxor	%ymm0,%ymm12,%ymm12
4739	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4740	vpaddd	%ymm12,%ymm8,%ymm8
4741	vpxor	%ymm8,%ymm4,%ymm4
4742	vpsrld	$20,%ymm4,%ymm3
4743	vpslld	$12,%ymm4,%ymm4
4744	vpxor	%ymm3,%ymm4,%ymm4
4745	vpaddd	%ymm4,%ymm0,%ymm0
4746	vpxor	%ymm0,%ymm12,%ymm12
4747	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4748	vpaddd	%ymm12,%ymm8,%ymm8
4749	vpxor	%ymm8,%ymm4,%ymm4
4750	vpslld	$7,%ymm4,%ymm3
4751	vpsrld	$25,%ymm4,%ymm4
4752	vpxor	%ymm3,%ymm4,%ymm4
4753	vpalignr	$12,%ymm12,%ymm12,%ymm12
4754	vpalignr	$8,%ymm8,%ymm8,%ymm8
4755	vpalignr	$4,%ymm4,%ymm4,%ymm4
4756	vpaddd	%ymm4,%ymm0,%ymm0
4757	vpxor	%ymm0,%ymm12,%ymm12
4758	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4759	vpaddd	%ymm12,%ymm8,%ymm8
4760	vpxor	%ymm8,%ymm4,%ymm4
4761	vpsrld	$20,%ymm4,%ymm3
4762	vpslld	$12,%ymm4,%ymm4
4763	vpxor	%ymm3,%ymm4,%ymm4
4764	vpaddd	%ymm4,%ymm0,%ymm0
4765	vpxor	%ymm0,%ymm12,%ymm12
4766	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4767	vpaddd	%ymm12,%ymm8,%ymm8
4768	vpxor	%ymm8,%ymm4,%ymm4
4769	vpslld	$7,%ymm4,%ymm3
4770	vpsrld	$25,%ymm4,%ymm4
4771	vpxor	%ymm3,%ymm4,%ymm4
4772	vpalignr	$4,%ymm12,%ymm12,%ymm12
4773	vpalignr	$8,%ymm8,%ymm8,%ymm8
4774	vpalignr	$12,%ymm4,%ymm4,%ymm4
4775
4776	cmpq	%rcx,%r8
4777	jb	L$open_avx2_tail_128_rounds_and_x1hash
4778	cmpq	$160,%r8
4779	jne	L$open_avx2_tail_128_rounds
4780	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4781	vpaddd	0+64(%rbp),%ymm4,%ymm4
4782	vpaddd	0+96(%rbp),%ymm8,%ymm8
4783	vpaddd	0+160(%rbp),%ymm12,%ymm12
4784	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
4785	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
4786	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
4787	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
4788	vmovdqa	%ymm3,%ymm8
4789
4790	jmp	L$open_avx2_tail_128_xor
4791
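// L$open_avx2_tail_256: 129..256 bytes remain. Two ymm state groups are run,
// and the number of hashing iterations is derived from the byte count (capped
// at 10) so that Poly1305 keeps pace with the data that will be consumed.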
4792L$open_avx2_tail_256:
4793	vmovdqa	L$chacha20_consts(%rip),%ymm0
4794	vmovdqa	0+64(%rbp),%ymm4
4795	vmovdqa	0+96(%rbp),%ymm8
4796	vmovdqa	%ymm0,%ymm1
4797	vmovdqa	%ymm4,%ymm5
4798	vmovdqa	%ymm8,%ymm9
4799	vmovdqa	L$avx2_inc(%rip),%ymm12
4800	vpaddd	0+160(%rbp),%ymm12,%ymm13
4801	vpaddd	%ymm13,%ymm12,%ymm12
4802	vmovdqa	%ymm12,0+160(%rbp)
4803	vmovdqa	%ymm13,0+192(%rbp)
4804
4805	movq	%rbx,0+128(%rbp)
4806	movq	%rbx,%rcx
4807	subq	$128,%rcx
4808	shrq	$4,%rcx
4809	movq	$10,%r8
4810	cmpq	$10,%rcx
4811	cmovgq	%r8,%rcx
4812	movq	%rsi,%rbx
4813	xorq	%r8,%r8
4814L$open_avx2_tail_256_rounds_and_x1hash:
4815	addq	0+0(%rbx),%r10
4816	adcq	8+0(%rbx),%r11
4817	adcq	$1,%r12
4818	movq	0+0+0(%rbp),%rdx
4819	movq	%rdx,%r15
4820	mulxq	%r10,%r13,%r14
4821	mulxq	%r11,%rax,%rdx
4822	imulq	%r12,%r15
4823	addq	%rax,%r14
4824	adcq	%rdx,%r15
4825	movq	8+0+0(%rbp),%rdx
4826	mulxq	%r10,%r10,%rax
4827	addq	%r10,%r14
4828	mulxq	%r11,%r11,%r9
4829	adcq	%r11,%r15
4830	adcq	$0,%r9
4831	imulq	%r12,%rdx
4832	addq	%rax,%r15
4833	adcq	%rdx,%r9
4834	movq	%r13,%r10
4835	movq	%r14,%r11
4836	movq	%r15,%r12
4837	andq	$3,%r12
4838	movq	%r15,%r13
4839	andq	$-4,%r13
4840	movq	%r9,%r14
4841	shrdq	$2,%r9,%r15
4842	shrq	$2,%r9
4843	addq	%r13,%r15
4844	adcq	%r14,%r9
4845	addq	%r15,%r10
4846	adcq	%r9,%r11
4847	adcq	$0,%r12
4848
4849	leaq	16(%rbx),%rbx
4850L$open_avx2_tail_256_rounds:
4851	vpaddd	%ymm4,%ymm0,%ymm0
4852	vpxor	%ymm0,%ymm12,%ymm12
4853	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4854	vpaddd	%ymm12,%ymm8,%ymm8
4855	vpxor	%ymm8,%ymm4,%ymm4
4856	vpsrld	$20,%ymm4,%ymm3
4857	vpslld	$12,%ymm4,%ymm4
4858	vpxor	%ymm3,%ymm4,%ymm4
4859	vpaddd	%ymm4,%ymm0,%ymm0
4860	vpxor	%ymm0,%ymm12,%ymm12
4861	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4862	vpaddd	%ymm12,%ymm8,%ymm8
4863	vpxor	%ymm8,%ymm4,%ymm4
4864	vpslld	$7,%ymm4,%ymm3
4865	vpsrld	$25,%ymm4,%ymm4
4866	vpxor	%ymm3,%ymm4,%ymm4
4867	vpalignr	$12,%ymm12,%ymm12,%ymm12
4868	vpalignr	$8,%ymm8,%ymm8,%ymm8
4869	vpalignr	$4,%ymm4,%ymm4,%ymm4
4870	vpaddd	%ymm5,%ymm1,%ymm1
4871	vpxor	%ymm1,%ymm13,%ymm13
4872	vpshufb	L$rol16(%rip),%ymm13,%ymm13
4873	vpaddd	%ymm13,%ymm9,%ymm9
4874	vpxor	%ymm9,%ymm5,%ymm5
4875	vpsrld	$20,%ymm5,%ymm3
4876	vpslld	$12,%ymm5,%ymm5
4877	vpxor	%ymm3,%ymm5,%ymm5
4878	vpaddd	%ymm5,%ymm1,%ymm1
4879	vpxor	%ymm1,%ymm13,%ymm13
4880	vpshufb	L$rol8(%rip),%ymm13,%ymm13
4881	vpaddd	%ymm13,%ymm9,%ymm9
4882	vpxor	%ymm9,%ymm5,%ymm5
4883	vpslld	$7,%ymm5,%ymm3
4884	vpsrld	$25,%ymm5,%ymm5
4885	vpxor	%ymm3,%ymm5,%ymm5
4886	vpalignr	$12,%ymm13,%ymm13,%ymm13
4887	vpalignr	$8,%ymm9,%ymm9,%ymm9
4888	vpalignr	$4,%ymm5,%ymm5,%ymm5
4889
4890	incq	%r8
4891	vpaddd	%ymm4,%ymm0,%ymm0
4892	vpxor	%ymm0,%ymm12,%ymm12
4893	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4894	vpaddd	%ymm12,%ymm8,%ymm8
4895	vpxor	%ymm8,%ymm4,%ymm4
4896	vpsrld	$20,%ymm4,%ymm3
4897	vpslld	$12,%ymm4,%ymm4
4898	vpxor	%ymm3,%ymm4,%ymm4
4899	vpaddd	%ymm4,%ymm0,%ymm0
4900	vpxor	%ymm0,%ymm12,%ymm12
4901	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4902	vpaddd	%ymm12,%ymm8,%ymm8
4903	vpxor	%ymm8,%ymm4,%ymm4
4904	vpslld	$7,%ymm4,%ymm3
4905	vpsrld	$25,%ymm4,%ymm4
4906	vpxor	%ymm3,%ymm4,%ymm4
4907	vpalignr	$4,%ymm12,%ymm12,%ymm12
4908	vpalignr	$8,%ymm8,%ymm8,%ymm8
4909	vpalignr	$12,%ymm4,%ymm4,%ymm4
4910	vpaddd	%ymm5,%ymm1,%ymm1
4911	vpxor	%ymm1,%ymm13,%ymm13
4912	vpshufb	L$rol16(%rip),%ymm13,%ymm13
4913	vpaddd	%ymm13,%ymm9,%ymm9
4914	vpxor	%ymm9,%ymm5,%ymm5
4915	vpsrld	$20,%ymm5,%ymm3
4916	vpslld	$12,%ymm5,%ymm5
4917	vpxor	%ymm3,%ymm5,%ymm5
4918	vpaddd	%ymm5,%ymm1,%ymm1
4919	vpxor	%ymm1,%ymm13,%ymm13
4920	vpshufb	L$rol8(%rip),%ymm13,%ymm13
4921	vpaddd	%ymm13,%ymm9,%ymm9
4922	vpxor	%ymm9,%ymm5,%ymm5
4923	vpslld	$7,%ymm5,%ymm3
4924	vpsrld	$25,%ymm5,%ymm5
4925	vpxor	%ymm3,%ymm5,%ymm5
4926	vpalignr	$4,%ymm13,%ymm13,%ymm13
4927	vpalignr	$8,%ymm9,%ymm9,%ymm9
4928	vpalignr	$12,%ymm5,%ymm5,%ymm5
4929	vpaddd	%ymm6,%ymm2,%ymm2
4930	vpxor	%ymm2,%ymm14,%ymm14
4931	vpshufb	L$rol16(%rip),%ymm14,%ymm14
4932	vpaddd	%ymm14,%ymm10,%ymm10
4933	vpxor	%ymm10,%ymm6,%ymm6
4934	vpsrld	$20,%ymm6,%ymm3
4935	vpslld	$12,%ymm6,%ymm6
4936	vpxor	%ymm3,%ymm6,%ymm6
4937	vpaddd	%ymm6,%ymm2,%ymm2
4938	vpxor	%ymm2,%ymm14,%ymm14
4939	vpshufb	L$rol8(%rip),%ymm14,%ymm14
4940	vpaddd	%ymm14,%ymm10,%ymm10
4941	vpxor	%ymm10,%ymm6,%ymm6
4942	vpslld	$7,%ymm6,%ymm3
4943	vpsrld	$25,%ymm6,%ymm6
4944	vpxor	%ymm3,%ymm6,%ymm6
4945	vpalignr	$4,%ymm14,%ymm14,%ymm14
4946	vpalignr	$8,%ymm10,%ymm10,%ymm10
4947	vpalignr	$12,%ymm6,%ymm6,%ymm6
4948
4949	cmpq	%rcx,%r8
4950	jb	L$open_avx2_tail_256_rounds_and_x1hash
4951	cmpq	$10,%r8
4952	jne	L$open_avx2_tail_256_rounds
4953	movq	%rbx,%r8
4954	subq	%rsi,%rbx
4955	movq	%rbx,%rcx
4956	movq	0+128(%rbp),%rbx
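// Annotation: catch up on ciphertext not yet hashed by the interleaved rounds.
// %rcx counts bytes hashed so far, %rbx is the full tail length restored from
// 128(%rbp), and %r8 points at the next 16-byte block to hash.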
4957L$open_avx2_tail_256_hash:
4958	addq	$16,%rcx
4959	cmpq	%rbx,%rcx
4960	jg	L$open_avx2_tail_256_done
4961	addq	0+0(%r8),%r10
4962	adcq	8+0(%r8),%r11
4963	adcq	$1,%r12
4964	movq	0+0+0(%rbp),%rdx
4965	movq	%rdx,%r15
4966	mulxq	%r10,%r13,%r14
4967	mulxq	%r11,%rax,%rdx
4968	imulq	%r12,%r15
4969	addq	%rax,%r14
4970	adcq	%rdx,%r15
4971	movq	8+0+0(%rbp),%rdx
4972	mulxq	%r10,%r10,%rax
4973	addq	%r10,%r14
4974	mulxq	%r11,%r11,%r9
4975	adcq	%r11,%r15
4976	adcq	$0,%r9
4977	imulq	%r12,%rdx
4978	addq	%rax,%r15
4979	adcq	%rdx,%r9
4980	movq	%r13,%r10
4981	movq	%r14,%r11
4982	movq	%r15,%r12
4983	andq	$3,%r12
4984	movq	%r15,%r13
4985	andq	$-4,%r13
4986	movq	%r9,%r14
4987	shrdq	$2,%r9,%r15
4988	shrq	$2,%r9
4989	addq	%r13,%r15
4990	adcq	%r14,%r9
4991	addq	%r15,%r10
4992	adcq	%r9,%r11
4993	adcq	$0,%r12
4994
4995	leaq	16(%r8),%r8
4996	jmp	L$open_avx2_tail_256_hash
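// Annotation: both blocks are finalized (constants, key and counters added
// back), the 128-bit lanes are re-interleaved with vperm2i128, the first
// 128 bytes are decrypted here, and the leftover keystream block is handed to
// the generic 128-byte XOR loop below.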
4997L$open_avx2_tail_256_done:
4998	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
4999	vpaddd	0+64(%rbp),%ymm5,%ymm5
5000	vpaddd	0+96(%rbp),%ymm9,%ymm9
5001	vpaddd	0+192(%rbp),%ymm13,%ymm13
5002	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5003	vpaddd	0+64(%rbp),%ymm4,%ymm4
5004	vpaddd	0+96(%rbp),%ymm8,%ymm8
5005	vpaddd	0+160(%rbp),%ymm12,%ymm12
5006	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5007	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5008	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5009	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5010	vpxor	0+0(%rsi),%ymm3,%ymm3
5011	vpxor	32+0(%rsi),%ymm1,%ymm1
5012	vpxor	64+0(%rsi),%ymm5,%ymm5
5013	vpxor	96+0(%rsi),%ymm9,%ymm9
5014	vmovdqu	%ymm3,0+0(%rdi)
5015	vmovdqu	%ymm1,32+0(%rdi)
5016	vmovdqu	%ymm5,64+0(%rdi)
5017	vmovdqu	%ymm9,96+0(%rdi)
5018	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5019	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5020	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5021	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5022	vmovdqa	%ymm3,%ymm8
5023
5024	leaq	128(%rsi),%rsi
5025	leaq	128(%rdi),%rdi
5026	subq	$128,%rbx
5027	jmp	L$open_avx2_tail_128_xor
5028
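// Annotation: tail path built on three ChaCha20 blocks; same structure as the
// two-block case, but up to two Poly1305 blocks are hashed per double round to
// keep pace with the larger amount of ciphertext.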
5029L$open_avx2_tail_384:
5030	vmovdqa	L$chacha20_consts(%rip),%ymm0
5031	vmovdqa	0+64(%rbp),%ymm4
5032	vmovdqa	0+96(%rbp),%ymm8
5033	vmovdqa	%ymm0,%ymm1
5034	vmovdqa	%ymm4,%ymm5
5035	vmovdqa	%ymm8,%ymm9
5036	vmovdqa	%ymm0,%ymm2
5037	vmovdqa	%ymm4,%ymm6
5038	vmovdqa	%ymm8,%ymm10
5039	vmovdqa	L$avx2_inc(%rip),%ymm12
5040	vpaddd	0+160(%rbp),%ymm12,%ymm14
5041	vpaddd	%ymm14,%ymm12,%ymm13
5042	vpaddd	%ymm13,%ymm12,%ymm12
5043	vmovdqa	%ymm12,0+160(%rbp)
5044	vmovdqa	%ymm13,0+192(%rbp)
5045	vmovdqa	%ymm14,0+224(%rbp)
5046
5047	movq	%rbx,0+128(%rbp)
5048	movq	%rbx,%rcx
5049	subq	$256,%rcx
5050	shrq	$4,%rcx
5051	addq	$6,%rcx
5052	movq	$10,%r8
5053	cmpq	$10,%rcx
5054	cmovgq	%r8,%rcx
5055	movq	%rsi,%rbx
5056	xorq	%r8,%r8
5057L$open_avx2_tail_384_rounds_and_x2hash:
5058	addq	0+0(%rbx),%r10
5059	adcq	8+0(%rbx),%r11
5060	adcq	$1,%r12
5061	movq	0+0+0(%rbp),%rdx
5062	movq	%rdx,%r15
5063	mulxq	%r10,%r13,%r14
5064	mulxq	%r11,%rax,%rdx
5065	imulq	%r12,%r15
5066	addq	%rax,%r14
5067	adcq	%rdx,%r15
5068	movq	8+0+0(%rbp),%rdx
5069	mulxq	%r10,%r10,%rax
5070	addq	%r10,%r14
5071	mulxq	%r11,%r11,%r9
5072	adcq	%r11,%r15
5073	adcq	$0,%r9
5074	imulq	%r12,%rdx
5075	addq	%rax,%r15
5076	adcq	%rdx,%r9
5077	movq	%r13,%r10
5078	movq	%r14,%r11
5079	movq	%r15,%r12
5080	andq	$3,%r12
5081	movq	%r15,%r13
5082	andq	$-4,%r13
5083	movq	%r9,%r14
5084	shrdq	$2,%r9,%r15
5085	shrq	$2,%r9
5086	addq	%r13,%r15
5087	adcq	%r14,%r9
5088	addq	%r15,%r10
5089	adcq	%r9,%r11
5090	adcq	$0,%r12
5091
5092	leaq	16(%rbx),%rbx
5093L$open_avx2_tail_384_rounds_and_x1hash:
5094	vpaddd	%ymm6,%ymm2,%ymm2
5095	vpxor	%ymm2,%ymm14,%ymm14
5096	vpshufb	L$rol16(%rip),%ymm14,%ymm14
5097	vpaddd	%ymm14,%ymm10,%ymm10
5098	vpxor	%ymm10,%ymm6,%ymm6
5099	vpsrld	$20,%ymm6,%ymm3
5100	vpslld	$12,%ymm6,%ymm6
5101	vpxor	%ymm3,%ymm6,%ymm6
5102	vpaddd	%ymm6,%ymm2,%ymm2
5103	vpxor	%ymm2,%ymm14,%ymm14
5104	vpshufb	L$rol8(%rip),%ymm14,%ymm14
5105	vpaddd	%ymm14,%ymm10,%ymm10
5106	vpxor	%ymm10,%ymm6,%ymm6
5107	vpslld	$7,%ymm6,%ymm3
5108	vpsrld	$25,%ymm6,%ymm6
5109	vpxor	%ymm3,%ymm6,%ymm6
5110	vpalignr	$12,%ymm14,%ymm14,%ymm14
5111	vpalignr	$8,%ymm10,%ymm10,%ymm10
5112	vpalignr	$4,%ymm6,%ymm6,%ymm6
5113	vpaddd	%ymm5,%ymm1,%ymm1
5114	vpxor	%ymm1,%ymm13,%ymm13
5115	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5116	vpaddd	%ymm13,%ymm9,%ymm9
5117	vpxor	%ymm9,%ymm5,%ymm5
5118	vpsrld	$20,%ymm5,%ymm3
5119	vpslld	$12,%ymm5,%ymm5
5120	vpxor	%ymm3,%ymm5,%ymm5
5121	vpaddd	%ymm5,%ymm1,%ymm1
5122	vpxor	%ymm1,%ymm13,%ymm13
5123	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5124	vpaddd	%ymm13,%ymm9,%ymm9
5125	vpxor	%ymm9,%ymm5,%ymm5
5126	vpslld	$7,%ymm5,%ymm3
5127	vpsrld	$25,%ymm5,%ymm5
5128	vpxor	%ymm3,%ymm5,%ymm5
5129	vpalignr	$12,%ymm13,%ymm13,%ymm13
5130	vpalignr	$8,%ymm9,%ymm9,%ymm9
5131	vpalignr	$4,%ymm5,%ymm5,%ymm5
5132	vpaddd	%ymm4,%ymm0,%ymm0
5133	vpxor	%ymm0,%ymm12,%ymm12
5134	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5135	vpaddd	%ymm12,%ymm8,%ymm8
5136	vpxor	%ymm8,%ymm4,%ymm4
5137	vpsrld	$20,%ymm4,%ymm3
5138	vpslld	$12,%ymm4,%ymm4
5139	vpxor	%ymm3,%ymm4,%ymm4
5140	vpaddd	%ymm4,%ymm0,%ymm0
5141	vpxor	%ymm0,%ymm12,%ymm12
5142	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5143	vpaddd	%ymm12,%ymm8,%ymm8
5144	vpxor	%ymm8,%ymm4,%ymm4
5145	vpslld	$7,%ymm4,%ymm3
5146	vpsrld	$25,%ymm4,%ymm4
5147	vpxor	%ymm3,%ymm4,%ymm4
5148	vpalignr	$12,%ymm12,%ymm12,%ymm12
5149	vpalignr	$8,%ymm8,%ymm8,%ymm8
5150	vpalignr	$4,%ymm4,%ymm4,%ymm4
5151	addq	0+0(%rbx),%r10
5152	adcq	8+0(%rbx),%r11
5153	adcq	$1,%r12
5154	movq	0+0+0(%rbp),%rax
5155	movq	%rax,%r15
5156	mulq	%r10
5157	movq	%rax,%r13
5158	movq	%rdx,%r14
5159	movq	0+0+0(%rbp),%rax
5160	mulq	%r11
5161	imulq	%r12,%r15
5162	addq	%rax,%r14
5163	adcq	%rdx,%r15
5164	movq	8+0+0(%rbp),%rax
5165	movq	%rax,%r9
5166	mulq	%r10
5167	addq	%rax,%r14
5168	adcq	$0,%rdx
5169	movq	%rdx,%r10
5170	movq	8+0+0(%rbp),%rax
5171	mulq	%r11
5172	addq	%rax,%r15
5173	adcq	$0,%rdx
5174	imulq	%r12,%r9
5175	addq	%r10,%r15
5176	adcq	%rdx,%r9
5177	movq	%r13,%r10
5178	movq	%r14,%r11
5179	movq	%r15,%r12
5180	andq	$3,%r12
5181	movq	%r15,%r13
5182	andq	$-4,%r13
5183	movq	%r9,%r14
5184	shrdq	$2,%r9,%r15
5185	shrq	$2,%r9
5186	addq	%r13,%r15
5187	adcq	%r14,%r9
5188	addq	%r15,%r10
5189	adcq	%r9,%r11
5190	adcq	$0,%r12
5191
5192	leaq	16(%rbx),%rbx
5193	incq	%r8
5194	vpaddd	%ymm6,%ymm2,%ymm2
5195	vpxor	%ymm2,%ymm14,%ymm14
5196	vpshufb	L$rol16(%rip),%ymm14,%ymm14
5197	vpaddd	%ymm14,%ymm10,%ymm10
5198	vpxor	%ymm10,%ymm6,%ymm6
5199	vpsrld	$20,%ymm6,%ymm3
5200	vpslld	$12,%ymm6,%ymm6
5201	vpxor	%ymm3,%ymm6,%ymm6
5202	vpaddd	%ymm6,%ymm2,%ymm2
5203	vpxor	%ymm2,%ymm14,%ymm14
5204	vpshufb	L$rol8(%rip),%ymm14,%ymm14
5205	vpaddd	%ymm14,%ymm10,%ymm10
5206	vpxor	%ymm10,%ymm6,%ymm6
5207	vpslld	$7,%ymm6,%ymm3
5208	vpsrld	$25,%ymm6,%ymm6
5209	vpxor	%ymm3,%ymm6,%ymm6
5210	vpalignr	$4,%ymm14,%ymm14,%ymm14
5211	vpalignr	$8,%ymm10,%ymm10,%ymm10
5212	vpalignr	$12,%ymm6,%ymm6,%ymm6
5213	vpaddd	%ymm5,%ymm1,%ymm1
5214	vpxor	%ymm1,%ymm13,%ymm13
5215	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5216	vpaddd	%ymm13,%ymm9,%ymm9
5217	vpxor	%ymm9,%ymm5,%ymm5
5218	vpsrld	$20,%ymm5,%ymm3
5219	vpslld	$12,%ymm5,%ymm5
5220	vpxor	%ymm3,%ymm5,%ymm5
5221	vpaddd	%ymm5,%ymm1,%ymm1
5222	vpxor	%ymm1,%ymm13,%ymm13
5223	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5224	vpaddd	%ymm13,%ymm9,%ymm9
5225	vpxor	%ymm9,%ymm5,%ymm5
5226	vpslld	$7,%ymm5,%ymm3
5227	vpsrld	$25,%ymm5,%ymm5
5228	vpxor	%ymm3,%ymm5,%ymm5
5229	vpalignr	$4,%ymm13,%ymm13,%ymm13
5230	vpalignr	$8,%ymm9,%ymm9,%ymm9
5231	vpalignr	$12,%ymm5,%ymm5,%ymm5
5232	vpaddd	%ymm4,%ymm0,%ymm0
5233	vpxor	%ymm0,%ymm12,%ymm12
5234	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5235	vpaddd	%ymm12,%ymm8,%ymm8
5236	vpxor	%ymm8,%ymm4,%ymm4
5237	vpsrld	$20,%ymm4,%ymm3
5238	vpslld	$12,%ymm4,%ymm4
5239	vpxor	%ymm3,%ymm4,%ymm4
5240	vpaddd	%ymm4,%ymm0,%ymm0
5241	vpxor	%ymm0,%ymm12,%ymm12
5242	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5243	vpaddd	%ymm12,%ymm8,%ymm8
5244	vpxor	%ymm8,%ymm4,%ymm4
5245	vpslld	$7,%ymm4,%ymm3
5246	vpsrld	$25,%ymm4,%ymm4
5247	vpxor	%ymm3,%ymm4,%ymm4
5248	vpalignr	$4,%ymm12,%ymm12,%ymm12
5249	vpalignr	$8,%ymm8,%ymm8,%ymm8
5250	vpalignr	$12,%ymm4,%ymm4,%ymm4
5251
5252	cmpq	%rcx,%r8
5253	jb	L$open_avx2_tail_384_rounds_and_x2hash
5254	cmpq	$10,%r8
5255	jne	L$open_avx2_tail_384_rounds_and_x1hash
5256	movq	%rbx,%r8
5257	subq	%rsi,%rbx
5258	movq	%rbx,%rcx
5259	movq	0+128(%rbp),%rbx
5260L$open_avx2_384_tail_hash:
5261	addq	$16,%rcx
5262	cmpq	%rbx,%rcx
5263	jg	L$open_avx2_384_tail_done
5264	addq	0+0(%r8),%r10
5265	adcq	8+0(%r8),%r11
5266	adcq	$1,%r12
5267	movq	0+0+0(%rbp),%rdx
5268	movq	%rdx,%r15
5269	mulxq	%r10,%r13,%r14
5270	mulxq	%r11,%rax,%rdx
5271	imulq	%r12,%r15
5272	addq	%rax,%r14
5273	adcq	%rdx,%r15
5274	movq	8+0+0(%rbp),%rdx
5275	mulxq	%r10,%r10,%rax
5276	addq	%r10,%r14
5277	mulxq	%r11,%r11,%r9
5278	adcq	%r11,%r15
5279	adcq	$0,%r9
5280	imulq	%r12,%rdx
5281	addq	%rax,%r15
5282	adcq	%rdx,%r9
5283	movq	%r13,%r10
5284	movq	%r14,%r11
5285	movq	%r15,%r12
5286	andq	$3,%r12
5287	movq	%r15,%r13
5288	andq	$-4,%r13
5289	movq	%r9,%r14
5290	shrdq	$2,%r9,%r15
5291	shrq	$2,%r9
5292	addq	%r13,%r15
5293	adcq	%r14,%r9
5294	addq	%r15,%r10
5295	adcq	%r9,%r11
5296	adcq	$0,%r12
5297
5298	leaq	16(%r8),%r8
5299	jmp	L$open_avx2_384_tail_hash
5300L$open_avx2_384_tail_done:
5301	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
5302	vpaddd	0+64(%rbp),%ymm6,%ymm6
5303	vpaddd	0+96(%rbp),%ymm10,%ymm10
5304	vpaddd	0+224(%rbp),%ymm14,%ymm14
5305	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
5306	vpaddd	0+64(%rbp),%ymm5,%ymm5
5307	vpaddd	0+96(%rbp),%ymm9,%ymm9
5308	vpaddd	0+192(%rbp),%ymm13,%ymm13
5309	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5310	vpaddd	0+64(%rbp),%ymm4,%ymm4
5311	vpaddd	0+96(%rbp),%ymm8,%ymm8
5312	vpaddd	0+160(%rbp),%ymm12,%ymm12
5313	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5314	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5315	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5316	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5317	vpxor	0+0(%rsi),%ymm3,%ymm3
5318	vpxor	32+0(%rsi),%ymm2,%ymm2
5319	vpxor	64+0(%rsi),%ymm6,%ymm6
5320	vpxor	96+0(%rsi),%ymm10,%ymm10
5321	vmovdqu	%ymm3,0+0(%rdi)
5322	vmovdqu	%ymm2,32+0(%rdi)
5323	vmovdqu	%ymm6,64+0(%rdi)
5324	vmovdqu	%ymm10,96+0(%rdi)
5325	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5326	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5327	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5328	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5329	vpxor	0+128(%rsi),%ymm3,%ymm3
5330	vpxor	32+128(%rsi),%ymm1,%ymm1
5331	vpxor	64+128(%rsi),%ymm5,%ymm5
5332	vpxor	96+128(%rsi),%ymm9,%ymm9
5333	vmovdqu	%ymm3,0+128(%rdi)
5334	vmovdqu	%ymm1,32+128(%rdi)
5335	vmovdqu	%ymm5,64+128(%rdi)
5336	vmovdqu	%ymm9,96+128(%rdi)
5337	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5338	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5339	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5340	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5341	vmovdqa	%ymm3,%ymm8
5342
5343	leaq	256(%rsi),%rsi
5344	leaq	256(%rdi),%rdi
5345	subq	$256,%rbx
5346	jmp	L$open_avx2_tail_128_xor
5347
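// Annotation: tail path built on four ChaCha20 blocks. All sixteen ymm
// registers hold state, so the Poly1305 hashing is folded directly into the
// round code below rather than wrapped around it.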
5348L$open_avx2_tail_512:
5349	vmovdqa	L$chacha20_consts(%rip),%ymm0
5350	vmovdqa	0+64(%rbp),%ymm4
5351	vmovdqa	0+96(%rbp),%ymm8
5352	vmovdqa	%ymm0,%ymm1
5353	vmovdqa	%ymm4,%ymm5
5354	vmovdqa	%ymm8,%ymm9
5355	vmovdqa	%ymm0,%ymm2
5356	vmovdqa	%ymm4,%ymm6
5357	vmovdqa	%ymm8,%ymm10
5358	vmovdqa	%ymm0,%ymm3
5359	vmovdqa	%ymm4,%ymm7
5360	vmovdqa	%ymm8,%ymm11
5361	vmovdqa	L$avx2_inc(%rip),%ymm12
5362	vpaddd	0+160(%rbp),%ymm12,%ymm15
5363	vpaddd	%ymm15,%ymm12,%ymm14
5364	vpaddd	%ymm14,%ymm12,%ymm13
5365	vpaddd	%ymm13,%ymm12,%ymm12
5366	vmovdqa	%ymm15,0+256(%rbp)
5367	vmovdqa	%ymm14,0+224(%rbp)
5368	vmovdqa	%ymm13,0+192(%rbp)
5369	vmovdqa	%ymm12,0+160(%rbp)
5370
5371	xorq	%rcx,%rcx
5372	movq	%rsi,%r8
5373L$open_avx2_tail_512_rounds_and_x2hash:
5374	addq	0+0(%r8),%r10
5375	adcq	8+0(%r8),%r11
5376	adcq	$1,%r12
5377	movq	0+0+0(%rbp),%rax
5378	movq	%rax,%r15
5379	mulq	%r10
5380	movq	%rax,%r13
5381	movq	%rdx,%r14
5382	movq	0+0+0(%rbp),%rax
5383	mulq	%r11
5384	imulq	%r12,%r15
5385	addq	%rax,%r14
5386	adcq	%rdx,%r15
5387	movq	8+0+0(%rbp),%rax
5388	movq	%rax,%r9
5389	mulq	%r10
5390	addq	%rax,%r14
5391	adcq	$0,%rdx
5392	movq	%rdx,%r10
5393	movq	8+0+0(%rbp),%rax
5394	mulq	%r11
5395	addq	%rax,%r15
5396	adcq	$0,%rdx
5397	imulq	%r12,%r9
5398	addq	%r10,%r15
5399	adcq	%rdx,%r9
5400	movq	%r13,%r10
5401	movq	%r14,%r11
5402	movq	%r15,%r12
5403	andq	$3,%r12
5404	movq	%r15,%r13
5405	andq	$-4,%r13
5406	movq	%r9,%r14
5407	shrdq	$2,%r9,%r15
5408	shrq	$2,%r9
5409	addq	%r13,%r15
5410	adcq	%r14,%r9
5411	addq	%r15,%r10
5412	adcq	%r9,%r11
5413	adcq	$0,%r12
5414
5415	leaq	16(%r8),%r8
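// Annotation: %ymm8 is spilled to 128(%rbp) so the register can double as
// scratch for the rol16/rol8 shuffle masks and the shift temporaries; it is
// reloaded from the spill slot after each half round.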
5416L$open_avx2_tail_512_rounds_and_x1hash:
5417	vmovdqa	%ymm8,0+128(%rbp)
5418	vmovdqa	L$rol16(%rip),%ymm8
5419	vpaddd	%ymm7,%ymm3,%ymm3
5420	vpaddd	%ymm6,%ymm2,%ymm2
5421	vpaddd	%ymm5,%ymm1,%ymm1
5422	vpaddd	%ymm4,%ymm0,%ymm0
5423	vpxor	%ymm3,%ymm15,%ymm15
5424	vpxor	%ymm2,%ymm14,%ymm14
5425	vpxor	%ymm1,%ymm13,%ymm13
5426	vpxor	%ymm0,%ymm12,%ymm12
5427	vpshufb	%ymm8,%ymm15,%ymm15
5428	vpshufb	%ymm8,%ymm14,%ymm14
5429	vpshufb	%ymm8,%ymm13,%ymm13
5430	vpshufb	%ymm8,%ymm12,%ymm12
5431	vpaddd	%ymm15,%ymm11,%ymm11
5432	vpaddd	%ymm14,%ymm10,%ymm10
5433	vpaddd	%ymm13,%ymm9,%ymm9
5434	vpaddd	0+128(%rbp),%ymm12,%ymm8
5435	vpxor	%ymm11,%ymm7,%ymm7
5436	vpxor	%ymm10,%ymm6,%ymm6
5437	vpxor	%ymm9,%ymm5,%ymm5
5438	vpxor	%ymm8,%ymm4,%ymm4
5439	vmovdqa	%ymm8,0+128(%rbp)
5440	vpsrld	$20,%ymm7,%ymm8
5441	vpslld	$32-20,%ymm7,%ymm7
5442	vpxor	%ymm8,%ymm7,%ymm7
5443	vpsrld	$20,%ymm6,%ymm8
5444	vpslld	$32-20,%ymm6,%ymm6
5445	vpxor	%ymm8,%ymm6,%ymm6
5446	vpsrld	$20,%ymm5,%ymm8
5447	vpslld	$32-20,%ymm5,%ymm5
5448	vpxor	%ymm8,%ymm5,%ymm5
5449	vpsrld	$20,%ymm4,%ymm8
5450	vpslld	$32-20,%ymm4,%ymm4
5451	vpxor	%ymm8,%ymm4,%ymm4
5452	vmovdqa	L$rol8(%rip),%ymm8
5453	vpaddd	%ymm7,%ymm3,%ymm3
5454	addq	0+0(%r8),%r10
5455	adcq	8+0(%r8),%r11
5456	adcq	$1,%r12
5457	movq	0+0+0(%rbp),%rdx
5458	movq	%rdx,%r15
5459	mulxq	%r10,%r13,%r14
5460	mulxq	%r11,%rax,%rdx
5461	imulq	%r12,%r15
5462	addq	%rax,%r14
5463	adcq	%rdx,%r15
5464	movq	8+0+0(%rbp),%rdx
5465	mulxq	%r10,%r10,%rax
5466	addq	%r10,%r14
5467	mulxq	%r11,%r11,%r9
5468	adcq	%r11,%r15
5469	adcq	$0,%r9
5470	imulq	%r12,%rdx
5471	addq	%rax,%r15
5472	adcq	%rdx,%r9
5473	movq	%r13,%r10
5474	movq	%r14,%r11
5475	movq	%r15,%r12
5476	andq	$3,%r12
5477	movq	%r15,%r13
5478	andq	$-4,%r13
5479	movq	%r9,%r14
5480	shrdq	$2,%r9,%r15
5481	shrq	$2,%r9
5482	addq	%r13,%r15
5483	adcq	%r14,%r9
5484	addq	%r15,%r10
5485	adcq	%r9,%r11
5486	adcq	$0,%r12
5487	vpaddd	%ymm6,%ymm2,%ymm2
5488	vpaddd	%ymm5,%ymm1,%ymm1
5489	vpaddd	%ymm4,%ymm0,%ymm0
5490	vpxor	%ymm3,%ymm15,%ymm15
5491	vpxor	%ymm2,%ymm14,%ymm14
5492	vpxor	%ymm1,%ymm13,%ymm13
5493	vpxor	%ymm0,%ymm12,%ymm12
5494	vpshufb	%ymm8,%ymm15,%ymm15
5495	vpshufb	%ymm8,%ymm14,%ymm14
5496	vpshufb	%ymm8,%ymm13,%ymm13
5497	vpshufb	%ymm8,%ymm12,%ymm12
5498	vpaddd	%ymm15,%ymm11,%ymm11
5499	vpaddd	%ymm14,%ymm10,%ymm10
5500	vpaddd	%ymm13,%ymm9,%ymm9
5501	vpaddd	0+128(%rbp),%ymm12,%ymm8
5502	vpxor	%ymm11,%ymm7,%ymm7
5503	vpxor	%ymm10,%ymm6,%ymm6
5504	vpxor	%ymm9,%ymm5,%ymm5
5505	vpxor	%ymm8,%ymm4,%ymm4
5506	vmovdqa	%ymm8,0+128(%rbp)
5507	vpsrld	$25,%ymm7,%ymm8
5508	vpslld	$32-25,%ymm7,%ymm7
5509	vpxor	%ymm8,%ymm7,%ymm7
5510	vpsrld	$25,%ymm6,%ymm8
5511	vpslld	$32-25,%ymm6,%ymm6
5512	vpxor	%ymm8,%ymm6,%ymm6
5513	vpsrld	$25,%ymm5,%ymm8
5514	vpslld	$32-25,%ymm5,%ymm5
5515	vpxor	%ymm8,%ymm5,%ymm5
5516	vpsrld	$25,%ymm4,%ymm8
5517	vpslld	$32-25,%ymm4,%ymm4
5518	vpxor	%ymm8,%ymm4,%ymm4
5519	vmovdqa	0+128(%rbp),%ymm8
5520	vpalignr	$4,%ymm7,%ymm7,%ymm7
5521	vpalignr	$8,%ymm11,%ymm11,%ymm11
5522	vpalignr	$12,%ymm15,%ymm15,%ymm15
5523	vpalignr	$4,%ymm6,%ymm6,%ymm6
5524	vpalignr	$8,%ymm10,%ymm10,%ymm10
5525	vpalignr	$12,%ymm14,%ymm14,%ymm14
5526	vpalignr	$4,%ymm5,%ymm5,%ymm5
5527	vpalignr	$8,%ymm9,%ymm9,%ymm9
5528	vpalignr	$12,%ymm13,%ymm13,%ymm13
5529	vpalignr	$4,%ymm4,%ymm4,%ymm4
5530	vpalignr	$8,%ymm8,%ymm8,%ymm8
5531	vpalignr	$12,%ymm12,%ymm12,%ymm12
5532	vmovdqa	%ymm8,0+128(%rbp)
5533	vmovdqa	L$rol16(%rip),%ymm8
5534	vpaddd	%ymm7,%ymm3,%ymm3
5535	addq	0+16(%r8),%r10
5536	adcq	8+16(%r8),%r11
5537	adcq	$1,%r12
5538	movq	0+0+0(%rbp),%rdx
5539	movq	%rdx,%r15
5540	mulxq	%r10,%r13,%r14
5541	mulxq	%r11,%rax,%rdx
5542	imulq	%r12,%r15
5543	addq	%rax,%r14
5544	adcq	%rdx,%r15
5545	movq	8+0+0(%rbp),%rdx
5546	mulxq	%r10,%r10,%rax
5547	addq	%r10,%r14
5548	mulxq	%r11,%r11,%r9
5549	adcq	%r11,%r15
5550	adcq	$0,%r9
5551	imulq	%r12,%rdx
5552	addq	%rax,%r15
5553	adcq	%rdx,%r9
5554	movq	%r13,%r10
5555	movq	%r14,%r11
5556	movq	%r15,%r12
5557	andq	$3,%r12
5558	movq	%r15,%r13
5559	andq	$-4,%r13
5560	movq	%r9,%r14
5561	shrdq	$2,%r9,%r15
5562	shrq	$2,%r9
5563	addq	%r13,%r15
5564	adcq	%r14,%r9
5565	addq	%r15,%r10
5566	adcq	%r9,%r11
5567	adcq	$0,%r12
5568
5569	leaq	32(%r8),%r8
5570	vpaddd	%ymm6,%ymm2,%ymm2
5571	vpaddd	%ymm5,%ymm1,%ymm1
5572	vpaddd	%ymm4,%ymm0,%ymm0
5573	vpxor	%ymm3,%ymm15,%ymm15
5574	vpxor	%ymm2,%ymm14,%ymm14
5575	vpxor	%ymm1,%ymm13,%ymm13
5576	vpxor	%ymm0,%ymm12,%ymm12
5577	vpshufb	%ymm8,%ymm15,%ymm15
5578	vpshufb	%ymm8,%ymm14,%ymm14
5579	vpshufb	%ymm8,%ymm13,%ymm13
5580	vpshufb	%ymm8,%ymm12,%ymm12
5581	vpaddd	%ymm15,%ymm11,%ymm11
5582	vpaddd	%ymm14,%ymm10,%ymm10
5583	vpaddd	%ymm13,%ymm9,%ymm9
5584	vpaddd	0+128(%rbp),%ymm12,%ymm8
5585	vpxor	%ymm11,%ymm7,%ymm7
5586	vpxor	%ymm10,%ymm6,%ymm6
5587	vpxor	%ymm9,%ymm5,%ymm5
5588	vpxor	%ymm8,%ymm4,%ymm4
5589	vmovdqa	%ymm8,0+128(%rbp)
5590	vpsrld	$20,%ymm7,%ymm8
5591	vpslld	$32-20,%ymm7,%ymm7
5592	vpxor	%ymm8,%ymm7,%ymm7
5593	vpsrld	$20,%ymm6,%ymm8
5594	vpslld	$32-20,%ymm6,%ymm6
5595	vpxor	%ymm8,%ymm6,%ymm6
5596	vpsrld	$20,%ymm5,%ymm8
5597	vpslld	$32-20,%ymm5,%ymm5
5598	vpxor	%ymm8,%ymm5,%ymm5
5599	vpsrld	$20,%ymm4,%ymm8
5600	vpslld	$32-20,%ymm4,%ymm4
5601	vpxor	%ymm8,%ymm4,%ymm4
5602	vmovdqa	L$rol8(%rip),%ymm8
5603	vpaddd	%ymm7,%ymm3,%ymm3
5604	vpaddd	%ymm6,%ymm2,%ymm2
5605	vpaddd	%ymm5,%ymm1,%ymm1
5606	vpaddd	%ymm4,%ymm0,%ymm0
5607	vpxor	%ymm3,%ymm15,%ymm15
5608	vpxor	%ymm2,%ymm14,%ymm14
5609	vpxor	%ymm1,%ymm13,%ymm13
5610	vpxor	%ymm0,%ymm12,%ymm12
5611	vpshufb	%ymm8,%ymm15,%ymm15
5612	vpshufb	%ymm8,%ymm14,%ymm14
5613	vpshufb	%ymm8,%ymm13,%ymm13
5614	vpshufb	%ymm8,%ymm12,%ymm12
5615	vpaddd	%ymm15,%ymm11,%ymm11
5616	vpaddd	%ymm14,%ymm10,%ymm10
5617	vpaddd	%ymm13,%ymm9,%ymm9
5618	vpaddd	0+128(%rbp),%ymm12,%ymm8
5619	vpxor	%ymm11,%ymm7,%ymm7
5620	vpxor	%ymm10,%ymm6,%ymm6
5621	vpxor	%ymm9,%ymm5,%ymm5
5622	vpxor	%ymm8,%ymm4,%ymm4
5623	vmovdqa	%ymm8,0+128(%rbp)
5624	vpsrld	$25,%ymm7,%ymm8
5625	vpslld	$32-25,%ymm7,%ymm7
5626	vpxor	%ymm8,%ymm7,%ymm7
5627	vpsrld	$25,%ymm6,%ymm8
5628	vpslld	$32-25,%ymm6,%ymm6
5629	vpxor	%ymm8,%ymm6,%ymm6
5630	vpsrld	$25,%ymm5,%ymm8
5631	vpslld	$32-25,%ymm5,%ymm5
5632	vpxor	%ymm8,%ymm5,%ymm5
5633	vpsrld	$25,%ymm4,%ymm8
5634	vpslld	$32-25,%ymm4,%ymm4
5635	vpxor	%ymm8,%ymm4,%ymm4
5636	vmovdqa	0+128(%rbp),%ymm8
5637	vpalignr	$12,%ymm7,%ymm7,%ymm7
5638	vpalignr	$8,%ymm11,%ymm11,%ymm11
5639	vpalignr	$4,%ymm15,%ymm15,%ymm15
5640	vpalignr	$12,%ymm6,%ymm6,%ymm6
5641	vpalignr	$8,%ymm10,%ymm10,%ymm10
5642	vpalignr	$4,%ymm14,%ymm14,%ymm14
5643	vpalignr	$12,%ymm5,%ymm5,%ymm5
5644	vpalignr	$8,%ymm9,%ymm9,%ymm9
5645	vpalignr	$4,%ymm13,%ymm13,%ymm13
5646	vpalignr	$12,%ymm4,%ymm4,%ymm4
5647	vpalignr	$8,%ymm8,%ymm8,%ymm8
5648	vpalignr	$4,%ymm12,%ymm12,%ymm12
5649
5650	incq	%rcx
5651	cmpq	$4,%rcx
5652	jl	L$open_avx2_tail_512_rounds_and_x2hash
5653	cmpq	$10,%rcx
5654	jne	L$open_avx2_tail_512_rounds_and_x1hash
5655	movq	%rbx,%rcx
5656	subq	$384,%rcx
5657	andq	$-16,%rcx
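// Annotation: hash whatever ciphertext beyond the first 384 bytes was not
// covered by the interleaved rounds, 16 bytes per pass (%rcx holds the
// remaining count, rounded down to a multiple of 16).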
5658L$open_avx2_tail_512_hash:
5659	testq	%rcx,%rcx
5660	je	L$open_avx2_tail_512_done
5661	addq	0+0(%r8),%r10
5662	adcq	8+0(%r8),%r11
5663	adcq	$1,%r12
5664	movq	0+0+0(%rbp),%rdx
5665	movq	%rdx,%r15
5666	mulxq	%r10,%r13,%r14
5667	mulxq	%r11,%rax,%rdx
5668	imulq	%r12,%r15
5669	addq	%rax,%r14
5670	adcq	%rdx,%r15
5671	movq	8+0+0(%rbp),%rdx
5672	mulxq	%r10,%r10,%rax
5673	addq	%r10,%r14
5674	mulxq	%r11,%r11,%r9
5675	adcq	%r11,%r15
5676	adcq	$0,%r9
5677	imulq	%r12,%rdx
5678	addq	%rax,%r15
5679	adcq	%rdx,%r9
5680	movq	%r13,%r10
5681	movq	%r14,%r11
5682	movq	%r15,%r12
5683	andq	$3,%r12
5684	movq	%r15,%r13
5685	andq	$-4,%r13
5686	movq	%r9,%r14
5687	shrdq	$2,%r9,%r15
5688	shrq	$2,%r9
5689	addq	%r13,%r15
5690	adcq	%r14,%r9
5691	addq	%r15,%r10
5692	adcq	%r9,%r11
5693	adcq	$0,%r12
5694
5695	leaq	16(%r8),%r8
5696	subq	$16,%rcx
5697	jmp	L$open_avx2_tail_512_hash
5698L$open_avx2_tail_512_done:
5699	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
5700	vpaddd	0+64(%rbp),%ymm7,%ymm7
5701	vpaddd	0+96(%rbp),%ymm11,%ymm11
5702	vpaddd	0+256(%rbp),%ymm15,%ymm15
5703	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
5704	vpaddd	0+64(%rbp),%ymm6,%ymm6
5705	vpaddd	0+96(%rbp),%ymm10,%ymm10
5706	vpaddd	0+224(%rbp),%ymm14,%ymm14
5707	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
5708	vpaddd	0+64(%rbp),%ymm5,%ymm5
5709	vpaddd	0+96(%rbp),%ymm9,%ymm9
5710	vpaddd	0+192(%rbp),%ymm13,%ymm13
5711	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5712	vpaddd	0+64(%rbp),%ymm4,%ymm4
5713	vpaddd	0+96(%rbp),%ymm8,%ymm8
5714	vpaddd	0+160(%rbp),%ymm12,%ymm12
5715
5716	vmovdqa	%ymm0,0+128(%rbp)
5717	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
5718	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
5719	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
5720	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
5721	vpxor	0+0(%rsi),%ymm0,%ymm0
5722	vpxor	32+0(%rsi),%ymm3,%ymm3
5723	vpxor	64+0(%rsi),%ymm7,%ymm7
5724	vpxor	96+0(%rsi),%ymm11,%ymm11
5725	vmovdqu	%ymm0,0+0(%rdi)
5726	vmovdqu	%ymm3,32+0(%rdi)
5727	vmovdqu	%ymm7,64+0(%rdi)
5728	vmovdqu	%ymm11,96+0(%rdi)
5729
5730	vmovdqa	0+128(%rbp),%ymm0
5731	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5732	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5733	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5734	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5735	vpxor	0+128(%rsi),%ymm3,%ymm3
5736	vpxor	32+128(%rsi),%ymm2,%ymm2
5737	vpxor	64+128(%rsi),%ymm6,%ymm6
5738	vpxor	96+128(%rsi),%ymm10,%ymm10
5739	vmovdqu	%ymm3,0+128(%rdi)
5740	vmovdqu	%ymm2,32+128(%rdi)
5741	vmovdqu	%ymm6,64+128(%rdi)
5742	vmovdqu	%ymm10,96+128(%rdi)
5743	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5744	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5745	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5746	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5747	vpxor	0+256(%rsi),%ymm3,%ymm3
5748	vpxor	32+256(%rsi),%ymm1,%ymm1
5749	vpxor	64+256(%rsi),%ymm5,%ymm5
5750	vpxor	96+256(%rsi),%ymm9,%ymm9
5751	vmovdqu	%ymm3,0+256(%rdi)
5752	vmovdqu	%ymm1,32+256(%rdi)
5753	vmovdqu	%ymm5,64+256(%rdi)
5754	vmovdqu	%ymm9,96+256(%rdi)
5755	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5756	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5757	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5758	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5759	vmovdqa	%ymm3,%ymm8
5760
5761	leaq	384(%rsi),%rsi
5762	leaq	384(%rdi),%rdi
5763	subq	$384,%rbx
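// Annotation: common exit for all tail sizes. The remaining keystream sits in
// ymm0/ymm4/ymm8/ymm12 and is XORed with the input 32 bytes at a time, the
// registers rotating down after each store; sub-32-byte leftovers are handled
// by the 16-byte and SSE tails below.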
5764L$open_avx2_tail_128_xor:
5765	cmpq	$32,%rbx
5766	jb	L$open_avx2_tail_32_xor
5767	subq	$32,%rbx
5768	vpxor	(%rsi),%ymm0,%ymm0
5769	vmovdqu	%ymm0,(%rdi)
5770	leaq	32(%rsi),%rsi
5771	leaq	32(%rdi),%rdi
5772	vmovdqa	%ymm4,%ymm0
5773	vmovdqa	%ymm8,%ymm4
5774	vmovdqa	%ymm12,%ymm8
5775	jmp	L$open_avx2_tail_128_xor
5776L$open_avx2_tail_32_xor:
5777	cmpq	$16,%rbx
5778	vmovdqa	%xmm0,%xmm1
5779	jb	L$open_avx2_exit
5780	subq	$16,%rbx
5781
5782	vpxor	(%rsi),%xmm0,%xmm1
5783	vmovdqu	%xmm1,(%rdi)
5784	leaq	16(%rsi),%rsi
5785	leaq	16(%rdi),%rdi
5786	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
5787	vmovdqa	%xmm0,%xmm1
5788L$open_avx2_exit:
5789	vzeroupper
5790	jmp	L$open_sse_tail_16
5791
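// Annotation: short-message path built on two ChaCha20 blocks. After the
// rounds, the first 32 bytes of the low-counter block are clamped to form the
// one-time Poly1305 key (stored at 0(%rbp)); the rest becomes keystream for
// the short XOR loop.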
5792L$open_avx2_192:
5793	vmovdqa	%ymm0,%ymm1
5794	vmovdqa	%ymm0,%ymm2
5795	vmovdqa	%ymm4,%ymm5
5796	vmovdqa	%ymm4,%ymm6
5797	vmovdqa	%ymm8,%ymm9
5798	vmovdqa	%ymm8,%ymm10
5799	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
5800	vmovdqa	%ymm12,%ymm11
5801	vmovdqa	%ymm13,%ymm15
5802	movq	$10,%r10
5803L$open_avx2_192_rounds:
5804	vpaddd	%ymm4,%ymm0,%ymm0
5805	vpxor	%ymm0,%ymm12,%ymm12
5806	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5807	vpaddd	%ymm12,%ymm8,%ymm8
5808	vpxor	%ymm8,%ymm4,%ymm4
5809	vpsrld	$20,%ymm4,%ymm3
5810	vpslld	$12,%ymm4,%ymm4
5811	vpxor	%ymm3,%ymm4,%ymm4
5812	vpaddd	%ymm4,%ymm0,%ymm0
5813	vpxor	%ymm0,%ymm12,%ymm12
5814	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5815	vpaddd	%ymm12,%ymm8,%ymm8
5816	vpxor	%ymm8,%ymm4,%ymm4
5817	vpslld	$7,%ymm4,%ymm3
5818	vpsrld	$25,%ymm4,%ymm4
5819	vpxor	%ymm3,%ymm4,%ymm4
5820	vpalignr	$12,%ymm12,%ymm12,%ymm12
5821	vpalignr	$8,%ymm8,%ymm8,%ymm8
5822	vpalignr	$4,%ymm4,%ymm4,%ymm4
5823	vpaddd	%ymm5,%ymm1,%ymm1
5824	vpxor	%ymm1,%ymm13,%ymm13
5825	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5826	vpaddd	%ymm13,%ymm9,%ymm9
5827	vpxor	%ymm9,%ymm5,%ymm5
5828	vpsrld	$20,%ymm5,%ymm3
5829	vpslld	$12,%ymm5,%ymm5
5830	vpxor	%ymm3,%ymm5,%ymm5
5831	vpaddd	%ymm5,%ymm1,%ymm1
5832	vpxor	%ymm1,%ymm13,%ymm13
5833	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5834	vpaddd	%ymm13,%ymm9,%ymm9
5835	vpxor	%ymm9,%ymm5,%ymm5
5836	vpslld	$7,%ymm5,%ymm3
5837	vpsrld	$25,%ymm5,%ymm5
5838	vpxor	%ymm3,%ymm5,%ymm5
5839	vpalignr	$12,%ymm13,%ymm13,%ymm13
5840	vpalignr	$8,%ymm9,%ymm9,%ymm9
5841	vpalignr	$4,%ymm5,%ymm5,%ymm5
5842	vpaddd	%ymm4,%ymm0,%ymm0
5843	vpxor	%ymm0,%ymm12,%ymm12
5844	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5845	vpaddd	%ymm12,%ymm8,%ymm8
5846	vpxor	%ymm8,%ymm4,%ymm4
5847	vpsrld	$20,%ymm4,%ymm3
5848	vpslld	$12,%ymm4,%ymm4
5849	vpxor	%ymm3,%ymm4,%ymm4
5850	vpaddd	%ymm4,%ymm0,%ymm0
5851	vpxor	%ymm0,%ymm12,%ymm12
5852	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5853	vpaddd	%ymm12,%ymm8,%ymm8
5854	vpxor	%ymm8,%ymm4,%ymm4
5855	vpslld	$7,%ymm4,%ymm3
5856	vpsrld	$25,%ymm4,%ymm4
5857	vpxor	%ymm3,%ymm4,%ymm4
5858	vpalignr	$4,%ymm12,%ymm12,%ymm12
5859	vpalignr	$8,%ymm8,%ymm8,%ymm8
5860	vpalignr	$12,%ymm4,%ymm4,%ymm4
5861	vpaddd	%ymm5,%ymm1,%ymm1
5862	vpxor	%ymm1,%ymm13,%ymm13
5863	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5864	vpaddd	%ymm13,%ymm9,%ymm9
5865	vpxor	%ymm9,%ymm5,%ymm5
5866	vpsrld	$20,%ymm5,%ymm3
5867	vpslld	$12,%ymm5,%ymm5
5868	vpxor	%ymm3,%ymm5,%ymm5
5869	vpaddd	%ymm5,%ymm1,%ymm1
5870	vpxor	%ymm1,%ymm13,%ymm13
5871	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5872	vpaddd	%ymm13,%ymm9,%ymm9
5873	vpxor	%ymm9,%ymm5,%ymm5
5874	vpslld	$7,%ymm5,%ymm3
5875	vpsrld	$25,%ymm5,%ymm5
5876	vpxor	%ymm3,%ymm5,%ymm5
5877	vpalignr	$4,%ymm13,%ymm13,%ymm13
5878	vpalignr	$8,%ymm9,%ymm9,%ymm9
5879	vpalignr	$12,%ymm5,%ymm5,%ymm5
5880
5881	decq	%r10
5882	jne	L$open_avx2_192_rounds
5883	vpaddd	%ymm2,%ymm0,%ymm0
5884	vpaddd	%ymm2,%ymm1,%ymm1
5885	vpaddd	%ymm6,%ymm4,%ymm4
5886	vpaddd	%ymm6,%ymm5,%ymm5
5887	vpaddd	%ymm10,%ymm8,%ymm8
5888	vpaddd	%ymm10,%ymm9,%ymm9
5889	vpaddd	%ymm11,%ymm12,%ymm12
5890	vpaddd	%ymm15,%ymm13,%ymm13
5891	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
5892
5893	vpand	L$clamp(%rip),%ymm3,%ymm3
5894	vmovdqa	%ymm3,0+0(%rbp)
5895
5896	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
5897	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
5898	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
5899	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
5900	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
5901	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
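// Annotation: common short-input path. Hash the AD, then alternate hashing
// 32 bytes of ciphertext with XORing 32 bytes of keystream until fewer than
// 32 bytes remain.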
5902L$open_avx2_short:
5903	movq	%r8,%r8
5904	call	poly_hash_ad_internal
5905L$open_avx2_short_hash_and_xor_loop:
5906	cmpq	$32,%rbx
5907	jb	L$open_avx2_short_tail_32
5908	subq	$32,%rbx
5909	addq	0+0(%rsi),%r10
5910	adcq	8+0(%rsi),%r11
5911	adcq	$1,%r12
5912	movq	0+0+0(%rbp),%rax
5913	movq	%rax,%r15
5914	mulq	%r10
5915	movq	%rax,%r13
5916	movq	%rdx,%r14
5917	movq	0+0+0(%rbp),%rax
5918	mulq	%r11
5919	imulq	%r12,%r15
5920	addq	%rax,%r14
5921	adcq	%rdx,%r15
5922	movq	8+0+0(%rbp),%rax
5923	movq	%rax,%r9
5924	mulq	%r10
5925	addq	%rax,%r14
5926	adcq	$0,%rdx
5927	movq	%rdx,%r10
5928	movq	8+0+0(%rbp),%rax
5929	mulq	%r11
5930	addq	%rax,%r15
5931	adcq	$0,%rdx
5932	imulq	%r12,%r9
5933	addq	%r10,%r15
5934	adcq	%rdx,%r9
5935	movq	%r13,%r10
5936	movq	%r14,%r11
5937	movq	%r15,%r12
5938	andq	$3,%r12
5939	movq	%r15,%r13
5940	andq	$-4,%r13
5941	movq	%r9,%r14
5942	shrdq	$2,%r9,%r15
5943	shrq	$2,%r9
5944	addq	%r13,%r15
5945	adcq	%r14,%r9
5946	addq	%r15,%r10
5947	adcq	%r9,%r11
5948	adcq	$0,%r12
5949	addq	0+16(%rsi),%r10
5950	adcq	8+16(%rsi),%r11
5951	adcq	$1,%r12
5952	movq	0+0+0(%rbp),%rax
5953	movq	%rax,%r15
5954	mulq	%r10
5955	movq	%rax,%r13
5956	movq	%rdx,%r14
5957	movq	0+0+0(%rbp),%rax
5958	mulq	%r11
5959	imulq	%r12,%r15
5960	addq	%rax,%r14
5961	adcq	%rdx,%r15
5962	movq	8+0+0(%rbp),%rax
5963	movq	%rax,%r9
5964	mulq	%r10
5965	addq	%rax,%r14
5966	adcq	$0,%rdx
5967	movq	%rdx,%r10
5968	movq	8+0+0(%rbp),%rax
5969	mulq	%r11
5970	addq	%rax,%r15
5971	adcq	$0,%rdx
5972	imulq	%r12,%r9
5973	addq	%r10,%r15
5974	adcq	%rdx,%r9
5975	movq	%r13,%r10
5976	movq	%r14,%r11
5977	movq	%r15,%r12
5978	andq	$3,%r12
5979	movq	%r15,%r13
5980	andq	$-4,%r13
5981	movq	%r9,%r14
5982	shrdq	$2,%r9,%r15
5983	shrq	$2,%r9
5984	addq	%r13,%r15
5985	adcq	%r14,%r9
5986	addq	%r15,%r10
5987	adcq	%r9,%r11
5988	adcq	$0,%r12
5989
5990
5991	vpxor	(%rsi),%ymm0,%ymm0
5992	vmovdqu	%ymm0,(%rdi)
5993	leaq	32(%rsi),%rsi
5994	leaq	32(%rdi),%rdi
5995
5996	vmovdqa	%ymm4,%ymm0
5997	vmovdqa	%ymm8,%ymm4
5998	vmovdqa	%ymm12,%ymm8
5999	vmovdqa	%ymm1,%ymm12
6000	vmovdqa	%ymm5,%ymm1
6001	vmovdqa	%ymm9,%ymm5
6002	vmovdqa	%ymm13,%ymm9
6003	vmovdqa	%ymm2,%ymm13
6004	vmovdqa	%ymm6,%ymm2
6005	jmp	L$open_avx2_short_hash_and_xor_loop
6006L$open_avx2_short_tail_32:
6007	cmpq	$16,%rbx
6008	vmovdqa	%xmm0,%xmm1
6009	jb	L$open_avx2_short_tail_32_exit
6010	subq	$16,%rbx
6011	addq	0+0(%rsi),%r10
6012	adcq	8+0(%rsi),%r11
6013	adcq	$1,%r12
6014	movq	0+0+0(%rbp),%rax
6015	movq	%rax,%r15
6016	mulq	%r10
6017	movq	%rax,%r13
6018	movq	%rdx,%r14
6019	movq	0+0+0(%rbp),%rax
6020	mulq	%r11
6021	imulq	%r12,%r15
6022	addq	%rax,%r14
6023	adcq	%rdx,%r15
6024	movq	8+0+0(%rbp),%rax
6025	movq	%rax,%r9
6026	mulq	%r10
6027	addq	%rax,%r14
6028	adcq	$0,%rdx
6029	movq	%rdx,%r10
6030	movq	8+0+0(%rbp),%rax
6031	mulq	%r11
6032	addq	%rax,%r15
6033	adcq	$0,%rdx
6034	imulq	%r12,%r9
6035	addq	%r10,%r15
6036	adcq	%rdx,%r9
6037	movq	%r13,%r10
6038	movq	%r14,%r11
6039	movq	%r15,%r12
6040	andq	$3,%r12
6041	movq	%r15,%r13
6042	andq	$-4,%r13
6043	movq	%r9,%r14
6044	shrdq	$2,%r9,%r15
6045	shrq	$2,%r9
6046	addq	%r13,%r15
6047	adcq	%r14,%r9
6048	addq	%r15,%r10
6049	adcq	%r9,%r11
6050	adcq	$0,%r12
6051
6052	vpxor	(%rsi),%xmm0,%xmm3
6053	vmovdqu	%xmm3,(%rdi)
6054	leaq	16(%rsi),%rsi
6055	leaq	16(%rdi),%rdi
6056	vextracti128	$1,%ymm0,%xmm1
6057L$open_avx2_short_tail_32_exit:
6058	vzeroupper
6059	jmp	L$open_sse_tail_16
6060
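// Annotation: short-message path built on three ChaCha20 blocks; key
// derivation and the XOR loop are shared with the two-block path via
// L$open_avx2_short.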
6061L$open_avx2_320:
6062	vmovdqa	%ymm0,%ymm1
6063	vmovdqa	%ymm0,%ymm2
6064	vmovdqa	%ymm4,%ymm5
6065	vmovdqa	%ymm4,%ymm6
6066	vmovdqa	%ymm8,%ymm9
6067	vmovdqa	%ymm8,%ymm10
6068	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
6069	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm14
6070	vmovdqa	%ymm4,%ymm7
6071	vmovdqa	%ymm8,%ymm11
6072	vmovdqa	%ymm12,0+160(%rbp)
6073	vmovdqa	%ymm13,0+192(%rbp)
6074	vmovdqa	%ymm14,0+224(%rbp)
6075	movq	$10,%r10
6076L$open_avx2_320_rounds:
6077	vpaddd	%ymm4,%ymm0,%ymm0
6078	vpxor	%ymm0,%ymm12,%ymm12
6079	vpshufb	L$rol16(%rip),%ymm12,%ymm12
6080	vpaddd	%ymm12,%ymm8,%ymm8
6081	vpxor	%ymm8,%ymm4,%ymm4
6082	vpsrld	$20,%ymm4,%ymm3
6083	vpslld	$12,%ymm4,%ymm4
6084	vpxor	%ymm3,%ymm4,%ymm4
6085	vpaddd	%ymm4,%ymm0,%ymm0
6086	vpxor	%ymm0,%ymm12,%ymm12
6087	vpshufb	L$rol8(%rip),%ymm12,%ymm12
6088	vpaddd	%ymm12,%ymm8,%ymm8
6089	vpxor	%ymm8,%ymm4,%ymm4
6090	vpslld	$7,%ymm4,%ymm3
6091	vpsrld	$25,%ymm4,%ymm4
6092	vpxor	%ymm3,%ymm4,%ymm4
6093	vpalignr	$12,%ymm12,%ymm12,%ymm12
6094	vpalignr	$8,%ymm8,%ymm8,%ymm8
6095	vpalignr	$4,%ymm4,%ymm4,%ymm4
6096	vpaddd	%ymm5,%ymm1,%ymm1
6097	vpxor	%ymm1,%ymm13,%ymm13
6098	vpshufb	L$rol16(%rip),%ymm13,%ymm13
6099	vpaddd	%ymm13,%ymm9,%ymm9
6100	vpxor	%ymm9,%ymm5,%ymm5
6101	vpsrld	$20,%ymm5,%ymm3
6102	vpslld	$12,%ymm5,%ymm5
6103	vpxor	%ymm3,%ymm5,%ymm5
6104	vpaddd	%ymm5,%ymm1,%ymm1
6105	vpxor	%ymm1,%ymm13,%ymm13
6106	vpshufb	L$rol8(%rip),%ymm13,%ymm13
6107	vpaddd	%ymm13,%ymm9,%ymm9
6108	vpxor	%ymm9,%ymm5,%ymm5
6109	vpslld	$7,%ymm5,%ymm3
6110	vpsrld	$25,%ymm5,%ymm5
6111	vpxor	%ymm3,%ymm5,%ymm5
6112	vpalignr	$12,%ymm13,%ymm13,%ymm13
6113	vpalignr	$8,%ymm9,%ymm9,%ymm9
6114	vpalignr	$4,%ymm5,%ymm5,%ymm5
6115	vpaddd	%ymm6,%ymm2,%ymm2
6116	vpxor	%ymm2,%ymm14,%ymm14
6117	vpshufb	L$rol16(%rip),%ymm14,%ymm14
6118	vpaddd	%ymm14,%ymm10,%ymm10
6119	vpxor	%ymm10,%ymm6,%ymm6
6120	vpsrld	$20,%ymm6,%ymm3
6121	vpslld	$12,%ymm6,%ymm6
6122	vpxor	%ymm3,%ymm6,%ymm6
6123	vpaddd	%ymm6,%ymm2,%ymm2
6124	vpxor	%ymm2,%ymm14,%ymm14
6125	vpshufb	L$rol8(%rip),%ymm14,%ymm14
6126	vpaddd	%ymm14,%ymm10,%ymm10
6127	vpxor	%ymm10,%ymm6,%ymm6
6128	vpslld	$7,%ymm6,%ymm3
6129	vpsrld	$25,%ymm6,%ymm6
6130	vpxor	%ymm3,%ymm6,%ymm6
6131	vpalignr	$12,%ymm14,%ymm14,%ymm14
6132	vpalignr	$8,%ymm10,%ymm10,%ymm10
6133	vpalignr	$4,%ymm6,%ymm6,%ymm6
6134	vpaddd	%ymm4,%ymm0,%ymm0
6135	vpxor	%ymm0,%ymm12,%ymm12
6136	vpshufb	L$rol16(%rip),%ymm12,%ymm12
6137	vpaddd	%ymm12,%ymm8,%ymm8
6138	vpxor	%ymm8,%ymm4,%ymm4
6139	vpsrld	$20,%ymm4,%ymm3
6140	vpslld	$12,%ymm4,%ymm4
6141	vpxor	%ymm3,%ymm4,%ymm4
6142	vpaddd	%ymm4,%ymm0,%ymm0
6143	vpxor	%ymm0,%ymm12,%ymm12
6144	vpshufb	L$rol8(%rip),%ymm12,%ymm12
6145	vpaddd	%ymm12,%ymm8,%ymm8
6146	vpxor	%ymm8,%ymm4,%ymm4
6147	vpslld	$7,%ymm4,%ymm3
6148	vpsrld	$25,%ymm4,%ymm4
6149	vpxor	%ymm3,%ymm4,%ymm4
6150	vpalignr	$4,%ymm12,%ymm12,%ymm12
6151	vpalignr	$8,%ymm8,%ymm8,%ymm8
6152	vpalignr	$12,%ymm4,%ymm4,%ymm4
6153	vpaddd	%ymm5,%ymm1,%ymm1
6154	vpxor	%ymm1,%ymm13,%ymm13
6155	vpshufb	L$rol16(%rip),%ymm13,%ymm13
6156	vpaddd	%ymm13,%ymm9,%ymm9
6157	vpxor	%ymm9,%ymm5,%ymm5
6158	vpsrld	$20,%ymm5,%ymm3
6159	vpslld	$12,%ymm5,%ymm5
6160	vpxor	%ymm3,%ymm5,%ymm5
6161	vpaddd	%ymm5,%ymm1,%ymm1
6162	vpxor	%ymm1,%ymm13,%ymm13
6163	vpshufb	L$rol8(%rip),%ymm13,%ymm13
6164	vpaddd	%ymm13,%ymm9,%ymm9
6165	vpxor	%ymm9,%ymm5,%ymm5
6166	vpslld	$7,%ymm5,%ymm3
6167	vpsrld	$25,%ymm5,%ymm5
6168	vpxor	%ymm3,%ymm5,%ymm5
6169	vpalignr	$4,%ymm13,%ymm13,%ymm13
6170	vpalignr	$8,%ymm9,%ymm9,%ymm9
6171	vpalignr	$12,%ymm5,%ymm5,%ymm5
6172	vpaddd	%ymm6,%ymm2,%ymm2
6173	vpxor	%ymm2,%ymm14,%ymm14
6174	vpshufb	L$rol16(%rip),%ymm14,%ymm14
6175	vpaddd	%ymm14,%ymm10,%ymm10
6176	vpxor	%ymm10,%ymm6,%ymm6
6177	vpsrld	$20,%ymm6,%ymm3
6178	vpslld	$12,%ymm6,%ymm6
6179	vpxor	%ymm3,%ymm6,%ymm6
6180	vpaddd	%ymm6,%ymm2,%ymm2
6181	vpxor	%ymm2,%ymm14,%ymm14
6182	vpshufb	L$rol8(%rip),%ymm14,%ymm14
6183	vpaddd	%ymm14,%ymm10,%ymm10
6184	vpxor	%ymm10,%ymm6,%ymm6
6185	vpslld	$7,%ymm6,%ymm3
6186	vpsrld	$25,%ymm6,%ymm6
6187	vpxor	%ymm3,%ymm6,%ymm6
6188	vpalignr	$4,%ymm14,%ymm14,%ymm14
6189	vpalignr	$8,%ymm10,%ymm10,%ymm10
6190	vpalignr	$12,%ymm6,%ymm6,%ymm6
6191
6192	decq	%r10
6193	jne	L$open_avx2_320_rounds
6194	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
6195	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
6196	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
6197	vpaddd	%ymm7,%ymm4,%ymm4
6198	vpaddd	%ymm7,%ymm5,%ymm5
6199	vpaddd	%ymm7,%ymm6,%ymm6
6200	vpaddd	%ymm11,%ymm8,%ymm8
6201	vpaddd	%ymm11,%ymm9,%ymm9
6202	vpaddd	%ymm11,%ymm10,%ymm10
6203	vpaddd	0+160(%rbp),%ymm12,%ymm12
6204	vpaddd	0+192(%rbp),%ymm13,%ymm13
6205	vpaddd	0+224(%rbp),%ymm14,%ymm14
6206	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
6207
6208	vpand	L$clamp(%rip),%ymm3,%ymm3
6209	vmovdqa	%ymm3,0+0(%rbp)
6210
6211	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
6212	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
6213	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
6214	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
6215	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
6216	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
6217	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
6218	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
6219	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
6220	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
6221	jmp	L$open_avx2_short
6222
6223
6224
6225
6226
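// Annotation: AVX2 seal (encrypt) path. As used below, %rsi/%rdi are the
// plaintext/ciphertext pointers, %rbx the plaintext length, %r8 the AD
// length, %r9 the key/counter block, and %rbp the scratch area holding the
// Poly1305 state and saved ChaCha20 state.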
6227.p2align	6
6228chacha20_poly1305_seal_avx2:
6229
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240
6241	vzeroupper
6242	vmovdqa	L$chacha20_consts(%rip),%ymm0
6243	vbroadcasti128	0(%r9),%ymm4
6244	vbroadcasti128	16(%r9),%ymm8
6245	vbroadcasti128	32(%r9),%ymm12
6246	vpaddd	L$avx2_init(%rip),%ymm12,%ymm12
6247	cmpq	$192,%rbx
6248	jbe	L$seal_avx2_192
6249	cmpq	$320,%rbx
6250	jbe	L$seal_avx2_320
6251	vmovdqa	%ymm0,%ymm1
6252	vmovdqa	%ymm0,%ymm2
6253	vmovdqa	%ymm0,%ymm3
6254	vmovdqa	%ymm4,%ymm5
6255	vmovdqa	%ymm4,%ymm6
6256	vmovdqa	%ymm4,%ymm7
6257	vmovdqa	%ymm4,0+64(%rbp)
6258	vmovdqa	%ymm8,%ymm9
6259	vmovdqa	%ymm8,%ymm10
6260	vmovdqa	%ymm8,%ymm11
6261	vmovdqa	%ymm8,0+96(%rbp)
6262	vmovdqa	%ymm12,%ymm15
6263	vpaddd	L$avx2_inc(%rip),%ymm15,%ymm14
6264	vpaddd	L$avx2_inc(%rip),%ymm14,%ymm13
6265	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm12
6266	vmovdqa	%ymm12,0+160(%rbp)
6267	vmovdqa	%ymm13,0+192(%rbp)
6268	vmovdqa	%ymm14,0+224(%rbp)
6269	vmovdqa	%ymm15,0+256(%rbp)
6270	movq	$10,%r10
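// Annotation: first batch of four ChaCha20 ymm blocks with consecutive
// counters. The lowest-counter block (ymm3/ymm7/ymm11/ymm15) supplies the
// clamped Poly1305 key once the rounds finish.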
6271L$seal_avx2_init_rounds:
6272	vmovdqa	%ymm8,0+128(%rbp)
6273	vmovdqa	L$rol16(%rip),%ymm8
6274	vpaddd	%ymm7,%ymm3,%ymm3
6275	vpaddd	%ymm6,%ymm2,%ymm2
6276	vpaddd	%ymm5,%ymm1,%ymm1
6277	vpaddd	%ymm4,%ymm0,%ymm0
6278	vpxor	%ymm3,%ymm15,%ymm15
6279	vpxor	%ymm2,%ymm14,%ymm14
6280	vpxor	%ymm1,%ymm13,%ymm13
6281	vpxor	%ymm0,%ymm12,%ymm12
6282	vpshufb	%ymm8,%ymm15,%ymm15
6283	vpshufb	%ymm8,%ymm14,%ymm14
6284	vpshufb	%ymm8,%ymm13,%ymm13
6285	vpshufb	%ymm8,%ymm12,%ymm12
6286	vpaddd	%ymm15,%ymm11,%ymm11
6287	vpaddd	%ymm14,%ymm10,%ymm10
6288	vpaddd	%ymm13,%ymm9,%ymm9
6289	vpaddd	0+128(%rbp),%ymm12,%ymm8
6290	vpxor	%ymm11,%ymm7,%ymm7
6291	vpxor	%ymm10,%ymm6,%ymm6
6292	vpxor	%ymm9,%ymm5,%ymm5
6293	vpxor	%ymm8,%ymm4,%ymm4
6294	vmovdqa	%ymm8,0+128(%rbp)
6295	vpsrld	$20,%ymm7,%ymm8
6296	vpslld	$32-20,%ymm7,%ymm7
6297	vpxor	%ymm8,%ymm7,%ymm7
6298	vpsrld	$20,%ymm6,%ymm8
6299	vpslld	$32-20,%ymm6,%ymm6
6300	vpxor	%ymm8,%ymm6,%ymm6
6301	vpsrld	$20,%ymm5,%ymm8
6302	vpslld	$32-20,%ymm5,%ymm5
6303	vpxor	%ymm8,%ymm5,%ymm5
6304	vpsrld	$20,%ymm4,%ymm8
6305	vpslld	$32-20,%ymm4,%ymm4
6306	vpxor	%ymm8,%ymm4,%ymm4
6307	vmovdqa	L$rol8(%rip),%ymm8
6308	vpaddd	%ymm7,%ymm3,%ymm3
6309	vpaddd	%ymm6,%ymm2,%ymm2
6310	vpaddd	%ymm5,%ymm1,%ymm1
6311	vpaddd	%ymm4,%ymm0,%ymm0
6312	vpxor	%ymm3,%ymm15,%ymm15
6313	vpxor	%ymm2,%ymm14,%ymm14
6314	vpxor	%ymm1,%ymm13,%ymm13
6315	vpxor	%ymm0,%ymm12,%ymm12
6316	vpshufb	%ymm8,%ymm15,%ymm15
6317	vpshufb	%ymm8,%ymm14,%ymm14
6318	vpshufb	%ymm8,%ymm13,%ymm13
6319	vpshufb	%ymm8,%ymm12,%ymm12
6320	vpaddd	%ymm15,%ymm11,%ymm11
6321	vpaddd	%ymm14,%ymm10,%ymm10
6322	vpaddd	%ymm13,%ymm9,%ymm9
6323	vpaddd	0+128(%rbp),%ymm12,%ymm8
6324	vpxor	%ymm11,%ymm7,%ymm7
6325	vpxor	%ymm10,%ymm6,%ymm6
6326	vpxor	%ymm9,%ymm5,%ymm5
6327	vpxor	%ymm8,%ymm4,%ymm4
6328	vmovdqa	%ymm8,0+128(%rbp)
6329	vpsrld	$25,%ymm7,%ymm8
6330	vpslld	$32-25,%ymm7,%ymm7
6331	vpxor	%ymm8,%ymm7,%ymm7
6332	vpsrld	$25,%ymm6,%ymm8
6333	vpslld	$32-25,%ymm6,%ymm6
6334	vpxor	%ymm8,%ymm6,%ymm6
6335	vpsrld	$25,%ymm5,%ymm8
6336	vpslld	$32-25,%ymm5,%ymm5
6337	vpxor	%ymm8,%ymm5,%ymm5
6338	vpsrld	$25,%ymm4,%ymm8
6339	vpslld	$32-25,%ymm4,%ymm4
6340	vpxor	%ymm8,%ymm4,%ymm4
6341	vmovdqa	0+128(%rbp),%ymm8
6342	vpalignr	$4,%ymm7,%ymm7,%ymm7
6343	vpalignr	$8,%ymm11,%ymm11,%ymm11
6344	vpalignr	$12,%ymm15,%ymm15,%ymm15
6345	vpalignr	$4,%ymm6,%ymm6,%ymm6
6346	vpalignr	$8,%ymm10,%ymm10,%ymm10
6347	vpalignr	$12,%ymm14,%ymm14,%ymm14
6348	vpalignr	$4,%ymm5,%ymm5,%ymm5
6349	vpalignr	$8,%ymm9,%ymm9,%ymm9
6350	vpalignr	$12,%ymm13,%ymm13,%ymm13
6351	vpalignr	$4,%ymm4,%ymm4,%ymm4
6352	vpalignr	$8,%ymm8,%ymm8,%ymm8
6353	vpalignr	$12,%ymm12,%ymm12,%ymm12
6354	vmovdqa	%ymm8,0+128(%rbp)
6355	vmovdqa	L$rol16(%rip),%ymm8
6356	vpaddd	%ymm7,%ymm3,%ymm3
6357	vpaddd	%ymm6,%ymm2,%ymm2
6358	vpaddd	%ymm5,%ymm1,%ymm1
6359	vpaddd	%ymm4,%ymm0,%ymm0
6360	vpxor	%ymm3,%ymm15,%ymm15
6361	vpxor	%ymm2,%ymm14,%ymm14
6362	vpxor	%ymm1,%ymm13,%ymm13
6363	vpxor	%ymm0,%ymm12,%ymm12
6364	vpshufb	%ymm8,%ymm15,%ymm15
6365	vpshufb	%ymm8,%ymm14,%ymm14
6366	vpshufb	%ymm8,%ymm13,%ymm13
6367	vpshufb	%ymm8,%ymm12,%ymm12
6368	vpaddd	%ymm15,%ymm11,%ymm11
6369	vpaddd	%ymm14,%ymm10,%ymm10
6370	vpaddd	%ymm13,%ymm9,%ymm9
6371	vpaddd	0+128(%rbp),%ymm12,%ymm8
6372	vpxor	%ymm11,%ymm7,%ymm7
6373	vpxor	%ymm10,%ymm6,%ymm6
6374	vpxor	%ymm9,%ymm5,%ymm5
6375	vpxor	%ymm8,%ymm4,%ymm4
6376	vmovdqa	%ymm8,0+128(%rbp)
6377	vpsrld	$20,%ymm7,%ymm8
6378	vpslld	$32-20,%ymm7,%ymm7
6379	vpxor	%ymm8,%ymm7,%ymm7
6380	vpsrld	$20,%ymm6,%ymm8
6381	vpslld	$32-20,%ymm6,%ymm6
6382	vpxor	%ymm8,%ymm6,%ymm6
6383	vpsrld	$20,%ymm5,%ymm8
6384	vpslld	$32-20,%ymm5,%ymm5
6385	vpxor	%ymm8,%ymm5,%ymm5
6386	vpsrld	$20,%ymm4,%ymm8
6387	vpslld	$32-20,%ymm4,%ymm4
6388	vpxor	%ymm8,%ymm4,%ymm4
6389	vmovdqa	L$rol8(%rip),%ymm8
6390	vpaddd	%ymm7,%ymm3,%ymm3
6391	vpaddd	%ymm6,%ymm2,%ymm2
6392	vpaddd	%ymm5,%ymm1,%ymm1
6393	vpaddd	%ymm4,%ymm0,%ymm0
6394	vpxor	%ymm3,%ymm15,%ymm15
6395	vpxor	%ymm2,%ymm14,%ymm14
6396	vpxor	%ymm1,%ymm13,%ymm13
6397	vpxor	%ymm0,%ymm12,%ymm12
6398	vpshufb	%ymm8,%ymm15,%ymm15
6399	vpshufb	%ymm8,%ymm14,%ymm14
6400	vpshufb	%ymm8,%ymm13,%ymm13
6401	vpshufb	%ymm8,%ymm12,%ymm12
6402	vpaddd	%ymm15,%ymm11,%ymm11
6403	vpaddd	%ymm14,%ymm10,%ymm10
6404	vpaddd	%ymm13,%ymm9,%ymm9
6405	vpaddd	0+128(%rbp),%ymm12,%ymm8
6406	vpxor	%ymm11,%ymm7,%ymm7
6407	vpxor	%ymm10,%ymm6,%ymm6
6408	vpxor	%ymm9,%ymm5,%ymm5
6409	vpxor	%ymm8,%ymm4,%ymm4
6410	vmovdqa	%ymm8,0+128(%rbp)
6411	vpsrld	$25,%ymm7,%ymm8
6412	vpslld	$32-25,%ymm7,%ymm7
6413	vpxor	%ymm8,%ymm7,%ymm7
6414	vpsrld	$25,%ymm6,%ymm8
6415	vpslld	$32-25,%ymm6,%ymm6
6416	vpxor	%ymm8,%ymm6,%ymm6
6417	vpsrld	$25,%ymm5,%ymm8
6418	vpslld	$32-25,%ymm5,%ymm5
6419	vpxor	%ymm8,%ymm5,%ymm5
6420	vpsrld	$25,%ymm4,%ymm8
6421	vpslld	$32-25,%ymm4,%ymm4
6422	vpxor	%ymm8,%ymm4,%ymm4
6423	vmovdqa	0+128(%rbp),%ymm8
6424	vpalignr	$12,%ymm7,%ymm7,%ymm7
6425	vpalignr	$8,%ymm11,%ymm11,%ymm11
6426	vpalignr	$4,%ymm15,%ymm15,%ymm15
6427	vpalignr	$12,%ymm6,%ymm6,%ymm6
6428	vpalignr	$8,%ymm10,%ymm10,%ymm10
6429	vpalignr	$4,%ymm14,%ymm14,%ymm14
6430	vpalignr	$12,%ymm5,%ymm5,%ymm5
6431	vpalignr	$8,%ymm9,%ymm9,%ymm9
6432	vpalignr	$4,%ymm13,%ymm13,%ymm13
6433	vpalignr	$12,%ymm4,%ymm4,%ymm4
6434	vpalignr	$8,%ymm8,%ymm8,%ymm8
6435	vpalignr	$4,%ymm12,%ymm12,%ymm12
6436
6437	decq	%r10
6438	jnz	L$seal_avx2_init_rounds
6439	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
6440	vpaddd	0+64(%rbp),%ymm7,%ymm7
6441	vpaddd	0+96(%rbp),%ymm11,%ymm11
6442	vpaddd	0+256(%rbp),%ymm15,%ymm15
6443	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
6444	vpaddd	0+64(%rbp),%ymm6,%ymm6
6445	vpaddd	0+96(%rbp),%ymm10,%ymm10
6446	vpaddd	0+224(%rbp),%ymm14,%ymm14
6447	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
6448	vpaddd	0+64(%rbp),%ymm5,%ymm5
6449	vpaddd	0+96(%rbp),%ymm9,%ymm9
6450	vpaddd	0+192(%rbp),%ymm13,%ymm13
6451	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
6452	vpaddd	0+64(%rbp),%ymm4,%ymm4
6453	vpaddd	0+96(%rbp),%ymm8,%ymm8
6454	vpaddd	0+160(%rbp),%ymm12,%ymm12
6455
6456	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
6457	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
6458	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
6459	vpand	L$clamp(%rip),%ymm15,%ymm15
6460	vmovdqa	%ymm15,0+0(%rbp)
6461	movq	%r8,%r8
6462	call	poly_hash_ad_internal
6463
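// Annotation: encrypt the first 320 bytes of plaintext with keystream from
// the initial batch; the first 64-byte ChaCha20 block was reserved for the
// Poly1305 key, and a further 128 bytes stay in ymm0/ymm4/ymm8/ymm12 for use
// just below.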
6464	vpxor	0(%rsi),%ymm3,%ymm3
6465	vpxor	32(%rsi),%ymm11,%ymm11
6466	vmovdqu	%ymm3,0(%rdi)
6467	vmovdqu	%ymm11,32(%rdi)
6468	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
6469	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
6470	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
6471	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
6472	vpxor	0+64(%rsi),%ymm15,%ymm15
6473	vpxor	32+64(%rsi),%ymm2,%ymm2
6474	vpxor	64+64(%rsi),%ymm6,%ymm6
6475	vpxor	96+64(%rsi),%ymm10,%ymm10
6476	vmovdqu	%ymm15,0+64(%rdi)
6477	vmovdqu	%ymm2,32+64(%rdi)
6478	vmovdqu	%ymm6,64+64(%rdi)
6479	vmovdqu	%ymm10,96+64(%rdi)
6480	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
6481	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
6482	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
6483	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
6484	vpxor	0+192(%rsi),%ymm15,%ymm15
6485	vpxor	32+192(%rsi),%ymm1,%ymm1
6486	vpxor	64+192(%rsi),%ymm5,%ymm5
6487	vpxor	96+192(%rsi),%ymm9,%ymm9
6488	vmovdqu	%ymm15,0+192(%rdi)
6489	vmovdqu	%ymm1,32+192(%rdi)
6490	vmovdqu	%ymm5,64+192(%rdi)
6491	vmovdqu	%ymm9,96+192(%rdi)
6492	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
6493	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
6494	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
6495	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
6496	vmovdqa	%ymm15,%ymm8
6497
6498	leaq	320(%rsi),%rsi
6499	subq	$320,%rbx
6500	movq	$320,%rcx
6501	cmpq	$128,%rbx
6502	jbe	L$seal_avx2_short_hash_remainder
6503	vpxor	0(%rsi),%ymm0,%ymm0
6504	vpxor	32(%rsi),%ymm4,%ymm4
6505	vpxor	64(%rsi),%ymm8,%ymm8
6506	vpxor	96(%rsi),%ymm12,%ymm12
6507	vmovdqu	%ymm0,320(%rdi)
6508	vmovdqu	%ymm4,352(%rdi)
6509	vmovdqu	%ymm8,384(%rdi)
6510	vmovdqu	%ymm12,416(%rdi)
6511	leaq	128(%rsi),%rsi
6512	subq	$128,%rbx
6513	movq	$8,%rcx
6514	movq	$2,%r8
6515	cmpq	$128,%rbx
6516	jbe	L$seal_avx2_tail_128
6517	cmpq	$256,%rbx
6518	jbe	L$seal_avx2_tail_256
6519	cmpq	$384,%rbx
6520	jbe	L$seal_avx2_tail_384
6521	cmpq	$512,%rbx
6522	jbe	L$seal_avx2_tail_512
6523	vmovdqa	L$chacha20_consts(%rip),%ymm0
6524	vmovdqa	0+64(%rbp),%ymm4
6525	vmovdqa	0+96(%rbp),%ymm8
6526	vmovdqa	%ymm0,%ymm1
6527	vmovdqa	%ymm4,%ymm5
6528	vmovdqa	%ymm8,%ymm9
6529	vmovdqa	%ymm0,%ymm2
6530	vmovdqa	%ymm4,%ymm6
6531	vmovdqa	%ymm8,%ymm10
6532	vmovdqa	%ymm0,%ymm3
6533	vmovdqa	%ymm4,%ymm7
6534	vmovdqa	%ymm8,%ymm11
6535	vmovdqa	L$avx2_inc(%rip),%ymm12
6536	vpaddd	0+160(%rbp),%ymm12,%ymm15
6537	vpaddd	%ymm15,%ymm12,%ymm14
6538	vpaddd	%ymm14,%ymm12,%ymm13
6539	vpaddd	%ymm13,%ymm12,%ymm12
6540	vmovdqa	%ymm15,0+256(%rbp)
6541	vmovdqa	%ymm14,0+224(%rbp)
6542	vmovdqa	%ymm13,0+192(%rbp)
6543	vmovdqa	%ymm12,0+160(%rbp)
6544	vmovdqa	%ymm8,0+128(%rbp)
6545	vmovdqa	L$rol16(%rip),%ymm8
6546	vpaddd	%ymm7,%ymm3,%ymm3
6547	vpaddd	%ymm6,%ymm2,%ymm2
6548	vpaddd	%ymm5,%ymm1,%ymm1
6549	vpaddd	%ymm4,%ymm0,%ymm0
6550	vpxor	%ymm3,%ymm15,%ymm15
6551	vpxor	%ymm2,%ymm14,%ymm14
6552	vpxor	%ymm1,%ymm13,%ymm13
6553	vpxor	%ymm0,%ymm12,%ymm12
6554	vpshufb	%ymm8,%ymm15,%ymm15
6555	vpshufb	%ymm8,%ymm14,%ymm14
6556	vpshufb	%ymm8,%ymm13,%ymm13
6557	vpshufb	%ymm8,%ymm12,%ymm12
6558	vpaddd	%ymm15,%ymm11,%ymm11
6559	vpaddd	%ymm14,%ymm10,%ymm10
6560	vpaddd	%ymm13,%ymm9,%ymm9
6561	vpaddd	0+128(%rbp),%ymm12,%ymm8
6562	vpxor	%ymm11,%ymm7,%ymm7
6563	vpxor	%ymm10,%ymm6,%ymm6
6564	vpxor	%ymm9,%ymm5,%ymm5
6565	vpxor	%ymm8,%ymm4,%ymm4
6566	vmovdqa	%ymm8,0+128(%rbp)
6567	vpsrld	$20,%ymm7,%ymm8
6568	vpslld	$32-20,%ymm7,%ymm7
6569	vpxor	%ymm8,%ymm7,%ymm7
6570	vpsrld	$20,%ymm6,%ymm8
6571	vpslld	$32-20,%ymm6,%ymm6
6572	vpxor	%ymm8,%ymm6,%ymm6
6573	vpsrld	$20,%ymm5,%ymm8
6574	vpslld	$32-20,%ymm5,%ymm5
6575	vpxor	%ymm8,%ymm5,%ymm5
6576	vpsrld	$20,%ymm4,%ymm8
6577	vpslld	$32-20,%ymm4,%ymm4
6578	vpxor	%ymm8,%ymm4,%ymm4
6579	vmovdqa	L$rol8(%rip),%ymm8
6580	vpaddd	%ymm7,%ymm3,%ymm3
6581	vpaddd	%ymm6,%ymm2,%ymm2
6582	vpaddd	%ymm5,%ymm1,%ymm1
6583	vpaddd	%ymm4,%ymm0,%ymm0
6584	vpxor	%ymm3,%ymm15,%ymm15
6585	vpxor	%ymm2,%ymm14,%ymm14
6586	vpxor	%ymm1,%ymm13,%ymm13
6587	vpxor	%ymm0,%ymm12,%ymm12
6588	vpshufb	%ymm8,%ymm15,%ymm15
6589	vpshufb	%ymm8,%ymm14,%ymm14
6590	vpshufb	%ymm8,%ymm13,%ymm13
6591	vpshufb	%ymm8,%ymm12,%ymm12
6592	vpaddd	%ymm15,%ymm11,%ymm11
6593	vpaddd	%ymm14,%ymm10,%ymm10
6594	vpaddd	%ymm13,%ymm9,%ymm9
6595	vpaddd	0+128(%rbp),%ymm12,%ymm8
6596	vpxor	%ymm11,%ymm7,%ymm7
6597	vpxor	%ymm10,%ymm6,%ymm6
6598	vpxor	%ymm9,%ymm5,%ymm5
6599	vpxor	%ymm8,%ymm4,%ymm4
6600	vmovdqa	%ymm8,0+128(%rbp)
6601	vpsrld	$25,%ymm7,%ymm8
6602	vpslld	$32-25,%ymm7,%ymm7
6603	vpxor	%ymm8,%ymm7,%ymm7
6604	vpsrld	$25,%ymm6,%ymm8
6605	vpslld	$32-25,%ymm6,%ymm6
6606	vpxor	%ymm8,%ymm6,%ymm6
6607	vpsrld	$25,%ymm5,%ymm8
6608	vpslld	$32-25,%ymm5,%ymm5
6609	vpxor	%ymm8,%ymm5,%ymm5
6610	vpsrld	$25,%ymm4,%ymm8
6611	vpslld	$32-25,%ymm4,%ymm4
6612	vpxor	%ymm8,%ymm4,%ymm4
6613	vmovdqa	0+128(%rbp),%ymm8
6614	vpalignr	$4,%ymm7,%ymm7,%ymm7
6615	vpalignr	$8,%ymm11,%ymm11,%ymm11
6616	vpalignr	$12,%ymm15,%ymm15,%ymm15
6617	vpalignr	$4,%ymm6,%ymm6,%ymm6
6618	vpalignr	$8,%ymm10,%ymm10,%ymm10
6619	vpalignr	$12,%ymm14,%ymm14,%ymm14
6620	vpalignr	$4,%ymm5,%ymm5,%ymm5
6621	vpalignr	$8,%ymm9,%ymm9,%ymm9
6622	vpalignr	$12,%ymm13,%ymm13,%ymm13
6623	vpalignr	$4,%ymm4,%ymm4,%ymm4
6624	vpalignr	$8,%ymm8,%ymm8,%ymm8
6625	vpalignr	$12,%ymm12,%ymm12,%ymm12
6626	vmovdqa	%ymm8,0+128(%rbp)
6627	vmovdqa	L$rol16(%rip),%ymm8
6628	vpaddd	%ymm7,%ymm3,%ymm3
6629	vpaddd	%ymm6,%ymm2,%ymm2
6630	vpaddd	%ymm5,%ymm1,%ymm1
6631	vpaddd	%ymm4,%ymm0,%ymm0
6632	vpxor	%ymm3,%ymm15,%ymm15
6633	vpxor	%ymm2,%ymm14,%ymm14
6634	vpxor	%ymm1,%ymm13,%ymm13
6635	vpxor	%ymm0,%ymm12,%ymm12
6636	vpshufb	%ymm8,%ymm15,%ymm15
6637	vpshufb	%ymm8,%ymm14,%ymm14
6638	vpshufb	%ymm8,%ymm13,%ymm13
6639	vpshufb	%ymm8,%ymm12,%ymm12
6640	vpaddd	%ymm15,%ymm11,%ymm11
6641	vpaddd	%ymm14,%ymm10,%ymm10
6642	vpaddd	%ymm13,%ymm9,%ymm9
6643	vpaddd	0+128(%rbp),%ymm12,%ymm8
6644	vpxor	%ymm11,%ymm7,%ymm7
6645	vpxor	%ymm10,%ymm6,%ymm6
6646	vpxor	%ymm9,%ymm5,%ymm5
6647	vpxor	%ymm8,%ymm4,%ymm4
6648	vmovdqa	%ymm8,0+128(%rbp)
6649	vpsrld	$20,%ymm7,%ymm8
6650	vpslld	$32-20,%ymm7,%ymm7
6651	vpxor	%ymm8,%ymm7,%ymm7
6652	vpsrld	$20,%ymm6,%ymm8
6653	vpslld	$32-20,%ymm6,%ymm6
6654	vpxor	%ymm8,%ymm6,%ymm6
6655	vpsrld	$20,%ymm5,%ymm8
6656	vpslld	$32-20,%ymm5,%ymm5
6657	vpxor	%ymm8,%ymm5,%ymm5
6658	vpsrld	$20,%ymm4,%ymm8
6659	vpslld	$32-20,%ymm4,%ymm4
6660	vpxor	%ymm8,%ymm4,%ymm4
6661	vmovdqa	L$rol8(%rip),%ymm8
6662	vpaddd	%ymm7,%ymm3,%ymm3
6663	vpaddd	%ymm6,%ymm2,%ymm2
6664	vpaddd	%ymm5,%ymm1,%ymm1
6665	vpaddd	%ymm4,%ymm0,%ymm0
6666	vpxor	%ymm3,%ymm15,%ymm15
6667	vpxor	%ymm2,%ymm14,%ymm14
6668	vpxor	%ymm1,%ymm13,%ymm13
6669	vpxor	%ymm0,%ymm12,%ymm12
6670	vpshufb	%ymm8,%ymm15,%ymm15
6671	vpshufb	%ymm8,%ymm14,%ymm14
6672	vpshufb	%ymm8,%ymm13,%ymm13
6673	vpshufb	%ymm8,%ymm12,%ymm12
6674	vpaddd	%ymm15,%ymm11,%ymm11
6675	vpaddd	%ymm14,%ymm10,%ymm10
6676	vpaddd	%ymm13,%ymm9,%ymm9
6677	vpaddd	0+128(%rbp),%ymm12,%ymm8
6678	vpxor	%ymm11,%ymm7,%ymm7
6679	vpxor	%ymm10,%ymm6,%ymm6
6680	vpxor	%ymm9,%ymm5,%ymm5
6681	vpxor	%ymm8,%ymm4,%ymm4
6682	vmovdqa	%ymm8,0+128(%rbp)
6683	vpsrld	$25,%ymm7,%ymm8
6684	vpslld	$32-25,%ymm7,%ymm7
6685	vpxor	%ymm8,%ymm7,%ymm7
6686	vpsrld	$25,%ymm6,%ymm8
6687	vpslld	$32-25,%ymm6,%ymm6
6688	vpxor	%ymm8,%ymm6,%ymm6
6689	vpsrld	$25,%ymm5,%ymm8
6690	vpslld	$32-25,%ymm5,%ymm5
6691	vpxor	%ymm8,%ymm5,%ymm5
6692	vpsrld	$25,%ymm4,%ymm8
6693	vpslld	$32-25,%ymm4,%ymm4
6694	vpxor	%ymm8,%ymm4,%ymm4
6695	vmovdqa	0+128(%rbp),%ymm8
6696	vpalignr	$12,%ymm7,%ymm7,%ymm7
6697	vpalignr	$8,%ymm11,%ymm11,%ymm11
6698	vpalignr	$4,%ymm15,%ymm15,%ymm15
6699	vpalignr	$12,%ymm6,%ymm6,%ymm6
6700	vpalignr	$8,%ymm10,%ymm10,%ymm10
6701	vpalignr	$4,%ymm14,%ymm14,%ymm14
6702	vpalignr	$12,%ymm5,%ymm5,%ymm5
6703	vpalignr	$8,%ymm9,%ymm9,%ymm9
6704	vpalignr	$4,%ymm13,%ymm13,%ymm13
6705	vpalignr	$12,%ymm4,%ymm4,%ymm4
6706	vpalignr	$8,%ymm8,%ymm8,%ymm8
6707	vpalignr	$4,%ymm12,%ymm12,%ymm12
6708	vmovdqa	%ymm8,0+128(%rbp)
6709	vmovdqa	L$rol16(%rip),%ymm8
6710	vpaddd	%ymm7,%ymm3,%ymm3
6711	vpaddd	%ymm6,%ymm2,%ymm2
6712	vpaddd	%ymm5,%ymm1,%ymm1
6713	vpaddd	%ymm4,%ymm0,%ymm0
6714	vpxor	%ymm3,%ymm15,%ymm15
6715	vpxor	%ymm2,%ymm14,%ymm14
6716	vpxor	%ymm1,%ymm13,%ymm13
6717	vpxor	%ymm0,%ymm12,%ymm12
6718	vpshufb	%ymm8,%ymm15,%ymm15
6719	vpshufb	%ymm8,%ymm14,%ymm14
6720	vpshufb	%ymm8,%ymm13,%ymm13
6721	vpshufb	%ymm8,%ymm12,%ymm12
6722	vpaddd	%ymm15,%ymm11,%ymm11
6723	vpaddd	%ymm14,%ymm10,%ymm10
6724	vpaddd	%ymm13,%ymm9,%ymm9
6725	vpaddd	0+128(%rbp),%ymm12,%ymm8
6726	vpxor	%ymm11,%ymm7,%ymm7
6727	vpxor	%ymm10,%ymm6,%ymm6
6728	vpxor	%ymm9,%ymm5,%ymm5
6729	vpxor	%ymm8,%ymm4,%ymm4
6730	vmovdqa	%ymm8,0+128(%rbp)
6731	vpsrld	$20,%ymm7,%ymm8
6732	vpslld	$32-20,%ymm7,%ymm7
6733	vpxor	%ymm8,%ymm7,%ymm7
6734	vpsrld	$20,%ymm6,%ymm8
6735	vpslld	$32-20,%ymm6,%ymm6
6736	vpxor	%ymm8,%ymm6,%ymm6
6737	vpsrld	$20,%ymm5,%ymm8
6738	vpslld	$32-20,%ymm5,%ymm5
6739	vpxor	%ymm8,%ymm5,%ymm5
6740	vpsrld	$20,%ymm4,%ymm8
6741	vpslld	$32-20,%ymm4,%ymm4
6742	vpxor	%ymm8,%ymm4,%ymm4
6743	vmovdqa	L$rol8(%rip),%ymm8
6744	vpaddd	%ymm7,%ymm3,%ymm3
6745	vpaddd	%ymm6,%ymm2,%ymm2
6746	vpaddd	%ymm5,%ymm1,%ymm1
6747	vpaddd	%ymm4,%ymm0,%ymm0
6748	vpxor	%ymm3,%ymm15,%ymm15
6749
6750	subq	$16,%rdi
6751	movq	$9,%rcx
6752	jmp	L$seal_avx2_main_loop_rounds_entry
6753.p2align	5
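// Annotation: main seal loop. Each iteration generates another 512 bytes of
// keystream while hashing the ciphertext written by the previous iteration,
// three 16-byte Poly1305 blocks per pass of the round loop (%rdi advances by
// 48 inside L$seal_avx2_main_loop_rounds).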
6754L$seal_avx2_main_loop:
6755	vmovdqa	L$chacha20_consts(%rip),%ymm0
6756	vmovdqa	0+64(%rbp),%ymm4
6757	vmovdqa	0+96(%rbp),%ymm8
6758	vmovdqa	%ymm0,%ymm1
6759	vmovdqa	%ymm4,%ymm5
6760	vmovdqa	%ymm8,%ymm9
6761	vmovdqa	%ymm0,%ymm2
6762	vmovdqa	%ymm4,%ymm6
6763	vmovdqa	%ymm8,%ymm10
6764	vmovdqa	%ymm0,%ymm3
6765	vmovdqa	%ymm4,%ymm7
6766	vmovdqa	%ymm8,%ymm11
6767	vmovdqa	L$avx2_inc(%rip),%ymm12
6768	vpaddd	0+160(%rbp),%ymm12,%ymm15
6769	vpaddd	%ymm15,%ymm12,%ymm14
6770	vpaddd	%ymm14,%ymm12,%ymm13
6771	vpaddd	%ymm13,%ymm12,%ymm12
6772	vmovdqa	%ymm15,0+256(%rbp)
6773	vmovdqa	%ymm14,0+224(%rbp)
6774	vmovdqa	%ymm13,0+192(%rbp)
6775	vmovdqa	%ymm12,0+160(%rbp)
6776
6777	movq	$10,%rcx
6778.p2align	5
6779L$seal_avx2_main_loop_rounds:
6780	addq	0+0(%rdi),%r10
6781	adcq	8+0(%rdi),%r11
6782	adcq	$1,%r12
6783	vmovdqa	%ymm8,0+128(%rbp)
6784	vmovdqa	L$rol16(%rip),%ymm8
6785	vpaddd	%ymm7,%ymm3,%ymm3
6786	vpaddd	%ymm6,%ymm2,%ymm2
6787	vpaddd	%ymm5,%ymm1,%ymm1
6788	vpaddd	%ymm4,%ymm0,%ymm0
6789	vpxor	%ymm3,%ymm15,%ymm15
6790	vpxor	%ymm2,%ymm14,%ymm14
6791	vpxor	%ymm1,%ymm13,%ymm13
6792	vpxor	%ymm0,%ymm12,%ymm12
6793	movq	0+0+0(%rbp),%rdx
6794	movq	%rdx,%r15
6795	mulxq	%r10,%r13,%r14
6796	mulxq	%r11,%rax,%rdx
6797	imulq	%r12,%r15
6798	addq	%rax,%r14
6799	adcq	%rdx,%r15
6800	vpshufb	%ymm8,%ymm15,%ymm15
6801	vpshufb	%ymm8,%ymm14,%ymm14
6802	vpshufb	%ymm8,%ymm13,%ymm13
6803	vpshufb	%ymm8,%ymm12,%ymm12
6804	vpaddd	%ymm15,%ymm11,%ymm11
6805	vpaddd	%ymm14,%ymm10,%ymm10
6806	vpaddd	%ymm13,%ymm9,%ymm9
6807	vpaddd	0+128(%rbp),%ymm12,%ymm8
6808	vpxor	%ymm11,%ymm7,%ymm7
6809	movq	8+0+0(%rbp),%rdx
6810	mulxq	%r10,%r10,%rax
6811	addq	%r10,%r14
6812	mulxq	%r11,%r11,%r9
6813	adcq	%r11,%r15
6814	adcq	$0,%r9
6815	imulq	%r12,%rdx
6816	vpxor	%ymm10,%ymm6,%ymm6
6817	vpxor	%ymm9,%ymm5,%ymm5
6818	vpxor	%ymm8,%ymm4,%ymm4
6819	vmovdqa	%ymm8,0+128(%rbp)
6820	vpsrld	$20,%ymm7,%ymm8
6821	vpslld	$32-20,%ymm7,%ymm7
6822	vpxor	%ymm8,%ymm7,%ymm7
6823	vpsrld	$20,%ymm6,%ymm8
6824	vpslld	$32-20,%ymm6,%ymm6
6825	vpxor	%ymm8,%ymm6,%ymm6
6826	vpsrld	$20,%ymm5,%ymm8
6827	vpslld	$32-20,%ymm5,%ymm5
6828	addq	%rax,%r15
6829	adcq	%rdx,%r9
6830	vpxor	%ymm8,%ymm5,%ymm5
6831	vpsrld	$20,%ymm4,%ymm8
6832	vpslld	$32-20,%ymm4,%ymm4
6833	vpxor	%ymm8,%ymm4,%ymm4
6834	vmovdqa	L$rol8(%rip),%ymm8
6835	vpaddd	%ymm7,%ymm3,%ymm3
6836	vpaddd	%ymm6,%ymm2,%ymm2
6837	vpaddd	%ymm5,%ymm1,%ymm1
6838	vpaddd	%ymm4,%ymm0,%ymm0
6839	vpxor	%ymm3,%ymm15,%ymm15
6840	movq	%r13,%r10
6841	movq	%r14,%r11
6842	movq	%r15,%r12
6843	andq	$3,%r12
6844	movq	%r15,%r13
6845	andq	$-4,%r13
6846	movq	%r9,%r14
6847	shrdq	$2,%r9,%r15
6848	shrq	$2,%r9
6849	addq	%r13,%r15
6850	adcq	%r14,%r9
6851	addq	%r15,%r10
6852	adcq	%r9,%r11
6853	adcq	$0,%r12
6854
6855L$seal_avx2_main_loop_rounds_entry:
6856	vpxor	%ymm2,%ymm14,%ymm14
6857	vpxor	%ymm1,%ymm13,%ymm13
6858	vpxor	%ymm0,%ymm12,%ymm12
6859	vpshufb	%ymm8,%ymm15,%ymm15
6860	vpshufb	%ymm8,%ymm14,%ymm14
6861	vpshufb	%ymm8,%ymm13,%ymm13
6862	vpshufb	%ymm8,%ymm12,%ymm12
6863	vpaddd	%ymm15,%ymm11,%ymm11
6864	vpaddd	%ymm14,%ymm10,%ymm10
6865	addq	0+16(%rdi),%r10
6866	adcq	8+16(%rdi),%r11
6867	adcq	$1,%r12
6868	vpaddd	%ymm13,%ymm9,%ymm9
6869	vpaddd	0+128(%rbp),%ymm12,%ymm8
6870	vpxor	%ymm11,%ymm7,%ymm7
6871	vpxor	%ymm10,%ymm6,%ymm6
6872	vpxor	%ymm9,%ymm5,%ymm5
6873	vpxor	%ymm8,%ymm4,%ymm4
6874	vmovdqa	%ymm8,0+128(%rbp)
6875	vpsrld	$25,%ymm7,%ymm8
6876	movq	0+0+0(%rbp),%rdx
6877	movq	%rdx,%r15
6878	mulxq	%r10,%r13,%r14
6879	mulxq	%r11,%rax,%rdx
6880	imulq	%r12,%r15
6881	addq	%rax,%r14
6882	adcq	%rdx,%r15
6883	vpslld	$32-25,%ymm7,%ymm7
6884	vpxor	%ymm8,%ymm7,%ymm7
6885	vpsrld	$25,%ymm6,%ymm8
6886	vpslld	$32-25,%ymm6,%ymm6
6887	vpxor	%ymm8,%ymm6,%ymm6
6888	vpsrld	$25,%ymm5,%ymm8
6889	vpslld	$32-25,%ymm5,%ymm5
6890	vpxor	%ymm8,%ymm5,%ymm5
6891	vpsrld	$25,%ymm4,%ymm8
6892	vpslld	$32-25,%ymm4,%ymm4
6893	vpxor	%ymm8,%ymm4,%ymm4
6894	vmovdqa	0+128(%rbp),%ymm8
6895	vpalignr	$4,%ymm7,%ymm7,%ymm7
6896	vpalignr	$8,%ymm11,%ymm11,%ymm11
6897	vpalignr	$12,%ymm15,%ymm15,%ymm15
6898	vpalignr	$4,%ymm6,%ymm6,%ymm6
6899	vpalignr	$8,%ymm10,%ymm10,%ymm10
6900	vpalignr	$12,%ymm14,%ymm14,%ymm14
6901	movq	8+0+0(%rbp),%rdx
6902	mulxq	%r10,%r10,%rax
6903	addq	%r10,%r14
6904	mulxq	%r11,%r11,%r9
6905	adcq	%r11,%r15
6906	adcq	$0,%r9
6907	imulq	%r12,%rdx
6908	vpalignr	$4,%ymm5,%ymm5,%ymm5
6909	vpalignr	$8,%ymm9,%ymm9,%ymm9
6910	vpalignr	$12,%ymm13,%ymm13,%ymm13
6911	vpalignr	$4,%ymm4,%ymm4,%ymm4
6912	vpalignr	$8,%ymm8,%ymm8,%ymm8
6913	vpalignr	$12,%ymm12,%ymm12,%ymm12
6914	vmovdqa	%ymm8,0+128(%rbp)
6915	vmovdqa	L$rol16(%rip),%ymm8
6916	vpaddd	%ymm7,%ymm3,%ymm3
6917	vpaddd	%ymm6,%ymm2,%ymm2
6918	vpaddd	%ymm5,%ymm1,%ymm1
6919	vpaddd	%ymm4,%ymm0,%ymm0
6920	vpxor	%ymm3,%ymm15,%ymm15
6921	vpxor	%ymm2,%ymm14,%ymm14
6922	vpxor	%ymm1,%ymm13,%ymm13
6923	vpxor	%ymm0,%ymm12,%ymm12
6924	vpshufb	%ymm8,%ymm15,%ymm15
6925	vpshufb	%ymm8,%ymm14,%ymm14
6926	addq	%rax,%r15
6927	adcq	%rdx,%r9
6928	vpshufb	%ymm8,%ymm13,%ymm13
6929	vpshufb	%ymm8,%ymm12,%ymm12
6930	vpaddd	%ymm15,%ymm11,%ymm11
6931	vpaddd	%ymm14,%ymm10,%ymm10
6932	vpaddd	%ymm13,%ymm9,%ymm9
6933	vpaddd	0+128(%rbp),%ymm12,%ymm8
6934	vpxor	%ymm11,%ymm7,%ymm7
6935	vpxor	%ymm10,%ymm6,%ymm6
6936	vpxor	%ymm9,%ymm5,%ymm5
6937	movq	%r13,%r10
6938	movq	%r14,%r11
6939	movq	%r15,%r12
6940	andq	$3,%r12
6941	movq	%r15,%r13
6942	andq	$-4,%r13
6943	movq	%r9,%r14
6944	shrdq	$2,%r9,%r15
6945	shrq	$2,%r9
6946	addq	%r13,%r15
6947	adcq	%r14,%r9
6948	addq	%r15,%r10
6949	adcq	%r9,%r11
6950	adcq	$0,%r12
6951	vpxor	%ymm8,%ymm4,%ymm4
6952	vmovdqa	%ymm8,0+128(%rbp)
6953	vpsrld	$20,%ymm7,%ymm8
6954	vpslld	$32-20,%ymm7,%ymm7
6955	vpxor	%ymm8,%ymm7,%ymm7
6956	vpsrld	$20,%ymm6,%ymm8
6957	vpslld	$32-20,%ymm6,%ymm6
6958	vpxor	%ymm8,%ymm6,%ymm6
6959	addq	0+32(%rdi),%r10
6960	adcq	8+32(%rdi),%r11
6961	adcq	$1,%r12
6962
6963	leaq	48(%rdi),%rdi
6964	vpsrld	$20,%ymm5,%ymm8
6965	vpslld	$32-20,%ymm5,%ymm5
6966	vpxor	%ymm8,%ymm5,%ymm5
6967	vpsrld	$20,%ymm4,%ymm8
6968	vpslld	$32-20,%ymm4,%ymm4
6969	vpxor	%ymm8,%ymm4,%ymm4
6970	vmovdqa	L$rol8(%rip),%ymm8
6971	vpaddd	%ymm7,%ymm3,%ymm3
6972	vpaddd	%ymm6,%ymm2,%ymm2
6973	vpaddd	%ymm5,%ymm1,%ymm1
6974	vpaddd	%ymm4,%ymm0,%ymm0
6975	vpxor	%ymm3,%ymm15,%ymm15
6976	vpxor	%ymm2,%ymm14,%ymm14
6977	vpxor	%ymm1,%ymm13,%ymm13
6978	vpxor	%ymm0,%ymm12,%ymm12
6979	vpshufb	%ymm8,%ymm15,%ymm15
6980	vpshufb	%ymm8,%ymm14,%ymm14
6981	vpshufb	%ymm8,%ymm13,%ymm13
6982	movq	0+0+0(%rbp),%rdx
6983	movq	%rdx,%r15
6984	mulxq	%r10,%r13,%r14
6985	mulxq	%r11,%rax,%rdx
6986	imulq	%r12,%r15
6987	addq	%rax,%r14
6988	adcq	%rdx,%r15
6989	vpshufb	%ymm8,%ymm12,%ymm12
6990	vpaddd	%ymm15,%ymm11,%ymm11
6991	vpaddd	%ymm14,%ymm10,%ymm10
6992	vpaddd	%ymm13,%ymm9,%ymm9
6993	vpaddd	0+128(%rbp),%ymm12,%ymm8
6994	vpxor	%ymm11,%ymm7,%ymm7
6995	vpxor	%ymm10,%ymm6,%ymm6
6996	vpxor	%ymm9,%ymm5,%ymm5
6997	movq	8+0+0(%rbp),%rdx
6998	mulxq	%r10,%r10,%rax
6999	addq	%r10,%r14
7000	mulxq	%r11,%r11,%r9
7001	adcq	%r11,%r15
7002	adcq	$0,%r9
7003	imulq	%r12,%rdx
7004	vpxor	%ymm8,%ymm4,%ymm4
7005	vmovdqa	%ymm8,0+128(%rbp)
7006	vpsrld	$25,%ymm7,%ymm8
7007	vpslld	$32-25,%ymm7,%ymm7
7008	vpxor	%ymm8,%ymm7,%ymm7
7009	vpsrld	$25,%ymm6,%ymm8
7010	vpslld	$32-25,%ymm6,%ymm6
7011	vpxor	%ymm8,%ymm6,%ymm6
7012	addq	%rax,%r15
7013	adcq	%rdx,%r9
7014	vpsrld	$25,%ymm5,%ymm8
7015	vpslld	$32-25,%ymm5,%ymm5
7016	vpxor	%ymm8,%ymm5,%ymm5
7017	vpsrld	$25,%ymm4,%ymm8
7018	vpslld	$32-25,%ymm4,%ymm4
7019	vpxor	%ymm8,%ymm4,%ymm4
7020	vmovdqa	0+128(%rbp),%ymm8
7021	vpalignr	$12,%ymm7,%ymm7,%ymm7
7022	vpalignr	$8,%ymm11,%ymm11,%ymm11
7023	vpalignr	$4,%ymm15,%ymm15,%ymm15
7024	vpalignr	$12,%ymm6,%ymm6,%ymm6
7025	vpalignr	$8,%ymm10,%ymm10,%ymm10
7026	vpalignr	$4,%ymm14,%ymm14,%ymm14
7027	vpalignr	$12,%ymm5,%ymm5,%ymm5
7028	vpalignr	$8,%ymm9,%ymm9,%ymm9
7029	vpalignr	$4,%ymm13,%ymm13,%ymm13
7030	vpalignr	$12,%ymm4,%ymm4,%ymm4
7031	vpalignr	$8,%ymm8,%ymm8,%ymm8
7032	movq	%r13,%r10
7033	movq	%r14,%r11
7034	movq	%r15,%r12
7035	andq	$3,%r12
7036	movq	%r15,%r13
7037	andq	$-4,%r13
7038	movq	%r9,%r14
7039	shrdq	$2,%r9,%r15
7040	shrq	$2,%r9
7041	addq	%r13,%r15
7042	adcq	%r14,%r9
7043	addq	%r15,%r10
7044	adcq	%r9,%r11
7045	adcq	$0,%r12
7046	vpalignr	$4,%ymm12,%ymm12,%ymm12
7047
7048	decq	%rcx
7049	jne	L$seal_avx2_main_loop_rounds
7050	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
7051	vpaddd	0+64(%rbp),%ymm7,%ymm7
7052	vpaddd	0+96(%rbp),%ymm11,%ymm11
7053	vpaddd	0+256(%rbp),%ymm15,%ymm15
7054	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
7055	vpaddd	0+64(%rbp),%ymm6,%ymm6
7056	vpaddd	0+96(%rbp),%ymm10,%ymm10
7057	vpaddd	0+224(%rbp),%ymm14,%ymm14
7058	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7059	vpaddd	0+64(%rbp),%ymm5,%ymm5
7060	vpaddd	0+96(%rbp),%ymm9,%ymm9
7061	vpaddd	0+192(%rbp),%ymm13,%ymm13
7062	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7063	vpaddd	0+64(%rbp),%ymm4,%ymm4
7064	vpaddd	0+96(%rbp),%ymm8,%ymm8
7065	vpaddd	0+160(%rbp),%ymm12,%ymm12
7066
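// Poly1305 trails the output pointer; absorb two more 16-byte blocks here so
// the whole 512-byte chunk is hashed before the next one is written past it.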
7067	vmovdqa	%ymm0,0+128(%rbp)
7068	addq	0+0(%rdi),%r10
7069	adcq	8+0(%rdi),%r11
7070	adcq	$1,%r12
7071	movq	0+0+0(%rbp),%rdx
7072	movq	%rdx,%r15
7073	mulxq	%r10,%r13,%r14
7074	mulxq	%r11,%rax,%rdx
7075	imulq	%r12,%r15
7076	addq	%rax,%r14
7077	adcq	%rdx,%r15
7078	movq	8+0+0(%rbp),%rdx
7079	mulxq	%r10,%r10,%rax
7080	addq	%r10,%r14
7081	mulxq	%r11,%r11,%r9
7082	adcq	%r11,%r15
7083	adcq	$0,%r9
7084	imulq	%r12,%rdx
7085	addq	%rax,%r15
7086	adcq	%rdx,%r9
7087	movq	%r13,%r10
7088	movq	%r14,%r11
7089	movq	%r15,%r12
7090	andq	$3,%r12
7091	movq	%r15,%r13
7092	andq	$-4,%r13
7093	movq	%r9,%r14
7094	shrdq	$2,%r9,%r15
7095	shrq	$2,%r9
7096	addq	%r13,%r15
7097	adcq	%r14,%r9
7098	addq	%r15,%r10
7099	adcq	%r9,%r11
7100	adcq	$0,%r12
7101	addq	0+16(%rdi),%r10
7102	adcq	8+16(%rdi),%r11
7103	adcq	$1,%r12
7104	movq	0+0+0(%rbp),%rdx
7105	movq	%rdx,%r15
7106	mulxq	%r10,%r13,%r14
7107	mulxq	%r11,%rax,%rdx
7108	imulq	%r12,%r15
7109	addq	%rax,%r14
7110	adcq	%rdx,%r15
7111	movq	8+0+0(%rbp),%rdx
7112	mulxq	%r10,%r10,%rax
7113	addq	%r10,%r14
7114	mulxq	%r11,%r11,%r9
7115	adcq	%r11,%r15
7116	adcq	$0,%r9
7117	imulq	%r12,%rdx
7118	addq	%rax,%r15
7119	adcq	%rdx,%r9
7120	movq	%r13,%r10
7121	movq	%r14,%r11
7122	movq	%r15,%r12
7123	andq	$3,%r12
7124	movq	%r15,%r13
7125	andq	$-4,%r13
7126	movq	%r9,%r14
7127	shrdq	$2,%r9,%r15
7128	shrq	$2,%r9
7129	addq	%r13,%r15
7130	adcq	%r14,%r9
7131	addq	%r15,%r10
7132	adcq	%r9,%r11
7133	adcq	$0,%r12
7134
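// Advance past the bytes just hashed, recombine the 128-bit lanes of each
// state pair into consecutive 64-byte blocks, XOR 512 bytes of plaintext from
// %rsi and store the ciphertext at %rdi.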
7135	leaq	32(%rdi),%rdi
7136	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
7137	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
7138	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
7139	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
7140	vpxor	0+0(%rsi),%ymm0,%ymm0
7141	vpxor	32+0(%rsi),%ymm3,%ymm3
7142	vpxor	64+0(%rsi),%ymm7,%ymm7
7143	vpxor	96+0(%rsi),%ymm11,%ymm11
7144	vmovdqu	%ymm0,0+0(%rdi)
7145	vmovdqu	%ymm3,32+0(%rdi)
7146	vmovdqu	%ymm7,64+0(%rdi)
7147	vmovdqu	%ymm11,96+0(%rdi)
7148
7149	vmovdqa	0+128(%rbp),%ymm0
7150	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7151	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7152	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7153	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7154	vpxor	0+128(%rsi),%ymm3,%ymm3
7155	vpxor	32+128(%rsi),%ymm2,%ymm2
7156	vpxor	64+128(%rsi),%ymm6,%ymm6
7157	vpxor	96+128(%rsi),%ymm10,%ymm10
7158	vmovdqu	%ymm3,0+128(%rdi)
7159	vmovdqu	%ymm2,32+128(%rdi)
7160	vmovdqu	%ymm6,64+128(%rdi)
7161	vmovdqu	%ymm10,96+128(%rdi)
7162	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7163	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7164	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7165	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7166	vpxor	0+256(%rsi),%ymm3,%ymm3
7167	vpxor	32+256(%rsi),%ymm1,%ymm1
7168	vpxor	64+256(%rsi),%ymm5,%ymm5
7169	vpxor	96+256(%rsi),%ymm9,%ymm9
7170	vmovdqu	%ymm3,0+256(%rdi)
7171	vmovdqu	%ymm1,32+256(%rdi)
7172	vmovdqu	%ymm5,64+256(%rdi)
7173	vmovdqu	%ymm9,96+256(%rdi)
7174	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
7175	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
7176	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
7177	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
7178	vpxor	0+384(%rsi),%ymm3,%ymm3
7179	vpxor	32+384(%rsi),%ymm0,%ymm0
7180	vpxor	64+384(%rsi),%ymm4,%ymm4
7181	vpxor	96+384(%rsi),%ymm8,%ymm8
7182	vmovdqu	%ymm3,0+384(%rdi)
7183	vmovdqu	%ymm0,32+384(%rdi)
7184	vmovdqu	%ymm4,64+384(%rdi)
7185	vmovdqu	%ymm8,96+384(%rdi)
7186
7187	leaq	512(%rsi),%rsi
7188	subq	$512,%rbx
7189	cmpq	$512,%rbx
7190	jg	L$seal_avx2_main_loop
7191
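// The chunk just written has not been hashed yet; absorb its first 32 bytes
// now, and let the tail rounds below absorb the rest while the final key
// stream is generated.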
7192	addq	0+0(%rdi),%r10
7193	adcq	8+0(%rdi),%r11
7194	adcq	$1,%r12
7195	movq	0+0+0(%rbp),%rdx
7196	movq	%rdx,%r15
7197	mulxq	%r10,%r13,%r14
7198	mulxq	%r11,%rax,%rdx
7199	imulq	%r12,%r15
7200	addq	%rax,%r14
7201	adcq	%rdx,%r15
7202	movq	8+0+0(%rbp),%rdx
7203	mulxq	%r10,%r10,%rax
7204	addq	%r10,%r14
7205	mulxq	%r11,%r11,%r9
7206	adcq	%r11,%r15
7207	adcq	$0,%r9
7208	imulq	%r12,%rdx
7209	addq	%rax,%r15
7210	adcq	%rdx,%r9
7211	movq	%r13,%r10
7212	movq	%r14,%r11
7213	movq	%r15,%r12
7214	andq	$3,%r12
7215	movq	%r15,%r13
7216	andq	$-4,%r13
7217	movq	%r9,%r14
7218	shrdq	$2,%r9,%r15
7219	shrq	$2,%r9
7220	addq	%r13,%r15
7221	adcq	%r14,%r9
7222	addq	%r15,%r10
7223	adcq	%r9,%r11
7224	adcq	$0,%r12
7225	addq	0+16(%rdi),%r10
7226	adcq	8+16(%rdi),%r11
7227	adcq	$1,%r12
7228	movq	0+0+0(%rbp),%rdx
7229	movq	%rdx,%r15
7230	mulxq	%r10,%r13,%r14
7231	mulxq	%r11,%rax,%rdx
7232	imulq	%r12,%r15
7233	addq	%rax,%r14
7234	adcq	%rdx,%r15
7235	movq	8+0+0(%rbp),%rdx
7236	mulxq	%r10,%r10,%rax
7237	addq	%r10,%r14
7238	mulxq	%r11,%r11,%r9
7239	adcq	%r11,%r15
7240	adcq	$0,%r9
7241	imulq	%r12,%rdx
7242	addq	%rax,%r15
7243	adcq	%rdx,%r9
7244	movq	%r13,%r10
7245	movq	%r14,%r11
7246	movq	%r15,%r12
7247	andq	$3,%r12
7248	movq	%r15,%r13
7249	andq	$-4,%r13
7250	movq	%r9,%r14
7251	shrdq	$2,%r9,%r15
7252	shrq	$2,%r9
7253	addq	%r13,%r15
7254	adcq	%r14,%r9
7255	addq	%r15,%r10
7256	adcq	%r9,%r11
7257	adcq	$0,%r12
7258
7259	leaq	32(%rdi),%rdi
7260	movq	$10,%rcx
7261	xorq	%r8,%r8
7262
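// At most 512 bytes of plaintext remain; each tail runs just enough parallel
// ChaCha20 states for that length while its per-iteration hashing catches
// Poly1305 up on ciphertext written above.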
7263	cmpq	$384,%rbx
7264	ja	L$seal_avx2_tail_512
7265	cmpq	$256,%rbx
7266	ja	L$seal_avx2_tail_384
7267	cmpq	$128,%rbx
7268	ja	L$seal_avx2_tail_256
7269
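// At most 128 bytes left: a single two-block state supplies the key stream;
// the short loop below performs the XOR and store.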
7270L$seal_avx2_tail_128:
7271	vmovdqa	L$chacha20_consts(%rip),%ymm0
7272	vmovdqa	0+64(%rbp),%ymm4
7273	vmovdqa	0+96(%rbp),%ymm8
7274	vmovdqa	L$avx2_inc(%rip),%ymm12
7275	vpaddd	0+160(%rbp),%ymm12,%ymm12
7276	vmovdqa	%ymm12,0+160(%rbp)
7277
7278L$seal_avx2_tail_128_rounds_and_3xhash:
7279	addq	0+0(%rdi),%r10
7280	adcq	8+0(%rdi),%r11
7281	adcq	$1,%r12
7282	movq	0+0+0(%rbp),%rdx
7283	movq	%rdx,%r15
7284	mulxq	%r10,%r13,%r14
7285	mulxq	%r11,%rax,%rdx
7286	imulq	%r12,%r15
7287	addq	%rax,%r14
7288	adcq	%rdx,%r15
7289	movq	8+0+0(%rbp),%rdx
7290	mulxq	%r10,%r10,%rax
7291	addq	%r10,%r14
7292	mulxq	%r11,%r11,%r9
7293	adcq	%r11,%r15
7294	adcq	$0,%r9
7295	imulq	%r12,%rdx
7296	addq	%rax,%r15
7297	adcq	%rdx,%r9
7298	movq	%r13,%r10
7299	movq	%r14,%r11
7300	movq	%r15,%r12
7301	andq	$3,%r12
7302	movq	%r15,%r13
7303	andq	$-4,%r13
7304	movq	%r9,%r14
7305	shrdq	$2,%r9,%r15
7306	shrq	$2,%r9
7307	addq	%r13,%r15
7308	adcq	%r14,%r9
7309	addq	%r15,%r10
7310	adcq	%r9,%r11
7311	adcq	$0,%r12
7312
7313	leaq	16(%rdi),%rdi
7314L$seal_avx2_tail_128_rounds_and_2xhash:
7315	vpaddd	%ymm4,%ymm0,%ymm0
7316	vpxor	%ymm0,%ymm12,%ymm12
7317	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7318	vpaddd	%ymm12,%ymm8,%ymm8
7319	vpxor	%ymm8,%ymm4,%ymm4
7320	vpsrld	$20,%ymm4,%ymm3
7321	vpslld	$12,%ymm4,%ymm4
7322	vpxor	%ymm3,%ymm4,%ymm4
7323	vpaddd	%ymm4,%ymm0,%ymm0
7324	vpxor	%ymm0,%ymm12,%ymm12
7325	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7326	vpaddd	%ymm12,%ymm8,%ymm8
7327	vpxor	%ymm8,%ymm4,%ymm4
7328	vpslld	$7,%ymm4,%ymm3
7329	vpsrld	$25,%ymm4,%ymm4
7330	vpxor	%ymm3,%ymm4,%ymm4
7331	vpalignr	$12,%ymm12,%ymm12,%ymm12
7332	vpalignr	$8,%ymm8,%ymm8,%ymm8
7333	vpalignr	$4,%ymm4,%ymm4,%ymm4
7334	addq	0+0(%rdi),%r10
7335	adcq	8+0(%rdi),%r11
7336	adcq	$1,%r12
7337	movq	0+0+0(%rbp),%rdx
7338	movq	%rdx,%r15
7339	mulxq	%r10,%r13,%r14
7340	mulxq	%r11,%rax,%rdx
7341	imulq	%r12,%r15
7342	addq	%rax,%r14
7343	adcq	%rdx,%r15
7344	movq	8+0+0(%rbp),%rdx
7345	mulxq	%r10,%r10,%rax
7346	addq	%r10,%r14
7347	mulxq	%r11,%r11,%r9
7348	adcq	%r11,%r15
7349	adcq	$0,%r9
7350	imulq	%r12,%rdx
7351	addq	%rax,%r15
7352	adcq	%rdx,%r9
7353	movq	%r13,%r10
7354	movq	%r14,%r11
7355	movq	%r15,%r12
7356	andq	$3,%r12
7357	movq	%r15,%r13
7358	andq	$-4,%r13
7359	movq	%r9,%r14
7360	shrdq	$2,%r9,%r15
7361	shrq	$2,%r9
7362	addq	%r13,%r15
7363	adcq	%r14,%r9
7364	addq	%r15,%r10
7365	adcq	%r9,%r11
7366	adcq	$0,%r12
7367	vpaddd	%ymm4,%ymm0,%ymm0
7368	vpxor	%ymm0,%ymm12,%ymm12
7369	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7370	vpaddd	%ymm12,%ymm8,%ymm8
7371	vpxor	%ymm8,%ymm4,%ymm4
7372	vpsrld	$20,%ymm4,%ymm3
7373	vpslld	$12,%ymm4,%ymm4
7374	vpxor	%ymm3,%ymm4,%ymm4
7375	vpaddd	%ymm4,%ymm0,%ymm0
7376	vpxor	%ymm0,%ymm12,%ymm12
7377	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7378	vpaddd	%ymm12,%ymm8,%ymm8
7379	vpxor	%ymm8,%ymm4,%ymm4
7380	vpslld	$7,%ymm4,%ymm3
7381	vpsrld	$25,%ymm4,%ymm4
7382	vpxor	%ymm3,%ymm4,%ymm4
7383	vpalignr	$4,%ymm12,%ymm12,%ymm12
7384	vpalignr	$8,%ymm8,%ymm8,%ymm8
7385	vpalignr	$12,%ymm4,%ymm4,%ymm4
7386	addq	0+16(%rdi),%r10
7387	adcq	8+16(%rdi),%r11
7388	adcq	$1,%r12
7389	movq	0+0+0(%rbp),%rdx
7390	movq	%rdx,%r15
7391	mulxq	%r10,%r13,%r14
7392	mulxq	%r11,%rax,%rdx
7393	imulq	%r12,%r15
7394	addq	%rax,%r14
7395	adcq	%rdx,%r15
7396	movq	8+0+0(%rbp),%rdx
7397	mulxq	%r10,%r10,%rax
7398	addq	%r10,%r14
7399	mulxq	%r11,%r11,%r9
7400	adcq	%r11,%r15
7401	adcq	$0,%r9
7402	imulq	%r12,%rdx
7403	addq	%rax,%r15
7404	adcq	%rdx,%r9
7405	movq	%r13,%r10
7406	movq	%r14,%r11
7407	movq	%r15,%r12
7408	andq	$3,%r12
7409	movq	%r15,%r13
7410	andq	$-4,%r13
7411	movq	%r9,%r14
7412	shrdq	$2,%r9,%r15
7413	shrq	$2,%r9
7414	addq	%r13,%r15
7415	adcq	%r14,%r9
7416	addq	%r15,%r10
7417	adcq	%r9,%r11
7418	adcq	$0,%r12
7419
7420	leaq	32(%rdi),%rdi
7421	decq	%rcx
7422	jg	L$seal_avx2_tail_128_rounds_and_3xhash
7423	decq	%r8
7424	jge	L$seal_avx2_tail_128_rounds_and_2xhash
7425	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7426	vpaddd	0+64(%rbp),%ymm4,%ymm4
7427	vpaddd	0+96(%rbp),%ymm8,%ymm8
7428	vpaddd	0+160(%rbp),%ymm12,%ymm12
7429	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7430	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7431	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7432	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7433	vmovdqa	%ymm3,%ymm8
7434
7435	jmp	L$seal_avx2_short_loop
7436
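// 129-256 bytes left: two states. The first 128 bytes are encrypted here and
// recorded in %rcx for hashing; the short-loop code handles the remainder.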
7437L$seal_avx2_tail_256:
7438	vmovdqa	L$chacha20_consts(%rip),%ymm0
7439	vmovdqa	0+64(%rbp),%ymm4
7440	vmovdqa	0+96(%rbp),%ymm8
7441	vmovdqa	%ymm0,%ymm1
7442	vmovdqa	%ymm4,%ymm5
7443	vmovdqa	%ymm8,%ymm9
7444	vmovdqa	L$avx2_inc(%rip),%ymm12
7445	vpaddd	0+160(%rbp),%ymm12,%ymm13
7446	vpaddd	%ymm13,%ymm12,%ymm12
7447	vmovdqa	%ymm12,0+160(%rbp)
7448	vmovdqa	%ymm13,0+192(%rbp)
7449
7450L$seal_avx2_tail_256_rounds_and_3xhash:
7451	addq	0+0(%rdi),%r10
7452	adcq	8+0(%rdi),%r11
7453	adcq	$1,%r12
7454	movq	0+0+0(%rbp),%rax
7455	movq	%rax,%r15
7456	mulq	%r10
7457	movq	%rax,%r13
7458	movq	%rdx,%r14
7459	movq	0+0+0(%rbp),%rax
7460	mulq	%r11
7461	imulq	%r12,%r15
7462	addq	%rax,%r14
7463	adcq	%rdx,%r15
7464	movq	8+0+0(%rbp),%rax
7465	movq	%rax,%r9
7466	mulq	%r10
7467	addq	%rax,%r14
7468	adcq	$0,%rdx
7469	movq	%rdx,%r10
7470	movq	8+0+0(%rbp),%rax
7471	mulq	%r11
7472	addq	%rax,%r15
7473	adcq	$0,%rdx
7474	imulq	%r12,%r9
7475	addq	%r10,%r15
7476	adcq	%rdx,%r9
7477	movq	%r13,%r10
7478	movq	%r14,%r11
7479	movq	%r15,%r12
7480	andq	$3,%r12
7481	movq	%r15,%r13
7482	andq	$-4,%r13
7483	movq	%r9,%r14
7484	shrdq	$2,%r9,%r15
7485	shrq	$2,%r9
7486	addq	%r13,%r15
7487	adcq	%r14,%r9
7488	addq	%r15,%r10
7489	adcq	%r9,%r11
7490	adcq	$0,%r12
7491
7492	leaq	16(%rdi),%rdi
7493L$seal_avx2_tail_256_rounds_and_2xhash:
7494	vpaddd	%ymm4,%ymm0,%ymm0
7495	vpxor	%ymm0,%ymm12,%ymm12
7496	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7497	vpaddd	%ymm12,%ymm8,%ymm8
7498	vpxor	%ymm8,%ymm4,%ymm4
7499	vpsrld	$20,%ymm4,%ymm3
7500	vpslld	$12,%ymm4,%ymm4
7501	vpxor	%ymm3,%ymm4,%ymm4
7502	vpaddd	%ymm4,%ymm0,%ymm0
7503	vpxor	%ymm0,%ymm12,%ymm12
7504	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7505	vpaddd	%ymm12,%ymm8,%ymm8
7506	vpxor	%ymm8,%ymm4,%ymm4
7507	vpslld	$7,%ymm4,%ymm3
7508	vpsrld	$25,%ymm4,%ymm4
7509	vpxor	%ymm3,%ymm4,%ymm4
7510	vpalignr	$12,%ymm12,%ymm12,%ymm12
7511	vpalignr	$8,%ymm8,%ymm8,%ymm8
7512	vpalignr	$4,%ymm4,%ymm4,%ymm4
7513	vpaddd	%ymm5,%ymm1,%ymm1
7514	vpxor	%ymm1,%ymm13,%ymm13
7515	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7516	vpaddd	%ymm13,%ymm9,%ymm9
7517	vpxor	%ymm9,%ymm5,%ymm5
7518	vpsrld	$20,%ymm5,%ymm3
7519	vpslld	$12,%ymm5,%ymm5
7520	vpxor	%ymm3,%ymm5,%ymm5
7521	vpaddd	%ymm5,%ymm1,%ymm1
7522	vpxor	%ymm1,%ymm13,%ymm13
7523	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7524	vpaddd	%ymm13,%ymm9,%ymm9
7525	vpxor	%ymm9,%ymm5,%ymm5
7526	vpslld	$7,%ymm5,%ymm3
7527	vpsrld	$25,%ymm5,%ymm5
7528	vpxor	%ymm3,%ymm5,%ymm5
7529	vpalignr	$12,%ymm13,%ymm13,%ymm13
7530	vpalignr	$8,%ymm9,%ymm9,%ymm9
7531	vpalignr	$4,%ymm5,%ymm5,%ymm5
7532	addq	0+0(%rdi),%r10
7533	adcq	8+0(%rdi),%r11
7534	adcq	$1,%r12
7535	movq	0+0+0(%rbp),%rax
7536	movq	%rax,%r15
7537	mulq	%r10
7538	movq	%rax,%r13
7539	movq	%rdx,%r14
7540	movq	0+0+0(%rbp),%rax
7541	mulq	%r11
7542	imulq	%r12,%r15
7543	addq	%rax,%r14
7544	adcq	%rdx,%r15
7545	movq	8+0+0(%rbp),%rax
7546	movq	%rax,%r9
7547	mulq	%r10
7548	addq	%rax,%r14
7549	adcq	$0,%rdx
7550	movq	%rdx,%r10
7551	movq	8+0+0(%rbp),%rax
7552	mulq	%r11
7553	addq	%rax,%r15
7554	adcq	$0,%rdx
7555	imulq	%r12,%r9
7556	addq	%r10,%r15
7557	adcq	%rdx,%r9
7558	movq	%r13,%r10
7559	movq	%r14,%r11
7560	movq	%r15,%r12
7561	andq	$3,%r12
7562	movq	%r15,%r13
7563	andq	$-4,%r13
7564	movq	%r9,%r14
7565	shrdq	$2,%r9,%r15
7566	shrq	$2,%r9
7567	addq	%r13,%r15
7568	adcq	%r14,%r9
7569	addq	%r15,%r10
7570	adcq	%r9,%r11
7571	adcq	$0,%r12
7572	vpaddd	%ymm4,%ymm0,%ymm0
7573	vpxor	%ymm0,%ymm12,%ymm12
7574	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7575	vpaddd	%ymm12,%ymm8,%ymm8
7576	vpxor	%ymm8,%ymm4,%ymm4
7577	vpsrld	$20,%ymm4,%ymm3
7578	vpslld	$12,%ymm4,%ymm4
7579	vpxor	%ymm3,%ymm4,%ymm4
7580	vpaddd	%ymm4,%ymm0,%ymm0
7581	vpxor	%ymm0,%ymm12,%ymm12
7582	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7583	vpaddd	%ymm12,%ymm8,%ymm8
7584	vpxor	%ymm8,%ymm4,%ymm4
7585	vpslld	$7,%ymm4,%ymm3
7586	vpsrld	$25,%ymm4,%ymm4
7587	vpxor	%ymm3,%ymm4,%ymm4
7588	vpalignr	$4,%ymm12,%ymm12,%ymm12
7589	vpalignr	$8,%ymm8,%ymm8,%ymm8
7590	vpalignr	$12,%ymm4,%ymm4,%ymm4
7591	vpaddd	%ymm5,%ymm1,%ymm1
7592	vpxor	%ymm1,%ymm13,%ymm13
7593	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7594	vpaddd	%ymm13,%ymm9,%ymm9
7595	vpxor	%ymm9,%ymm5,%ymm5
7596	vpsrld	$20,%ymm5,%ymm3
7597	vpslld	$12,%ymm5,%ymm5
7598	vpxor	%ymm3,%ymm5,%ymm5
7599	vpaddd	%ymm5,%ymm1,%ymm1
7600	vpxor	%ymm1,%ymm13,%ymm13
7601	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7602	vpaddd	%ymm13,%ymm9,%ymm9
7603	vpxor	%ymm9,%ymm5,%ymm5
7604	vpslld	$7,%ymm5,%ymm3
7605	vpsrld	$25,%ymm5,%ymm5
7606	vpxor	%ymm3,%ymm5,%ymm5
7607	vpalignr	$4,%ymm13,%ymm13,%ymm13
7608	vpalignr	$8,%ymm9,%ymm9,%ymm9
7609	vpalignr	$12,%ymm5,%ymm5,%ymm5
7610	addq	0+16(%rdi),%r10
7611	adcq	8+16(%rdi),%r11
7612	adcq	$1,%r12
7613	movq	0+0+0(%rbp),%rax
7614	movq	%rax,%r15
7615	mulq	%r10
7616	movq	%rax,%r13
7617	movq	%rdx,%r14
7618	movq	0+0+0(%rbp),%rax
7619	mulq	%r11
7620	imulq	%r12,%r15
7621	addq	%rax,%r14
7622	adcq	%rdx,%r15
7623	movq	8+0+0(%rbp),%rax
7624	movq	%rax,%r9
7625	mulq	%r10
7626	addq	%rax,%r14
7627	adcq	$0,%rdx
7628	movq	%rdx,%r10
7629	movq	8+0+0(%rbp),%rax
7630	mulq	%r11
7631	addq	%rax,%r15
7632	adcq	$0,%rdx
7633	imulq	%r12,%r9
7634	addq	%r10,%r15
7635	adcq	%rdx,%r9
7636	movq	%r13,%r10
7637	movq	%r14,%r11
7638	movq	%r15,%r12
7639	andq	$3,%r12
7640	movq	%r15,%r13
7641	andq	$-4,%r13
7642	movq	%r9,%r14
7643	shrdq	$2,%r9,%r15
7644	shrq	$2,%r9
7645	addq	%r13,%r15
7646	adcq	%r14,%r9
7647	addq	%r15,%r10
7648	adcq	%r9,%r11
7649	adcq	$0,%r12
7650
7651	leaq	32(%rdi),%rdi
7652	decq	%rcx
7653	jg	L$seal_avx2_tail_256_rounds_and_3xhash
7654	decq	%r8
7655	jge	L$seal_avx2_tail_256_rounds_and_2xhash
7656	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7657	vpaddd	0+64(%rbp),%ymm5,%ymm5
7658	vpaddd	0+96(%rbp),%ymm9,%ymm9
7659	vpaddd	0+192(%rbp),%ymm13,%ymm13
7660	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7661	vpaddd	0+64(%rbp),%ymm4,%ymm4
7662	vpaddd	0+96(%rbp),%ymm8,%ymm8
7663	vpaddd	0+160(%rbp),%ymm12,%ymm12
7664	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7665	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7666	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7667	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7668	vpxor	0+0(%rsi),%ymm3,%ymm3
7669	vpxor	32+0(%rsi),%ymm1,%ymm1
7670	vpxor	64+0(%rsi),%ymm5,%ymm5
7671	vpxor	96+0(%rsi),%ymm9,%ymm9
7672	vmovdqu	%ymm3,0+0(%rdi)
7673	vmovdqu	%ymm1,32+0(%rdi)
7674	vmovdqu	%ymm5,64+0(%rdi)
7675	vmovdqu	%ymm9,96+0(%rdi)
7676	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7677	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7678	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7679	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7680	vmovdqa	%ymm3,%ymm8
7681
7682	movq	$128,%rcx
7683	leaq	128(%rsi),%rsi
7684	subq	$128,%rbx
7685	jmp	L$seal_avx2_short_hash_remainder
7686
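// 257-384 bytes left: three states. 256 bytes are encrypted here (%rcx = 256
// still to hash) and the remainder goes through the short loop.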
7687L$seal_avx2_tail_384:
7688	vmovdqa	L$chacha20_consts(%rip),%ymm0
7689	vmovdqa	0+64(%rbp),%ymm4
7690	vmovdqa	0+96(%rbp),%ymm8
7691	vmovdqa	%ymm0,%ymm1
7692	vmovdqa	%ymm4,%ymm5
7693	vmovdqa	%ymm8,%ymm9
7694	vmovdqa	%ymm0,%ymm2
7695	vmovdqa	%ymm4,%ymm6
7696	vmovdqa	%ymm8,%ymm10
7697	vmovdqa	L$avx2_inc(%rip),%ymm12
7698	vpaddd	0+160(%rbp),%ymm12,%ymm14
7699	vpaddd	%ymm14,%ymm12,%ymm13
7700	vpaddd	%ymm13,%ymm12,%ymm12
7701	vmovdqa	%ymm12,0+160(%rbp)
7702	vmovdqa	%ymm13,0+192(%rbp)
7703	vmovdqa	%ymm14,0+224(%rbp)
7704
7705L$seal_avx2_tail_384_rounds_and_3xhash:
7706	addq	0+0(%rdi),%r10
7707	adcq	8+0(%rdi),%r11
7708	adcq	$1,%r12
7709	movq	0+0+0(%rbp),%rax
7710	movq	%rax,%r15
7711	mulq	%r10
7712	movq	%rax,%r13
7713	movq	%rdx,%r14
7714	movq	0+0+0(%rbp),%rax
7715	mulq	%r11
7716	imulq	%r12,%r15
7717	addq	%rax,%r14
7718	adcq	%rdx,%r15
7719	movq	8+0+0(%rbp),%rax
7720	movq	%rax,%r9
7721	mulq	%r10
7722	addq	%rax,%r14
7723	adcq	$0,%rdx
7724	movq	%rdx,%r10
7725	movq	8+0+0(%rbp),%rax
7726	mulq	%r11
7727	addq	%rax,%r15
7728	adcq	$0,%rdx
7729	imulq	%r12,%r9
7730	addq	%r10,%r15
7731	adcq	%rdx,%r9
7732	movq	%r13,%r10
7733	movq	%r14,%r11
7734	movq	%r15,%r12
7735	andq	$3,%r12
7736	movq	%r15,%r13
7737	andq	$-4,%r13
7738	movq	%r9,%r14
7739	shrdq	$2,%r9,%r15
7740	shrq	$2,%r9
7741	addq	%r13,%r15
7742	adcq	%r14,%r9
7743	addq	%r15,%r10
7744	adcq	%r9,%r11
7745	adcq	$0,%r12
7746
7747	leaq	16(%rdi),%rdi
7748L$seal_avx2_tail_384_rounds_and_2xhash:
7749	vpaddd	%ymm4,%ymm0,%ymm0
7750	vpxor	%ymm0,%ymm12,%ymm12
7751	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7752	vpaddd	%ymm12,%ymm8,%ymm8
7753	vpxor	%ymm8,%ymm4,%ymm4
7754	vpsrld	$20,%ymm4,%ymm3
7755	vpslld	$12,%ymm4,%ymm4
7756	vpxor	%ymm3,%ymm4,%ymm4
7757	vpaddd	%ymm4,%ymm0,%ymm0
7758	vpxor	%ymm0,%ymm12,%ymm12
7759	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7760	vpaddd	%ymm12,%ymm8,%ymm8
7761	vpxor	%ymm8,%ymm4,%ymm4
7762	vpslld	$7,%ymm4,%ymm3
7763	vpsrld	$25,%ymm4,%ymm4
7764	vpxor	%ymm3,%ymm4,%ymm4
7765	vpalignr	$12,%ymm12,%ymm12,%ymm12
7766	vpalignr	$8,%ymm8,%ymm8,%ymm8
7767	vpalignr	$4,%ymm4,%ymm4,%ymm4
7768	vpaddd	%ymm5,%ymm1,%ymm1
7769	vpxor	%ymm1,%ymm13,%ymm13
7770	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7771	vpaddd	%ymm13,%ymm9,%ymm9
7772	vpxor	%ymm9,%ymm5,%ymm5
7773	vpsrld	$20,%ymm5,%ymm3
7774	vpslld	$12,%ymm5,%ymm5
7775	vpxor	%ymm3,%ymm5,%ymm5
7776	vpaddd	%ymm5,%ymm1,%ymm1
7777	vpxor	%ymm1,%ymm13,%ymm13
7778	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7779	vpaddd	%ymm13,%ymm9,%ymm9
7780	vpxor	%ymm9,%ymm5,%ymm5
7781	vpslld	$7,%ymm5,%ymm3
7782	vpsrld	$25,%ymm5,%ymm5
7783	vpxor	%ymm3,%ymm5,%ymm5
7784	vpalignr	$12,%ymm13,%ymm13,%ymm13
7785	vpalignr	$8,%ymm9,%ymm9,%ymm9
7786	vpalignr	$4,%ymm5,%ymm5,%ymm5
7787	addq	0+0(%rdi),%r10
7788	adcq	8+0(%rdi),%r11
7789	adcq	$1,%r12
7790	movq	0+0+0(%rbp),%rax
7791	movq	%rax,%r15
7792	mulq	%r10
7793	movq	%rax,%r13
7794	movq	%rdx,%r14
7795	movq	0+0+0(%rbp),%rax
7796	mulq	%r11
7797	imulq	%r12,%r15
7798	addq	%rax,%r14
7799	adcq	%rdx,%r15
7800	movq	8+0+0(%rbp),%rax
7801	movq	%rax,%r9
7802	mulq	%r10
7803	addq	%rax,%r14
7804	adcq	$0,%rdx
7805	movq	%rdx,%r10
7806	movq	8+0+0(%rbp),%rax
7807	mulq	%r11
7808	addq	%rax,%r15
7809	adcq	$0,%rdx
7810	imulq	%r12,%r9
7811	addq	%r10,%r15
7812	adcq	%rdx,%r9
7813	movq	%r13,%r10
7814	movq	%r14,%r11
7815	movq	%r15,%r12
7816	andq	$3,%r12
7817	movq	%r15,%r13
7818	andq	$-4,%r13
7819	movq	%r9,%r14
7820	shrdq	$2,%r9,%r15
7821	shrq	$2,%r9
7822	addq	%r13,%r15
7823	adcq	%r14,%r9
7824	addq	%r15,%r10
7825	adcq	%r9,%r11
7826	adcq	$0,%r12
7827	vpaddd	%ymm6,%ymm2,%ymm2
7828	vpxor	%ymm2,%ymm14,%ymm14
7829	vpshufb	L$rol16(%rip),%ymm14,%ymm14
7830	vpaddd	%ymm14,%ymm10,%ymm10
7831	vpxor	%ymm10,%ymm6,%ymm6
7832	vpsrld	$20,%ymm6,%ymm3
7833	vpslld	$12,%ymm6,%ymm6
7834	vpxor	%ymm3,%ymm6,%ymm6
7835	vpaddd	%ymm6,%ymm2,%ymm2
7836	vpxor	%ymm2,%ymm14,%ymm14
7837	vpshufb	L$rol8(%rip),%ymm14,%ymm14
7838	vpaddd	%ymm14,%ymm10,%ymm10
7839	vpxor	%ymm10,%ymm6,%ymm6
7840	vpslld	$7,%ymm6,%ymm3
7841	vpsrld	$25,%ymm6,%ymm6
7842	vpxor	%ymm3,%ymm6,%ymm6
7843	vpalignr	$12,%ymm14,%ymm14,%ymm14
7844	vpalignr	$8,%ymm10,%ymm10,%ymm10
7845	vpalignr	$4,%ymm6,%ymm6,%ymm6
7846	vpaddd	%ymm4,%ymm0,%ymm0
7847	vpxor	%ymm0,%ymm12,%ymm12
7848	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7849	vpaddd	%ymm12,%ymm8,%ymm8
7850	vpxor	%ymm8,%ymm4,%ymm4
7851	vpsrld	$20,%ymm4,%ymm3
7852	vpslld	$12,%ymm4,%ymm4
7853	vpxor	%ymm3,%ymm4,%ymm4
7854	vpaddd	%ymm4,%ymm0,%ymm0
7855	vpxor	%ymm0,%ymm12,%ymm12
7856	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7857	vpaddd	%ymm12,%ymm8,%ymm8
7858	vpxor	%ymm8,%ymm4,%ymm4
7859	vpslld	$7,%ymm4,%ymm3
7860	vpsrld	$25,%ymm4,%ymm4
7861	vpxor	%ymm3,%ymm4,%ymm4
7862	vpalignr	$4,%ymm12,%ymm12,%ymm12
7863	vpalignr	$8,%ymm8,%ymm8,%ymm8
7864	vpalignr	$12,%ymm4,%ymm4,%ymm4
7865	addq	0+16(%rdi),%r10
7866	adcq	8+16(%rdi),%r11
7867	adcq	$1,%r12
7868	movq	0+0+0(%rbp),%rax
7869	movq	%rax,%r15
7870	mulq	%r10
7871	movq	%rax,%r13
7872	movq	%rdx,%r14
7873	movq	0+0+0(%rbp),%rax
7874	mulq	%r11
7875	imulq	%r12,%r15
7876	addq	%rax,%r14
7877	adcq	%rdx,%r15
7878	movq	8+0+0(%rbp),%rax
7879	movq	%rax,%r9
7880	mulq	%r10
7881	addq	%rax,%r14
7882	adcq	$0,%rdx
7883	movq	%rdx,%r10
7884	movq	8+0+0(%rbp),%rax
7885	mulq	%r11
7886	addq	%rax,%r15
7887	adcq	$0,%rdx
7888	imulq	%r12,%r9
7889	addq	%r10,%r15
7890	adcq	%rdx,%r9
7891	movq	%r13,%r10
7892	movq	%r14,%r11
7893	movq	%r15,%r12
7894	andq	$3,%r12
7895	movq	%r15,%r13
7896	andq	$-4,%r13
7897	movq	%r9,%r14
7898	shrdq	$2,%r9,%r15
7899	shrq	$2,%r9
7900	addq	%r13,%r15
7901	adcq	%r14,%r9
7902	addq	%r15,%r10
7903	adcq	%r9,%r11
7904	adcq	$0,%r12
7905	vpaddd	%ymm5,%ymm1,%ymm1
7906	vpxor	%ymm1,%ymm13,%ymm13
7907	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7908	vpaddd	%ymm13,%ymm9,%ymm9
7909	vpxor	%ymm9,%ymm5,%ymm5
7910	vpsrld	$20,%ymm5,%ymm3
7911	vpslld	$12,%ymm5,%ymm5
7912	vpxor	%ymm3,%ymm5,%ymm5
7913	vpaddd	%ymm5,%ymm1,%ymm1
7914	vpxor	%ymm1,%ymm13,%ymm13
7915	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7916	vpaddd	%ymm13,%ymm9,%ymm9
7917	vpxor	%ymm9,%ymm5,%ymm5
7918	vpslld	$7,%ymm5,%ymm3
7919	vpsrld	$25,%ymm5,%ymm5
7920	vpxor	%ymm3,%ymm5,%ymm5
7921	vpalignr	$4,%ymm13,%ymm13,%ymm13
7922	vpalignr	$8,%ymm9,%ymm9,%ymm9
7923	vpalignr	$12,%ymm5,%ymm5,%ymm5
7924	vpaddd	%ymm6,%ymm2,%ymm2
7925	vpxor	%ymm2,%ymm14,%ymm14
7926	vpshufb	L$rol16(%rip),%ymm14,%ymm14
7927	vpaddd	%ymm14,%ymm10,%ymm10
7928	vpxor	%ymm10,%ymm6,%ymm6
7929	vpsrld	$20,%ymm6,%ymm3
7930	vpslld	$12,%ymm6,%ymm6
7931	vpxor	%ymm3,%ymm6,%ymm6
7932	vpaddd	%ymm6,%ymm2,%ymm2
7933	vpxor	%ymm2,%ymm14,%ymm14
7934	vpshufb	L$rol8(%rip),%ymm14,%ymm14
7935	vpaddd	%ymm14,%ymm10,%ymm10
7936	vpxor	%ymm10,%ymm6,%ymm6
7937	vpslld	$7,%ymm6,%ymm3
7938	vpsrld	$25,%ymm6,%ymm6
7939	vpxor	%ymm3,%ymm6,%ymm6
7940	vpalignr	$4,%ymm14,%ymm14,%ymm14
7941	vpalignr	$8,%ymm10,%ymm10,%ymm10
7942	vpalignr	$12,%ymm6,%ymm6,%ymm6
7943
7944	leaq	32(%rdi),%rdi
7945	decq	%rcx
7946	jg	L$seal_avx2_tail_384_rounds_and_3xhash
7947	decq	%r8
7948	jge	L$seal_avx2_tail_384_rounds_and_2xhash
7949	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
7950	vpaddd	0+64(%rbp),%ymm6,%ymm6
7951	vpaddd	0+96(%rbp),%ymm10,%ymm10
7952	vpaddd	0+224(%rbp),%ymm14,%ymm14
7953	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7954	vpaddd	0+64(%rbp),%ymm5,%ymm5
7955	vpaddd	0+96(%rbp),%ymm9,%ymm9
7956	vpaddd	0+192(%rbp),%ymm13,%ymm13
7957	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7958	vpaddd	0+64(%rbp),%ymm4,%ymm4
7959	vpaddd	0+96(%rbp),%ymm8,%ymm8
7960	vpaddd	0+160(%rbp),%ymm12,%ymm12
7961	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7962	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7963	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7964	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7965	vpxor	0+0(%rsi),%ymm3,%ymm3
7966	vpxor	32+0(%rsi),%ymm2,%ymm2
7967	vpxor	64+0(%rsi),%ymm6,%ymm6
7968	vpxor	96+0(%rsi),%ymm10,%ymm10
7969	vmovdqu	%ymm3,0+0(%rdi)
7970	vmovdqu	%ymm2,32+0(%rdi)
7971	vmovdqu	%ymm6,64+0(%rdi)
7972	vmovdqu	%ymm10,96+0(%rdi)
7973	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7974	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7975	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7976	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7977	vpxor	0+128(%rsi),%ymm3,%ymm3
7978	vpxor	32+128(%rsi),%ymm1,%ymm1
7979	vpxor	64+128(%rsi),%ymm5,%ymm5
7980	vpxor	96+128(%rsi),%ymm9,%ymm9
7981	vmovdqu	%ymm3,0+128(%rdi)
7982	vmovdqu	%ymm1,32+128(%rdi)
7983	vmovdqu	%ymm5,64+128(%rdi)
7984	vmovdqu	%ymm9,96+128(%rdi)
7985	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7986	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7987	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7988	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7989	vmovdqa	%ymm3,%ymm8
7990
7991	movq	$256,%rcx
7992	leaq	256(%rsi),%rsi
7993	subq	$256,%rbx
7994	jmp	L$seal_avx2_short_hash_remainder
7995
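// 385-512 bytes left: four states, with the mulx-based Poly1305 interleaved
// as in the main loop. 384 bytes are encrypted here and the rest is handled
// by the short loop.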
7996L$seal_avx2_tail_512:
7997	vmovdqa	L$chacha20_consts(%rip),%ymm0
7998	vmovdqa	0+64(%rbp),%ymm4
7999	vmovdqa	0+96(%rbp),%ymm8
8000	vmovdqa	%ymm0,%ymm1
8001	vmovdqa	%ymm4,%ymm5
8002	vmovdqa	%ymm8,%ymm9
8003	vmovdqa	%ymm0,%ymm2
8004	vmovdqa	%ymm4,%ymm6
8005	vmovdqa	%ymm8,%ymm10
8006	vmovdqa	%ymm0,%ymm3
8007	vmovdqa	%ymm4,%ymm7
8008	vmovdqa	%ymm8,%ymm11
8009	vmovdqa	L$avx2_inc(%rip),%ymm12
8010	vpaddd	0+160(%rbp),%ymm12,%ymm15
8011	vpaddd	%ymm15,%ymm12,%ymm14
8012	vpaddd	%ymm14,%ymm12,%ymm13
8013	vpaddd	%ymm13,%ymm12,%ymm12
8014	vmovdqa	%ymm15,0+256(%rbp)
8015	vmovdqa	%ymm14,0+224(%rbp)
8016	vmovdqa	%ymm13,0+192(%rbp)
8017	vmovdqa	%ymm12,0+160(%rbp)
8018
8019L$seal_avx2_tail_512_rounds_and_3xhash:
8020	addq	0+0(%rdi),%r10
8021	adcq	8+0(%rdi),%r11
8022	adcq	$1,%r12
8023	movq	0+0+0(%rbp),%rdx
8024	movq	%rdx,%r15
8025	mulxq	%r10,%r13,%r14
8026	mulxq	%r11,%rax,%rdx
8027	imulq	%r12,%r15
8028	addq	%rax,%r14
8029	adcq	%rdx,%r15
8030	movq	8+0+0(%rbp),%rdx
8031	mulxq	%r10,%r10,%rax
8032	addq	%r10,%r14
8033	mulxq	%r11,%r11,%r9
8034	adcq	%r11,%r15
8035	adcq	$0,%r9
8036	imulq	%r12,%rdx
8037	addq	%rax,%r15
8038	adcq	%rdx,%r9
8039	movq	%r13,%r10
8040	movq	%r14,%r11
8041	movq	%r15,%r12
8042	andq	$3,%r12
8043	movq	%r15,%r13
8044	andq	$-4,%r13
8045	movq	%r9,%r14
8046	shrdq	$2,%r9,%r15
8047	shrq	$2,%r9
8048	addq	%r13,%r15
8049	adcq	%r14,%r9
8050	addq	%r15,%r10
8051	adcq	%r9,%r11
8052	adcq	$0,%r12
8053
8054	leaq	16(%rdi),%rdi
8055L$seal_avx2_tail_512_rounds_and_2xhash:
8056	vmovdqa	%ymm8,0+128(%rbp)
8057	vmovdqa	L$rol16(%rip),%ymm8
8058	vpaddd	%ymm7,%ymm3,%ymm3
8059	vpaddd	%ymm6,%ymm2,%ymm2
8060	vpaddd	%ymm5,%ymm1,%ymm1
8061	vpaddd	%ymm4,%ymm0,%ymm0
8062	vpxor	%ymm3,%ymm15,%ymm15
8063	vpxor	%ymm2,%ymm14,%ymm14
8064	vpxor	%ymm1,%ymm13,%ymm13
8065	vpxor	%ymm0,%ymm12,%ymm12
8066	vpshufb	%ymm8,%ymm15,%ymm15
8067	vpshufb	%ymm8,%ymm14,%ymm14
8068	vpshufb	%ymm8,%ymm13,%ymm13
8069	vpshufb	%ymm8,%ymm12,%ymm12
8070	vpaddd	%ymm15,%ymm11,%ymm11
8071	vpaddd	%ymm14,%ymm10,%ymm10
8072	vpaddd	%ymm13,%ymm9,%ymm9
8073	vpaddd	0+128(%rbp),%ymm12,%ymm8
8074	vpxor	%ymm11,%ymm7,%ymm7
8075	vpxor	%ymm10,%ymm6,%ymm6
8076	addq	0+0(%rdi),%r10
8077	adcq	8+0(%rdi),%r11
8078	adcq	$1,%r12
8079	vpxor	%ymm9,%ymm5,%ymm5
8080	vpxor	%ymm8,%ymm4,%ymm4
8081	vmovdqa	%ymm8,0+128(%rbp)
8082	vpsrld	$20,%ymm7,%ymm8
8083	vpslld	$32-20,%ymm7,%ymm7
8084	vpxor	%ymm8,%ymm7,%ymm7
8085	vpsrld	$20,%ymm6,%ymm8
8086	vpslld	$32-20,%ymm6,%ymm6
8087	vpxor	%ymm8,%ymm6,%ymm6
8088	vpsrld	$20,%ymm5,%ymm8
8089	vpslld	$32-20,%ymm5,%ymm5
8090	vpxor	%ymm8,%ymm5,%ymm5
8091	vpsrld	$20,%ymm4,%ymm8
8092	vpslld	$32-20,%ymm4,%ymm4
8093	vpxor	%ymm8,%ymm4,%ymm4
8094	vmovdqa	L$rol8(%rip),%ymm8
8095	vpaddd	%ymm7,%ymm3,%ymm3
8096	vpaddd	%ymm6,%ymm2,%ymm2
8097	vpaddd	%ymm5,%ymm1,%ymm1
8098	vpaddd	%ymm4,%ymm0,%ymm0
8099	movq	0+0+0(%rbp),%rdx
8100	movq	%rdx,%r15
8101	mulxq	%r10,%r13,%r14
8102	mulxq	%r11,%rax,%rdx
8103	imulq	%r12,%r15
8104	addq	%rax,%r14
8105	adcq	%rdx,%r15
8106	vpxor	%ymm3,%ymm15,%ymm15
8107	vpxor	%ymm2,%ymm14,%ymm14
8108	vpxor	%ymm1,%ymm13,%ymm13
8109	vpxor	%ymm0,%ymm12,%ymm12
8110	vpshufb	%ymm8,%ymm15,%ymm15
8111	vpshufb	%ymm8,%ymm14,%ymm14
8112	vpshufb	%ymm8,%ymm13,%ymm13
8113	vpshufb	%ymm8,%ymm12,%ymm12
8114	vpaddd	%ymm15,%ymm11,%ymm11
8115	vpaddd	%ymm14,%ymm10,%ymm10
8116	vpaddd	%ymm13,%ymm9,%ymm9
8117	vpaddd	0+128(%rbp),%ymm12,%ymm8
8118	vpxor	%ymm11,%ymm7,%ymm7
8119	vpxor	%ymm10,%ymm6,%ymm6
8120	vpxor	%ymm9,%ymm5,%ymm5
8121	vpxor	%ymm8,%ymm4,%ymm4
8122	vmovdqa	%ymm8,0+128(%rbp)
8123	vpsrld	$25,%ymm7,%ymm8
8124	vpslld	$32-25,%ymm7,%ymm7
8125	vpxor	%ymm8,%ymm7,%ymm7
8126	movq	8+0+0(%rbp),%rdx
8127	mulxq	%r10,%r10,%rax
8128	addq	%r10,%r14
8129	mulxq	%r11,%r11,%r9
8130	adcq	%r11,%r15
8131	adcq	$0,%r9
8132	imulq	%r12,%rdx
8133	vpsrld	$25,%ymm6,%ymm8
8134	vpslld	$32-25,%ymm6,%ymm6
8135	vpxor	%ymm8,%ymm6,%ymm6
8136	vpsrld	$25,%ymm5,%ymm8
8137	vpslld	$32-25,%ymm5,%ymm5
8138	vpxor	%ymm8,%ymm5,%ymm5
8139	vpsrld	$25,%ymm4,%ymm8
8140	vpslld	$32-25,%ymm4,%ymm4
8141	vpxor	%ymm8,%ymm4,%ymm4
8142	vmovdqa	0+128(%rbp),%ymm8
8143	vpalignr	$4,%ymm7,%ymm7,%ymm7
8144	vpalignr	$8,%ymm11,%ymm11,%ymm11
8145	vpalignr	$12,%ymm15,%ymm15,%ymm15
8146	vpalignr	$4,%ymm6,%ymm6,%ymm6
8147	vpalignr	$8,%ymm10,%ymm10,%ymm10
8148	vpalignr	$12,%ymm14,%ymm14,%ymm14
8149	vpalignr	$4,%ymm5,%ymm5,%ymm5
8150	vpalignr	$8,%ymm9,%ymm9,%ymm9
8151	vpalignr	$12,%ymm13,%ymm13,%ymm13
8152	vpalignr	$4,%ymm4,%ymm4,%ymm4
8153	addq	%rax,%r15
8154	adcq	%rdx,%r9
8155	vpalignr	$8,%ymm8,%ymm8,%ymm8
8156	vpalignr	$12,%ymm12,%ymm12,%ymm12
8157	vmovdqa	%ymm8,0+128(%rbp)
8158	vmovdqa	L$rol16(%rip),%ymm8
8159	vpaddd	%ymm7,%ymm3,%ymm3
8160	vpaddd	%ymm6,%ymm2,%ymm2
8161	vpaddd	%ymm5,%ymm1,%ymm1
8162	vpaddd	%ymm4,%ymm0,%ymm0
8163	vpxor	%ymm3,%ymm15,%ymm15
8164	vpxor	%ymm2,%ymm14,%ymm14
8165	vpxor	%ymm1,%ymm13,%ymm13
8166	vpxor	%ymm0,%ymm12,%ymm12
8167	vpshufb	%ymm8,%ymm15,%ymm15
8168	vpshufb	%ymm8,%ymm14,%ymm14
8169	vpshufb	%ymm8,%ymm13,%ymm13
8170	vpshufb	%ymm8,%ymm12,%ymm12
8171	vpaddd	%ymm15,%ymm11,%ymm11
8172	vpaddd	%ymm14,%ymm10,%ymm10
8173	vpaddd	%ymm13,%ymm9,%ymm9
8174	vpaddd	0+128(%rbp),%ymm12,%ymm8
8175	movq	%r13,%r10
8176	movq	%r14,%r11
8177	movq	%r15,%r12
8178	andq	$3,%r12
8179	movq	%r15,%r13
8180	andq	$-4,%r13
8181	movq	%r9,%r14
8182	shrdq	$2,%r9,%r15
8183	shrq	$2,%r9
8184	addq	%r13,%r15
8185	adcq	%r14,%r9
8186	addq	%r15,%r10
8187	adcq	%r9,%r11
8188	adcq	$0,%r12
8189	vpxor	%ymm11,%ymm7,%ymm7
8190	vpxor	%ymm10,%ymm6,%ymm6
8191	vpxor	%ymm9,%ymm5,%ymm5
8192	vpxor	%ymm8,%ymm4,%ymm4
8193	vmovdqa	%ymm8,0+128(%rbp)
8194	vpsrld	$20,%ymm7,%ymm8
8195	vpslld	$32-20,%ymm7,%ymm7
8196	vpxor	%ymm8,%ymm7,%ymm7
8197	vpsrld	$20,%ymm6,%ymm8
8198	vpslld	$32-20,%ymm6,%ymm6
8199	vpxor	%ymm8,%ymm6,%ymm6
8200	vpsrld	$20,%ymm5,%ymm8
8201	vpslld	$32-20,%ymm5,%ymm5
8202	vpxor	%ymm8,%ymm5,%ymm5
8203	vpsrld	$20,%ymm4,%ymm8
8204	vpslld	$32-20,%ymm4,%ymm4
8205	vpxor	%ymm8,%ymm4,%ymm4
8206	vmovdqa	L$rol8(%rip),%ymm8
8207	vpaddd	%ymm7,%ymm3,%ymm3
8208	vpaddd	%ymm6,%ymm2,%ymm2
8209	addq	0+16(%rdi),%r10
8210	adcq	8+16(%rdi),%r11
8211	adcq	$1,%r12
8212	vpaddd	%ymm5,%ymm1,%ymm1
8213	vpaddd	%ymm4,%ymm0,%ymm0
8214	vpxor	%ymm3,%ymm15,%ymm15
8215	vpxor	%ymm2,%ymm14,%ymm14
8216	vpxor	%ymm1,%ymm13,%ymm13
8217	vpxor	%ymm0,%ymm12,%ymm12
8218	vpshufb	%ymm8,%ymm15,%ymm15
8219	vpshufb	%ymm8,%ymm14,%ymm14
8220	vpshufb	%ymm8,%ymm13,%ymm13
8221	vpshufb	%ymm8,%ymm12,%ymm12
8222	vpaddd	%ymm15,%ymm11,%ymm11
8223	vpaddd	%ymm14,%ymm10,%ymm10
8224	vpaddd	%ymm13,%ymm9,%ymm9
8225	vpaddd	0+128(%rbp),%ymm12,%ymm8
8226	vpxor	%ymm11,%ymm7,%ymm7
8227	vpxor	%ymm10,%ymm6,%ymm6
8228	vpxor	%ymm9,%ymm5,%ymm5
8229	vpxor	%ymm8,%ymm4,%ymm4
8230	vmovdqa	%ymm8,0+128(%rbp)
8231	vpsrld	$25,%ymm7,%ymm8
8232	movq	0+0+0(%rbp),%rdx
8233	movq	%rdx,%r15
8234	mulxq	%r10,%r13,%r14
8235	mulxq	%r11,%rax,%rdx
8236	imulq	%r12,%r15
8237	addq	%rax,%r14
8238	adcq	%rdx,%r15
8239	vpslld	$32-25,%ymm7,%ymm7
8240	vpxor	%ymm8,%ymm7,%ymm7
8241	vpsrld	$25,%ymm6,%ymm8
8242	vpslld	$32-25,%ymm6,%ymm6
8243	vpxor	%ymm8,%ymm6,%ymm6
8244	vpsrld	$25,%ymm5,%ymm8
8245	vpslld	$32-25,%ymm5,%ymm5
8246	vpxor	%ymm8,%ymm5,%ymm5
8247	vpsrld	$25,%ymm4,%ymm8
8248	vpslld	$32-25,%ymm4,%ymm4
8249	vpxor	%ymm8,%ymm4,%ymm4
8250	vmovdqa	0+128(%rbp),%ymm8
8251	vpalignr	$12,%ymm7,%ymm7,%ymm7
8252	vpalignr	$8,%ymm11,%ymm11,%ymm11
8253	vpalignr	$4,%ymm15,%ymm15,%ymm15
8254	vpalignr	$12,%ymm6,%ymm6,%ymm6
8255	vpalignr	$8,%ymm10,%ymm10,%ymm10
8256	vpalignr	$4,%ymm14,%ymm14,%ymm14
8257	vpalignr	$12,%ymm5,%ymm5,%ymm5
8258	vpalignr	$8,%ymm9,%ymm9,%ymm9
8259	movq	8+0+0(%rbp),%rdx
8260	mulxq	%r10,%r10,%rax
8261	addq	%r10,%r14
8262	mulxq	%r11,%r11,%r9
8263	adcq	%r11,%r15
8264	adcq	$0,%r9
8265	imulq	%r12,%rdx
8266	vpalignr	$4,%ymm13,%ymm13,%ymm13
8267	vpalignr	$12,%ymm4,%ymm4,%ymm4
8268	vpalignr	$8,%ymm8,%ymm8,%ymm8
8269	vpalignr	$4,%ymm12,%ymm12,%ymm12
8270

8286	addq	%rax,%r15
8287	adcq	%rdx,%r9
8308	movq	%r13,%r10
8309	movq	%r14,%r11
8310	movq	%r15,%r12
8311	andq	$3,%r12
8312	movq	%r15,%r13
8313	andq	$-4,%r13
8314	movq	%r9,%r14
8315	shrdq	$2,%r9,%r15
8316	shrq	$2,%r9
8317	addq	%r13,%r15
8318	adcq	%r14,%r9
8319	addq	%r15,%r10
8320	adcq	%r9,%r11
8321	adcq	$0,%r12
8322
8323	leaq	32(%rdi),%rdi
8324	decq	%rcx
8325	jg	L$seal_avx2_tail_512_rounds_and_3xhash
8326	decq	%r8
8327	jge	L$seal_avx2_tail_512_rounds_and_2xhash
8328	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
8329	vpaddd	0+64(%rbp),%ymm7,%ymm7
8330	vpaddd	0+96(%rbp),%ymm11,%ymm11
8331	vpaddd	0+256(%rbp),%ymm15,%ymm15
8332	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
8333	vpaddd	0+64(%rbp),%ymm6,%ymm6
8334	vpaddd	0+96(%rbp),%ymm10,%ymm10
8335	vpaddd	0+224(%rbp),%ymm14,%ymm14
8336	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
8337	vpaddd	0+64(%rbp),%ymm5,%ymm5
8338	vpaddd	0+96(%rbp),%ymm9,%ymm9
8339	vpaddd	0+192(%rbp),%ymm13,%ymm13
8340	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
8341	vpaddd	0+64(%rbp),%ymm4,%ymm4
8342	vpaddd	0+96(%rbp),%ymm8,%ymm8
8343	vpaddd	0+160(%rbp),%ymm12,%ymm12
8344
8345	vmovdqa	%ymm0,0+128(%rbp)
8346	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
8347	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
8348	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
8349	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
8350	vpxor	0+0(%rsi),%ymm0,%ymm0
8351	vpxor	32+0(%rsi),%ymm3,%ymm3
8352	vpxor	64+0(%rsi),%ymm7,%ymm7
8353	vpxor	96+0(%rsi),%ymm11,%ymm11
8354	vmovdqu	%ymm0,0+0(%rdi)
8355	vmovdqu	%ymm3,32+0(%rdi)
8356	vmovdqu	%ymm7,64+0(%rdi)
8357	vmovdqu	%ymm11,96+0(%rdi)
8358
8359	vmovdqa	0+128(%rbp),%ymm0
8360	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8361	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8362	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8363	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8364	vpxor	0+128(%rsi),%ymm3,%ymm3
8365	vpxor	32+128(%rsi),%ymm2,%ymm2
8366	vpxor	64+128(%rsi),%ymm6,%ymm6
8367	vpxor	96+128(%rsi),%ymm10,%ymm10
8368	vmovdqu	%ymm3,0+128(%rdi)
8369	vmovdqu	%ymm2,32+128(%rdi)
8370	vmovdqu	%ymm6,64+128(%rdi)
8371	vmovdqu	%ymm10,96+128(%rdi)
8372	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8373	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8374	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8375	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8376	vpxor	0+256(%rsi),%ymm3,%ymm3
8377	vpxor	32+256(%rsi),%ymm1,%ymm1
8378	vpxor	64+256(%rsi),%ymm5,%ymm5
8379	vpxor	96+256(%rsi),%ymm9,%ymm9
8380	vmovdqu	%ymm3,0+256(%rdi)
8381	vmovdqu	%ymm1,32+256(%rdi)
8382	vmovdqu	%ymm5,64+256(%rdi)
8383	vmovdqu	%ymm9,96+256(%rdi)
8384	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8385	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8386	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8387	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8388	vmovdqa	%ymm3,%ymm8
8389
8390	movq	$384,%rcx
8391	leaq	384(%rsi),%rsi
8392	subq	$384,%rbx
8393	jmp	L$seal_avx2_short_hash_remainder
8394
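// Short-input path: three two-block states yield 320 usable bytes of key
// stream. The first 32 bytes are masked with L$clamp and stored at 0(%rbp) as
// the one-time Poly1305 key (r,s); the rest encrypts the message.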
8395L$seal_avx2_320:
8396	vmovdqa	%ymm0,%ymm1
8397	vmovdqa	%ymm0,%ymm2
8398	vmovdqa	%ymm4,%ymm5
8399	vmovdqa	%ymm4,%ymm6
8400	vmovdqa	%ymm8,%ymm9
8401	vmovdqa	%ymm8,%ymm10
8402	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
8403	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm14
8404	vmovdqa	%ymm4,%ymm7
8405	vmovdqa	%ymm8,%ymm11
8406	vmovdqa	%ymm12,0+160(%rbp)
8407	vmovdqa	%ymm13,0+192(%rbp)
8408	vmovdqa	%ymm14,0+224(%rbp)
8409	movq	$10,%r10
8410L$seal_avx2_320_rounds:
8411	vpaddd	%ymm4,%ymm0,%ymm0
8412	vpxor	%ymm0,%ymm12,%ymm12
8413	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8414	vpaddd	%ymm12,%ymm8,%ymm8
8415	vpxor	%ymm8,%ymm4,%ymm4
8416	vpsrld	$20,%ymm4,%ymm3
8417	vpslld	$12,%ymm4,%ymm4
8418	vpxor	%ymm3,%ymm4,%ymm4
8419	vpaddd	%ymm4,%ymm0,%ymm0
8420	vpxor	%ymm0,%ymm12,%ymm12
8421	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8422	vpaddd	%ymm12,%ymm8,%ymm8
8423	vpxor	%ymm8,%ymm4,%ymm4
8424	vpslld	$7,%ymm4,%ymm3
8425	vpsrld	$25,%ymm4,%ymm4
8426	vpxor	%ymm3,%ymm4,%ymm4
8427	vpalignr	$12,%ymm12,%ymm12,%ymm12
8428	vpalignr	$8,%ymm8,%ymm8,%ymm8
8429	vpalignr	$4,%ymm4,%ymm4,%ymm4
8430	vpaddd	%ymm5,%ymm1,%ymm1
8431	vpxor	%ymm1,%ymm13,%ymm13
8432	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8433	vpaddd	%ymm13,%ymm9,%ymm9
8434	vpxor	%ymm9,%ymm5,%ymm5
8435	vpsrld	$20,%ymm5,%ymm3
8436	vpslld	$12,%ymm5,%ymm5
8437	vpxor	%ymm3,%ymm5,%ymm5
8438	vpaddd	%ymm5,%ymm1,%ymm1
8439	vpxor	%ymm1,%ymm13,%ymm13
8440	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8441	vpaddd	%ymm13,%ymm9,%ymm9
8442	vpxor	%ymm9,%ymm5,%ymm5
8443	vpslld	$7,%ymm5,%ymm3
8444	vpsrld	$25,%ymm5,%ymm5
8445	vpxor	%ymm3,%ymm5,%ymm5
8446	vpalignr	$12,%ymm13,%ymm13,%ymm13
8447	vpalignr	$8,%ymm9,%ymm9,%ymm9
8448	vpalignr	$4,%ymm5,%ymm5,%ymm5
8449	vpaddd	%ymm6,%ymm2,%ymm2
8450	vpxor	%ymm2,%ymm14,%ymm14
8451	vpshufb	L$rol16(%rip),%ymm14,%ymm14
8452	vpaddd	%ymm14,%ymm10,%ymm10
8453	vpxor	%ymm10,%ymm6,%ymm6
8454	vpsrld	$20,%ymm6,%ymm3
8455	vpslld	$12,%ymm6,%ymm6
8456	vpxor	%ymm3,%ymm6,%ymm6
8457	vpaddd	%ymm6,%ymm2,%ymm2
8458	vpxor	%ymm2,%ymm14,%ymm14
8459	vpshufb	L$rol8(%rip),%ymm14,%ymm14
8460	vpaddd	%ymm14,%ymm10,%ymm10
8461	vpxor	%ymm10,%ymm6,%ymm6
8462	vpslld	$7,%ymm6,%ymm3
8463	vpsrld	$25,%ymm6,%ymm6
8464	vpxor	%ymm3,%ymm6,%ymm6
8465	vpalignr	$12,%ymm14,%ymm14,%ymm14
8466	vpalignr	$8,%ymm10,%ymm10,%ymm10
8467	vpalignr	$4,%ymm6,%ymm6,%ymm6
8468	vpaddd	%ymm4,%ymm0,%ymm0
8469	vpxor	%ymm0,%ymm12,%ymm12
8470	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8471	vpaddd	%ymm12,%ymm8,%ymm8
8472	vpxor	%ymm8,%ymm4,%ymm4
8473	vpsrld	$20,%ymm4,%ymm3
8474	vpslld	$12,%ymm4,%ymm4
8475	vpxor	%ymm3,%ymm4,%ymm4
8476	vpaddd	%ymm4,%ymm0,%ymm0
8477	vpxor	%ymm0,%ymm12,%ymm12
8478	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8479	vpaddd	%ymm12,%ymm8,%ymm8
8480	vpxor	%ymm8,%ymm4,%ymm4
8481	vpslld	$7,%ymm4,%ymm3
8482	vpsrld	$25,%ymm4,%ymm4
8483	vpxor	%ymm3,%ymm4,%ymm4
8484	vpalignr	$4,%ymm12,%ymm12,%ymm12
8485	vpalignr	$8,%ymm8,%ymm8,%ymm8
8486	vpalignr	$12,%ymm4,%ymm4,%ymm4
8487	vpaddd	%ymm5,%ymm1,%ymm1
8488	vpxor	%ymm1,%ymm13,%ymm13
8489	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8490	vpaddd	%ymm13,%ymm9,%ymm9
8491	vpxor	%ymm9,%ymm5,%ymm5
8492	vpsrld	$20,%ymm5,%ymm3
8493	vpslld	$12,%ymm5,%ymm5
8494	vpxor	%ymm3,%ymm5,%ymm5
8495	vpaddd	%ymm5,%ymm1,%ymm1
8496	vpxor	%ymm1,%ymm13,%ymm13
8497	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8498	vpaddd	%ymm13,%ymm9,%ymm9
8499	vpxor	%ymm9,%ymm5,%ymm5
8500	vpslld	$7,%ymm5,%ymm3
8501	vpsrld	$25,%ymm5,%ymm5
8502	vpxor	%ymm3,%ymm5,%ymm5
8503	vpalignr	$4,%ymm13,%ymm13,%ymm13
8504	vpalignr	$8,%ymm9,%ymm9,%ymm9
8505	vpalignr	$12,%ymm5,%ymm5,%ymm5
8506	vpaddd	%ymm6,%ymm2,%ymm2
8507	vpxor	%ymm2,%ymm14,%ymm14
8508	vpshufb	L$rol16(%rip),%ymm14,%ymm14
8509	vpaddd	%ymm14,%ymm10,%ymm10
8510	vpxor	%ymm10,%ymm6,%ymm6
8511	vpsrld	$20,%ymm6,%ymm3
8512	vpslld	$12,%ymm6,%ymm6
8513	vpxor	%ymm3,%ymm6,%ymm6
8514	vpaddd	%ymm6,%ymm2,%ymm2
8515	vpxor	%ymm2,%ymm14,%ymm14
8516	vpshufb	L$rol8(%rip),%ymm14,%ymm14
8517	vpaddd	%ymm14,%ymm10,%ymm10
8518	vpxor	%ymm10,%ymm6,%ymm6
8519	vpslld	$7,%ymm6,%ymm3
8520	vpsrld	$25,%ymm6,%ymm6
8521	vpxor	%ymm3,%ymm6,%ymm6
8522	vpalignr	$4,%ymm14,%ymm14,%ymm14
8523	vpalignr	$8,%ymm10,%ymm10,%ymm10
8524	vpalignr	$12,%ymm6,%ymm6,%ymm6
8525
8526	decq	%r10
8527	jne	L$seal_avx2_320_rounds
8528	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
8529	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
8530	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
8531	vpaddd	%ymm7,%ymm4,%ymm4
8532	vpaddd	%ymm7,%ymm5,%ymm5
8533	vpaddd	%ymm7,%ymm6,%ymm6
8534	vpaddd	%ymm11,%ymm8,%ymm8
8535	vpaddd	%ymm11,%ymm9,%ymm9
8536	vpaddd	%ymm11,%ymm10,%ymm10
8537	vpaddd	0+160(%rbp),%ymm12,%ymm12
8538	vpaddd	0+192(%rbp),%ymm13,%ymm13
8539	vpaddd	0+224(%rbp),%ymm14,%ymm14
8540	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8541
8542	vpand	L$clamp(%rip),%ymm3,%ymm3
8543	vmovdqa	%ymm3,0+0(%rbp)
8544
8545	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8546	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8547	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8548	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8549	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8550	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
8551	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
8552	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
8553	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
8554	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
8555	jmp	L$seal_avx2_short
8556
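// Shortest path: two two-block states. The first 32 bytes of key stream
// become the Poly1305 key and up to 192 bytes encrypt the message.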
8557L$seal_avx2_192:
8558	vmovdqa	%ymm0,%ymm1
8559	vmovdqa	%ymm0,%ymm2
8560	vmovdqa	%ymm4,%ymm5
8561	vmovdqa	%ymm4,%ymm6
8562	vmovdqa	%ymm8,%ymm9
8563	vmovdqa	%ymm8,%ymm10
8564	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
8565	vmovdqa	%ymm12,%ymm11
8566	vmovdqa	%ymm13,%ymm15
8567	movq	$10,%r10
8568L$seal_avx2_192_rounds:
8569	vpaddd	%ymm4,%ymm0,%ymm0
8570	vpxor	%ymm0,%ymm12,%ymm12
8571	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8572	vpaddd	%ymm12,%ymm8,%ymm8
8573	vpxor	%ymm8,%ymm4,%ymm4
8574	vpsrld	$20,%ymm4,%ymm3
8575	vpslld	$12,%ymm4,%ymm4
8576	vpxor	%ymm3,%ymm4,%ymm4
8577	vpaddd	%ymm4,%ymm0,%ymm0
8578	vpxor	%ymm0,%ymm12,%ymm12
8579	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8580	vpaddd	%ymm12,%ymm8,%ymm8
8581	vpxor	%ymm8,%ymm4,%ymm4
8582	vpslld	$7,%ymm4,%ymm3
8583	vpsrld	$25,%ymm4,%ymm4
8584	vpxor	%ymm3,%ymm4,%ymm4
8585	vpalignr	$12,%ymm12,%ymm12,%ymm12
8586	vpalignr	$8,%ymm8,%ymm8,%ymm8
8587	vpalignr	$4,%ymm4,%ymm4,%ymm4
8588	vpaddd	%ymm5,%ymm1,%ymm1
8589	vpxor	%ymm1,%ymm13,%ymm13
8590	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8591	vpaddd	%ymm13,%ymm9,%ymm9
8592	vpxor	%ymm9,%ymm5,%ymm5
8593	vpsrld	$20,%ymm5,%ymm3
8594	vpslld	$12,%ymm5,%ymm5
8595	vpxor	%ymm3,%ymm5,%ymm5
8596	vpaddd	%ymm5,%ymm1,%ymm1
8597	vpxor	%ymm1,%ymm13,%ymm13
8598	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8599	vpaddd	%ymm13,%ymm9,%ymm9
8600	vpxor	%ymm9,%ymm5,%ymm5
8601	vpslld	$7,%ymm5,%ymm3
8602	vpsrld	$25,%ymm5,%ymm5
8603	vpxor	%ymm3,%ymm5,%ymm5
8604	vpalignr	$12,%ymm13,%ymm13,%ymm13
8605	vpalignr	$8,%ymm9,%ymm9,%ymm9
8606	vpalignr	$4,%ymm5,%ymm5,%ymm5
8607	vpaddd	%ymm4,%ymm0,%ymm0
8608	vpxor	%ymm0,%ymm12,%ymm12
8609	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8610	vpaddd	%ymm12,%ymm8,%ymm8
8611	vpxor	%ymm8,%ymm4,%ymm4
8612	vpsrld	$20,%ymm4,%ymm3
8613	vpslld	$12,%ymm4,%ymm4
8614	vpxor	%ymm3,%ymm4,%ymm4
8615	vpaddd	%ymm4,%ymm0,%ymm0
8616	vpxor	%ymm0,%ymm12,%ymm12
8617	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8618	vpaddd	%ymm12,%ymm8,%ymm8
8619	vpxor	%ymm8,%ymm4,%ymm4
8620	vpslld	$7,%ymm4,%ymm3
8621	vpsrld	$25,%ymm4,%ymm4
8622	vpxor	%ymm3,%ymm4,%ymm4
8623	vpalignr	$4,%ymm12,%ymm12,%ymm12
8624	vpalignr	$8,%ymm8,%ymm8,%ymm8
8625	vpalignr	$12,%ymm4,%ymm4,%ymm4
8626	vpaddd	%ymm5,%ymm1,%ymm1
8627	vpxor	%ymm1,%ymm13,%ymm13
8628	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8629	vpaddd	%ymm13,%ymm9,%ymm9
8630	vpxor	%ymm9,%ymm5,%ymm5
8631	vpsrld	$20,%ymm5,%ymm3
8632	vpslld	$12,%ymm5,%ymm5
8633	vpxor	%ymm3,%ymm5,%ymm5
8634	vpaddd	%ymm5,%ymm1,%ymm1
8635	vpxor	%ymm1,%ymm13,%ymm13
8636	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8637	vpaddd	%ymm13,%ymm9,%ymm9
8638	vpxor	%ymm9,%ymm5,%ymm5
8639	vpslld	$7,%ymm5,%ymm3
8640	vpsrld	$25,%ymm5,%ymm5
8641	vpxor	%ymm3,%ymm5,%ymm5
8642	vpalignr	$4,%ymm13,%ymm13,%ymm13
8643	vpalignr	$8,%ymm9,%ymm9,%ymm9
8644	vpalignr	$12,%ymm5,%ymm5,%ymm5
8645
8646	decq	%r10
8647	jne	L$seal_avx2_192_rounds
8648	vpaddd	%ymm2,%ymm0,%ymm0
8649	vpaddd	%ymm2,%ymm1,%ymm1
8650	vpaddd	%ymm6,%ymm4,%ymm4
8651	vpaddd	%ymm6,%ymm5,%ymm5
8652	vpaddd	%ymm10,%ymm8,%ymm8
8653	vpaddd	%ymm10,%ymm9,%ymm9
8654	vpaddd	%ymm11,%ymm12,%ymm12
8655	vpaddd	%ymm15,%ymm13,%ymm13
8656	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8657
8658	vpand	L$clamp(%rip),%ymm3,%ymm3
8659	vmovdqa	%ymm3,0+0(%rbp)
8660
8661	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8662	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8663	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8664	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8665	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8666	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
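// Entry for the short-message paths: hash the additional data, then hash any
// ciphertext already written (%rcx bytes) and encrypt the rest 32 bytes at a
// time. The tails above jump directly to the labels below.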
8667L$seal_avx2_short:
8668	movq	%r8,%r8
8669	call	poly_hash_ad_internal
8670	xorq	%rcx,%rcx
8671L$seal_avx2_short_hash_remainder:
8672	cmpq	$16,%rcx
8673	jb	L$seal_avx2_short_loop
8674	addq	0+0(%rdi),%r10
8675	adcq	8+0(%rdi),%r11
8676	adcq	$1,%r12
8677	movq	0+0+0(%rbp),%rax
8678	movq	%rax,%r15
8679	mulq	%r10
8680	movq	%rax,%r13
8681	movq	%rdx,%r14
8682	movq	0+0+0(%rbp),%rax
8683	mulq	%r11
8684	imulq	%r12,%r15
8685	addq	%rax,%r14
8686	adcq	%rdx,%r15
8687	movq	8+0+0(%rbp),%rax
8688	movq	%rax,%r9
8689	mulq	%r10
8690	addq	%rax,%r14
8691	adcq	$0,%rdx
8692	movq	%rdx,%r10
8693	movq	8+0+0(%rbp),%rax
8694	mulq	%r11
8695	addq	%rax,%r15
8696	adcq	$0,%rdx
8697	imulq	%r12,%r9
8698	addq	%r10,%r15
8699	adcq	%rdx,%r9
8700	movq	%r13,%r10
8701	movq	%r14,%r11
8702	movq	%r15,%r12
8703	andq	$3,%r12
8704	movq	%r15,%r13
8705	andq	$-4,%r13
8706	movq	%r9,%r14
8707	shrdq	$2,%r9,%r15
8708	shrq	$2,%r9
8709	addq	%r13,%r15
8710	adcq	%r14,%r9
8711	addq	%r15,%r10
8712	adcq	%r9,%r11
8713	adcq	$0,%r12
8714
8715	subq	$16,%rcx
8716	addq	$16,%rdi
8717	jmp	L$seal_avx2_short_hash_remainder
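// While at least 32 bytes remain: XOR one ymm of key stream with plaintext,
// store it, absorb the two 16-byte ciphertext blocks just written, then
// rotate the next queued key-stream registers into ymm0.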
8718L$seal_avx2_short_loop:
8719	cmpq	$32,%rbx
8720	jb	L$seal_avx2_short_tail
8721	subq	$32,%rbx
8722
8723	vpxor	(%rsi),%ymm0,%ymm0
8724	vmovdqu	%ymm0,(%rdi)
8725	leaq	32(%rsi),%rsi
8726
8727	addq	0+0(%rdi),%r10
8728	adcq	8+0(%rdi),%r11
8729	adcq	$1,%r12
8730	movq	0+0+0(%rbp),%rax
8731	movq	%rax,%r15
8732	mulq	%r10
8733	movq	%rax,%r13
8734	movq	%rdx,%r14
8735	movq	0+0+0(%rbp),%rax
8736	mulq	%r11
8737	imulq	%r12,%r15
8738	addq	%rax,%r14
8739	adcq	%rdx,%r15
8740	movq	8+0+0(%rbp),%rax
8741	movq	%rax,%r9
8742	mulq	%r10
8743	addq	%rax,%r14
8744	adcq	$0,%rdx
8745	movq	%rdx,%r10
8746	movq	8+0+0(%rbp),%rax
8747	mulq	%r11
8748	addq	%rax,%r15
8749	adcq	$0,%rdx
8750	imulq	%r12,%r9
8751	addq	%r10,%r15
8752	adcq	%rdx,%r9
8753	movq	%r13,%r10
8754	movq	%r14,%r11
8755	movq	%r15,%r12
8756	andq	$3,%r12
8757	movq	%r15,%r13
8758	andq	$-4,%r13
8759	movq	%r9,%r14
8760	shrdq	$2,%r9,%r15
8761	shrq	$2,%r9
8762	addq	%r13,%r15
8763	adcq	%r14,%r9
8764	addq	%r15,%r10
8765	adcq	%r9,%r11
8766	adcq	$0,%r12
8767	addq	0+16(%rdi),%r10
8768	adcq	8+16(%rdi),%r11
8769	adcq	$1,%r12
8770	movq	0+0+0(%rbp),%rax
8771	movq	%rax,%r15
8772	mulq	%r10
8773	movq	%rax,%r13
8774	movq	%rdx,%r14
8775	movq	0+0+0(%rbp),%rax
8776	mulq	%r11
8777	imulq	%r12,%r15
8778	addq	%rax,%r14
8779	adcq	%rdx,%r15
8780	movq	8+0+0(%rbp),%rax
8781	movq	%rax,%r9
8782	mulq	%r10
8783	addq	%rax,%r14
8784	adcq	$0,%rdx
8785	movq	%rdx,%r10
8786	movq	8+0+0(%rbp),%rax
8787	mulq	%r11
8788	addq	%rax,%r15
8789	adcq	$0,%rdx
8790	imulq	%r12,%r9
8791	addq	%r10,%r15
8792	adcq	%rdx,%r9
8793	movq	%r13,%r10
8794	movq	%r14,%r11
8795	movq	%r15,%r12
8796	andq	$3,%r12
8797	movq	%r15,%r13
8798	andq	$-4,%r13
8799	movq	%r9,%r14
8800	shrdq	$2,%r9,%r15
8801	shrq	$2,%r9
8802	addq	%r13,%r15
8803	adcq	%r14,%r9
8804	addq	%r15,%r10
8805	adcq	%r9,%r11
8806	adcq	$0,%r12
8807
8808	leaq	32(%rdi),%rdi
8809
8810	vmovdqa	%ymm4,%ymm0
8811	vmovdqa	%ymm8,%ymm4
8812	vmovdqa	%ymm12,%ymm8
8813	vmovdqa	%ymm1,%ymm12
8814	vmovdqa	%ymm5,%ymm1
8815	vmovdqa	%ymm9,%ymm5
8816	vmovdqa	%ymm13,%ymm9
8817	vmovdqa	%ymm2,%ymm13
8818	vmovdqa	%ymm6,%ymm2
8819	jmp	L$seal_avx2_short_loop
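// At most 31 bytes remain: encrypt a final 16-byte block with the low lane of
// ymm0 if present, then drop to the SSE code (L$seal_sse_tail_16) for the
// remaining (<16) bytes.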
8820L$seal_avx2_short_tail:
8821	cmpq	$16,%rbx
8822	jb	L$seal_avx2_exit
8823	subq	$16,%rbx
8824	vpxor	(%rsi),%xmm0,%xmm3
8825	vmovdqu	%xmm3,(%rdi)
8826	leaq	16(%rsi),%rsi
8827	addq	0+0(%rdi),%r10
8828	adcq	8+0(%rdi),%r11
8829	adcq	$1,%r12
8830	movq	0+0+0(%rbp),%rax
8831	movq	%rax,%r15
8832	mulq	%r10
8833	movq	%rax,%r13
8834	movq	%rdx,%r14
8835	movq	0+0+0(%rbp),%rax
8836	mulq	%r11
8837	imulq	%r12,%r15
8838	addq	%rax,%r14
8839	adcq	%rdx,%r15
8840	movq	8+0+0(%rbp),%rax
8841	movq	%rax,%r9
8842	mulq	%r10
8843	addq	%rax,%r14
8844	adcq	$0,%rdx
8845	movq	%rdx,%r10
8846	movq	8+0+0(%rbp),%rax
8847	mulq	%r11
8848	addq	%rax,%r15
8849	adcq	$0,%rdx
8850	imulq	%r12,%r9
8851	addq	%r10,%r15
8852	adcq	%rdx,%r9
8853	movq	%r13,%r10
8854	movq	%r14,%r11
8855	movq	%r15,%r12
8856	andq	$3,%r12
8857	movq	%r15,%r13
8858	andq	$-4,%r13
8859	movq	%r9,%r14
8860	shrdq	$2,%r9,%r15
8861	shrq	$2,%r9
8862	addq	%r13,%r15
8863	adcq	%r14,%r9
8864	addq	%r15,%r10
8865	adcq	%r9,%r11
8866	adcq	$0,%r12
8867
8868	leaq	16(%rdi),%rdi
8869	vextracti128	$1,%ymm0,%xmm0
8870L$seal_avx2_exit:
8871	vzeroupper
8872	jmp	L$seal_sse_tail_16
8873
8874
8875#endif
8876