1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifidn __OUTPUT_FORMAT__, win64
5default	rel
6%define XMMWORD
7%define YMMWORD
8%define ZMMWORD
9%define _CET_ENDBR
10
11%include "ring_core_generated/prefix_symbols_nasm.inc"
12section	.text code align=64
13
14EXTERN	OPENSSL_ia32cap_P
15
; Constant tables used by the ChaCha20-Poly1305 code in this file.
; The label below is emitted in the code section; the tables themselves
; are placed in the read-only data section selected right after it.
16chacha20_poly1305_constants:
17
18section	.rdata rdata align=8
19ALIGN	64
; The 16 ASCII bytes "expand 32-byte k", stored twice (32 bytes total).
; The SSE code in this file loads the first 16 bytes as state row 0.
20$L$chacha20_consts:
21	DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
22	DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
; Byte-shuffle control (used as a pshufb operand), stored twice.
; Within each 4-byte group, result byte i comes from source byte (i+3) mod 4,
; i.e. every little-endian dword is rotated left by 8 bits.
23$L$rol8:
24	DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
25	DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; Byte-shuffle control, stored twice: result byte i comes from source byte
; (i+2) mod 4 of its group, i.e. every dword is rotated by 16 bits.
26$L$rol16:
27	DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
28	DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; Four zero dwords. Note that $L$sse_inc follows immediately, so a 32-byte
; read from here yields {0,0,0,0, 1,0,0,0}.
; NOTE(review): no use of this label is visible in this part of the file;
; presumably consumed by the AVX2 path -- confirm there.
29$L$avx2_init:
30	DD	0,0,0,0
; Dword vector {1,0,0,0}. The SSE code adds it (paddd) to the state row kept
; at [rbp+160+96], bumping that row's lowest dword by one per 64-byte block.
31$L$sse_inc:
32	DD	1,0,0,0
; Dword vector {2,0,0,0} stored twice.
; NOTE(review): not referenced in the visible part of the file; presumably
; the AVX2 counterpart of $L$sse_inc -- confirm against the AVX2 code.
33$L$avx2_inc:
34	DD	2,0,0,0,2,0,0,0
; 32-byte AND mask. The first 16 bytes are applied with pand to the value
; that is then stored at [rbp+160] and used as the 128-bit multiplier by the
; hashing code; they clear the top 4 bits of every dword and the low 2 bits
; of dwords 1..3. The second 16 bytes are all ones (leave data unchanged).
35$L$clamp:
36	DQ	0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
37	DQ	0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF
38ALIGN	16
; Table of 16 masks, 16 bytes each. Entry k (0-based) has its first k+1
; bytes set to 0xff and the remaining bytes zero, so entry k keeps the low
; k+1 bytes of a 16-byte value when ANDed with it.
; NOTE(review): not referenced in the visible part of the file.
39$L$and_masks:
40	DB	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41	DB	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42	DB	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43	DB	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44	DB	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
45	DB	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
46	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
47	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
48	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
49	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
50	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
51	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
52	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
53	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
54	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
55	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
56section	.text
57
58
59
60ALIGN	64
;-----------------------------------------------------------------------
; poly_hash_ad_internal
;
; File-local helper (no `global` declaration) that absorbs a byte string
; into a Poly1305-style accumulator, 16 bytes at a time.
;
; In:    rcx = pointer to the bytes to hash
;        r8  = number of bytes
;        rbp = frame base; the two 64-bit multiplier words are read from
;              [rbp+160] (low) and [rbp+168] (high)
; Out:   r10 (bits 0-63), r11 (bits 64-127), r12 (bits 128 and up)
;        = accumulator after all input has been absorbed. The accumulator
;        is zeroed on entry, so any previous value is discarded.
; Clobb: rax, rdx, r9, r13, r14, r15, flags.
;        General path: rcx is advanced past every whole 16-byte block and
;        r8 ends at 0. Fast path (r8 == 13): rcx and r8 are left untouched.
;
; Each block step computes  acc = ((acc + block + 2^128) * mult) with the
; bits at and above 2^130 folded back in multiplied by 5 (2^130 == 5 modulo
; 2^130 - 5). The result is only partially reduced: r12 may exceed 3 by a
; carry.
;
; NOTE(review): label names suggest the input is the AEAD "additional data"
; and that 13 is the TLS record-header length -- confirm against callers.
;-----------------------------------------------------------------------
61poly_hash_ad_internal:
62
63
; acc = 0
64	xor	r10,r10
65	xor	r11,r11
66	xor	r12,r12
; Exactly 13 bytes takes the straight-line path below; anything else loops.
67	cmp	r8,13
68	jne	NEAR $L$hash_ad_loop
69$L$poly_fast_tls_ad:
70
; Load the 13 bytes as one zero-padded 16-byte block without reading past
; the end: bytes 0..7 directly, then the 8 bytes at offset 5 (bytes 5..12)
; shifted right by 24 bits so only bytes 8..12 remain in the low 40 bits.
71	mov	r10,QWORD[rcx]
72	mov	r11,QWORD[5+rcx]
73	shr	r11,24
; Top limb = 1, i.e. the 2^128 bit appended to the block.
74	mov	r12,1
; --- acc * mult: 3-limb acc (r10,r11,r12) times 2-limb mult (m0,m1) ---
; r13 = low 64 bits of m0*a0, r14 = high 64 bits of m0*a0
75	mov	rax,QWORD[((0+160+0))+rbp]
76	mov	r15,rax
77	mul	r10
78	mov	r13,rax
79	mov	r14,rdx
; r14 += lo(m0*a1); r15 = m0*a2 + hi(m0*a1) + carry
80	mov	rax,QWORD[((0+160+0))+rbp]
81	mul	r11
82	imul	r15,r12
83	add	r14,rax
84	adc	r15,rdx
; r14 += lo(m1*a0); the high half plus carry is parked in r10 (a0 is dead)
85	mov	rax,QWORD[((8+160+0))+rbp]
86	mov	r9,rax
87	mul	r10
88	add	r14,rax
89	adc	rdx,0
90	mov	r10,rdx
; r15 += lo(m1*a1) + parked carry; r9 = m1*a2 + hi(m1*a1) + carry
91	mov	rax,QWORD[((8+160+0))+rbp]
92	mul	r11
93	add	r15,rax
94	adc	rdx,0
95	imul	r9,r12
96	add	r15,r10
97	adc	r9,rdx
; --- reduce: product limbs are r13, r14, r15, r9 (low to high) ---
; Keep bits 0..129 in r10:r11:(r15 & 3). Let c = bits 130 and up. Then
; (r9 : r15 & -4) equals 4*c and (r9:r15) >> 2 equals c; adding both to the
; low limbs adds 5*c.
98	mov	r10,r13
99	mov	r11,r14
100	mov	r12,r15
101	and	r12,3
102	mov	r13,r15
103	and	r13,-4
104	mov	r14,r9
105	shrd	r15,r9,2
106	shr	r9,2
107	add	r15,r13
108	adc	r9,r14
109	add	r10,r15
110	adc	r11,r9
111	adc	r12,0
112
113	ret
; General path: absorb whole 16-byte blocks while at least 16 bytes remain.
114$L$hash_ad_loop:
115
116	cmp	r8,16
117	jb	NEAR $L$hash_ad_tail
; acc += 16 input bytes, plus 1 in the top limb (the 2^128 bit)
118	add	r10,QWORD[((0+0))+rcx]
119	adc	r11,QWORD[((8+0))+rcx]
120	adc	r12,1
; acc * mult -- same instruction sequence as in the fast path above
121	mov	rax,QWORD[((0+160+0))+rbp]
122	mov	r15,rax
123	mul	r10
124	mov	r13,rax
125	mov	r14,rdx
126	mov	rax,QWORD[((0+160+0))+rbp]
127	mul	r11
128	imul	r15,r12
129	add	r14,rax
130	adc	r15,rdx
131	mov	rax,QWORD[((8+160+0))+rbp]
132	mov	r9,rax
133	mul	r10
134	add	r14,rax
135	adc	rdx,0
136	mov	r10,rdx
137	mov	rax,QWORD[((8+160+0))+rbp]
138	mul	r11
139	add	r15,rax
140	adc	rdx,0
141	imul	r9,r12
142	add	r15,r10
143	adc	r9,rdx
; fold bits >= 2^130 back in, times 5
144	mov	r10,r13
145	mov	r11,r14
146	mov	r12,r15
147	and	r12,3
148	mov	r13,r15
149	and	r13,-4
150	mov	r14,r9
151	shrd	r15,r9,2
152	shr	r9,2
153	add	r15,r13
154	adc	r9,r14
155	add	r10,r15
156	adc	r11,r9
157	adc	r12,0
158
; advance to the next block
159	lea	rcx,[16+rcx]
160	sub	r8,16
161	jmp	NEAR $L$hash_ad_loop
; 0..15 bytes remain here.
162$L$hash_ad_tail:
163	cmp	r8,0
164	je	NEAR $L$hash_ad_done
165
; Assemble the remaining r8 bytes into r14:r13 as a zero-padded 16-byte
; little-endian block. Bytes are read from the last one backwards so each
; can be shifted in at the bottom; rcx is first moved to one past the end.
166	xor	r13,r13
167	xor	r14,r14
168	xor	r15,r15
169	add	rcx,r8
170$L$hash_ad_tail_loop:
; (r14:r13) <<= 8, then insert the next byte into the low 8 bits
171	shld	r14,r13,8
172	shl	r13,8
173	movzx	r15,BYTE[((-1))+rcx]
174	xor	r13,r15
175	dec	rcx
; loop until the byte count reaches zero (the jne tests the flags of this dec)
176	dec	r8
177	jne	NEAR $L$hash_ad_tail_loop
178
; acc += padded tail block, plus 1 in the top limb (the 2^128 bit)
179	add	r10,r13
180	adc	r11,r14
181	adc	r12,1
; acc * mult -- same instruction sequence as above
182	mov	rax,QWORD[((0+160+0))+rbp]
183	mov	r15,rax
184	mul	r10
185	mov	r13,rax
186	mov	r14,rdx
187	mov	rax,QWORD[((0+160+0))+rbp]
188	mul	r11
189	imul	r15,r12
190	add	r14,rax
191	adc	r15,rdx
192	mov	rax,QWORD[((8+160+0))+rbp]
193	mov	r9,rax
194	mul	r10
195	add	r14,rax
196	adc	rdx,0
197	mov	r10,rdx
198	mov	rax,QWORD[((8+160+0))+rbp]
199	mul	r11
200	add	r15,rax
201	adc	rdx,0
202	imul	r9,r12
203	add	r15,r10
204	adc	r9,rdx
; fold bits >= 2^130 back in, times 5
205	mov	r10,r13
206	mov	r11,r14
207	mov	r12,r15
208	and	r12,3
209	mov	r13,r15
210	and	r13,-4
211	mov	r14,r9
212	shrd	r15,r9,2
213	shr	r9,2
214	add	r15,r13
215	adc	r9,r14
216	add	r10,r15
217	adc	r11,r9
218	adc	r12,0
219
220
; Result is in r10:r11:r12.
221$L$hash_ad_done:
222	ret
223
224
225
226global	chacha20_poly1305_open
227
228ALIGN	64
229chacha20_poly1305_open:
230	mov	QWORD[8+rsp],rdi	;WIN64 prologue
231	mov	QWORD[16+rsp],rsi
232	mov	rax,rsp
233$L$SEH_begin_chacha20_poly1305_open:
234	mov	rdi,rcx
235	mov	rsi,rdx
236	mov	rdx,r8
237	mov	rcx,r9
238	mov	r8,QWORD[40+rsp]
239	mov	r9,QWORD[48+rsp]
240
241
242
243_CET_ENDBR
244	push	rbp
245
246	push	rbx
247
248	push	r12
249
250	push	r13
251
252	push	r14
253
254	push	r15
255
256
257
258	push	r9
259
260	sub	rsp,288 + 160 + 32
261
262
263	lea	rbp,[32+rsp]
264	and	rbp,-32
265
266	movaps	XMMWORD[(0+0)+rbp],xmm6
267	movaps	XMMWORD[(16+0)+rbp],xmm7
268	movaps	XMMWORD[(32+0)+rbp],xmm8
269	movaps	XMMWORD[(48+0)+rbp],xmm9
270	movaps	XMMWORD[(64+0)+rbp],xmm10
271	movaps	XMMWORD[(80+0)+rbp],xmm11
272	movaps	XMMWORD[(96+0)+rbp],xmm12
273	movaps	XMMWORD[(112+0)+rbp],xmm13
274	movaps	XMMWORD[(128+0)+rbp],xmm14
275	movaps	XMMWORD[(144+0)+rbp],xmm15
276
277	mov	rbx,rdx
278	mov	QWORD[((0+160+32))+rbp],r8
279	mov	QWORD[((8+160+32))+rbp],rbx
280
281	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
282	and	eax,288
283	xor	eax,288
284	jz	NEAR chacha20_poly1305_open_avx2
285
286	cmp	rbx,128
287	jbe	NEAR $L$open_sse_128
288
289	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
290	movdqu	xmm4,XMMWORD[r9]
291	movdqu	xmm8,XMMWORD[16+r9]
292	movdqu	xmm12,XMMWORD[32+r9]
293
294	movdqa	xmm7,xmm12
295
296	movdqa	XMMWORD[(160+48)+rbp],xmm4
297	movdqa	XMMWORD[(160+64)+rbp],xmm8
298	movdqa	XMMWORD[(160+96)+rbp],xmm12
299	mov	r10,10
300$L$open_sse_init_rounds:
301	paddd	xmm0,xmm4
302	pxor	xmm12,xmm0
303	pshufb	xmm12,XMMWORD[$L$rol16]
304	paddd	xmm8,xmm12
305	pxor	xmm4,xmm8
306	movdqa	xmm3,xmm4
307	pslld	xmm3,12
308	psrld	xmm4,20
309	pxor	xmm4,xmm3
310	paddd	xmm0,xmm4
311	pxor	xmm12,xmm0
312	pshufb	xmm12,XMMWORD[$L$rol8]
313	paddd	xmm8,xmm12
314	pxor	xmm4,xmm8
315	movdqa	xmm3,xmm4
316	pslld	xmm3,7
317	psrld	xmm4,25
318	pxor	xmm4,xmm3
319DB	102,15,58,15,228,4
320DB	102,69,15,58,15,192,8
321DB	102,69,15,58,15,228,12
322	paddd	xmm0,xmm4
323	pxor	xmm12,xmm0
324	pshufb	xmm12,XMMWORD[$L$rol16]
325	paddd	xmm8,xmm12
326	pxor	xmm4,xmm8
327	movdqa	xmm3,xmm4
328	pslld	xmm3,12
329	psrld	xmm4,20
330	pxor	xmm4,xmm3
331	paddd	xmm0,xmm4
332	pxor	xmm12,xmm0
333	pshufb	xmm12,XMMWORD[$L$rol8]
334	paddd	xmm8,xmm12
335	pxor	xmm4,xmm8
336	movdqa	xmm3,xmm4
337	pslld	xmm3,7
338	psrld	xmm4,25
339	pxor	xmm4,xmm3
340DB	102,15,58,15,228,12
341DB	102,69,15,58,15,192,8
342DB	102,69,15,58,15,228,4
343
344	dec	r10
345	jne	NEAR $L$open_sse_init_rounds
346
347	paddd	xmm0,XMMWORD[$L$chacha20_consts]
348	paddd	xmm4,XMMWORD[((160+48))+rbp]
349
350	pand	xmm0,XMMWORD[$L$clamp]
351	movdqa	XMMWORD[(160+0)+rbp],xmm0
352	movdqa	XMMWORD[(160+16)+rbp],xmm4
353
354	mov	r8,r8
355	call	poly_hash_ad_internal
356$L$open_sse_main_loop:
357	cmp	rbx,16*16
358	jb	NEAR $L$open_sse_tail
359
360	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
361	movdqa	xmm4,XMMWORD[((160+48))+rbp]
362	movdqa	xmm8,XMMWORD[((160+64))+rbp]
363	movdqa	xmm1,xmm0
364	movdqa	xmm5,xmm4
365	movdqa	xmm9,xmm8
366	movdqa	xmm2,xmm0
367	movdqa	xmm6,xmm4
368	movdqa	xmm10,xmm8
369	movdqa	xmm3,xmm0
370	movdqa	xmm7,xmm4
371	movdqa	xmm11,xmm8
372	movdqa	xmm15,XMMWORD[((160+96))+rbp]
373	paddd	xmm15,XMMWORD[$L$sse_inc]
374	movdqa	xmm14,xmm15
375	paddd	xmm14,XMMWORD[$L$sse_inc]
376	movdqa	xmm13,xmm14
377	paddd	xmm13,XMMWORD[$L$sse_inc]
378	movdqa	xmm12,xmm13
379	paddd	xmm12,XMMWORD[$L$sse_inc]
380	movdqa	XMMWORD[(160+96)+rbp],xmm12
381	movdqa	XMMWORD[(160+112)+rbp],xmm13
382	movdqa	XMMWORD[(160+128)+rbp],xmm14
383	movdqa	XMMWORD[(160+144)+rbp],xmm15
384
385
386
387	mov	rcx,4
388	mov	r8,rsi
389$L$open_sse_main_loop_rounds:
390	movdqa	XMMWORD[(160+80)+rbp],xmm8
391	movdqa	xmm8,XMMWORD[$L$rol16]
392	paddd	xmm3,xmm7
393	paddd	xmm2,xmm6
394	paddd	xmm1,xmm5
395	paddd	xmm0,xmm4
396	pxor	xmm15,xmm3
397	pxor	xmm14,xmm2
398	pxor	xmm13,xmm1
399	pxor	xmm12,xmm0
400DB	102,69,15,56,0,248
401DB	102,69,15,56,0,240
402DB	102,69,15,56,0,232
403DB	102,69,15,56,0,224
404	movdqa	xmm8,XMMWORD[((160+80))+rbp]
405	paddd	xmm11,xmm15
406	paddd	xmm10,xmm14
407	paddd	xmm9,xmm13
408	paddd	xmm8,xmm12
409	pxor	xmm7,xmm11
410	add	r10,QWORD[((0+0))+r8]
411	adc	r11,QWORD[((8+0))+r8]
412	adc	r12,1
413
414	lea	r8,[16+r8]
415	pxor	xmm6,xmm10
416	pxor	xmm5,xmm9
417	pxor	xmm4,xmm8
418	movdqa	XMMWORD[(160+80)+rbp],xmm8
419	movdqa	xmm8,xmm7
420	psrld	xmm8,20
421	pslld	xmm7,32-20
422	pxor	xmm7,xmm8
423	movdqa	xmm8,xmm6
424	psrld	xmm8,20
425	pslld	xmm6,32-20
426	pxor	xmm6,xmm8
427	movdqa	xmm8,xmm5
428	psrld	xmm8,20
429	pslld	xmm5,32-20
430	pxor	xmm5,xmm8
431	movdqa	xmm8,xmm4
432	psrld	xmm8,20
433	pslld	xmm4,32-20
434	pxor	xmm4,xmm8
435	mov	rax,QWORD[((0+160+0))+rbp]
436	mov	r15,rax
437	mul	r10
438	mov	r13,rax
439	mov	r14,rdx
440	mov	rax,QWORD[((0+160+0))+rbp]
441	mul	r11
442	imul	r15,r12
443	add	r14,rax
444	adc	r15,rdx
445	movdqa	xmm8,XMMWORD[$L$rol8]
446	paddd	xmm3,xmm7
447	paddd	xmm2,xmm6
448	paddd	xmm1,xmm5
449	paddd	xmm0,xmm4
450	pxor	xmm15,xmm3
451	pxor	xmm14,xmm2
452	pxor	xmm13,xmm1
453	pxor	xmm12,xmm0
454DB	102,69,15,56,0,248
455DB	102,69,15,56,0,240
456DB	102,69,15,56,0,232
457DB	102,69,15,56,0,224
458	movdqa	xmm8,XMMWORD[((160+80))+rbp]
459	paddd	xmm11,xmm15
460	paddd	xmm10,xmm14
461	paddd	xmm9,xmm13
462	paddd	xmm8,xmm12
463	pxor	xmm7,xmm11
464	pxor	xmm6,xmm10
465	mov	rax,QWORD[((8+160+0))+rbp]
466	mov	r9,rax
467	mul	r10
468	add	r14,rax
469	adc	rdx,0
470	mov	r10,rdx
471	mov	rax,QWORD[((8+160+0))+rbp]
472	mul	r11
473	add	r15,rax
474	adc	rdx,0
475	pxor	xmm5,xmm9
476	pxor	xmm4,xmm8
477	movdqa	XMMWORD[(160+80)+rbp],xmm8
478	movdqa	xmm8,xmm7
479	psrld	xmm8,25
480	pslld	xmm7,32-25
481	pxor	xmm7,xmm8
482	movdqa	xmm8,xmm6
483	psrld	xmm8,25
484	pslld	xmm6,32-25
485	pxor	xmm6,xmm8
486	movdqa	xmm8,xmm5
487	psrld	xmm8,25
488	pslld	xmm5,32-25
489	pxor	xmm5,xmm8
490	movdqa	xmm8,xmm4
491	psrld	xmm8,25
492	pslld	xmm4,32-25
493	pxor	xmm4,xmm8
494	movdqa	xmm8,XMMWORD[((160+80))+rbp]
495	imul	r9,r12
496	add	r15,r10
497	adc	r9,rdx
498DB	102,15,58,15,255,4
499DB	102,69,15,58,15,219,8
500DB	102,69,15,58,15,255,12
501DB	102,15,58,15,246,4
502DB	102,69,15,58,15,210,8
503DB	102,69,15,58,15,246,12
504DB	102,15,58,15,237,4
505DB	102,69,15,58,15,201,8
506DB	102,69,15,58,15,237,12
507DB	102,15,58,15,228,4
508DB	102,69,15,58,15,192,8
509DB	102,69,15,58,15,228,12
510	movdqa	XMMWORD[(160+80)+rbp],xmm8
511	movdqa	xmm8,XMMWORD[$L$rol16]
512	paddd	xmm3,xmm7
513	paddd	xmm2,xmm6
514	paddd	xmm1,xmm5
515	paddd	xmm0,xmm4
516	pxor	xmm15,xmm3
517	pxor	xmm14,xmm2
518	mov	r10,r13
519	mov	r11,r14
520	mov	r12,r15
521	and	r12,3
522	mov	r13,r15
523	and	r13,-4
524	mov	r14,r9
525	shrd	r15,r9,2
526	shr	r9,2
527	add	r15,r13
528	adc	r9,r14
529	add	r10,r15
530	adc	r11,r9
531	adc	r12,0
532	pxor	xmm13,xmm1
533	pxor	xmm12,xmm0
534DB	102,69,15,56,0,248
535DB	102,69,15,56,0,240
536DB	102,69,15,56,0,232
537DB	102,69,15,56,0,224
538	movdqa	xmm8,XMMWORD[((160+80))+rbp]
539	paddd	xmm11,xmm15
540	paddd	xmm10,xmm14
541	paddd	xmm9,xmm13
542	paddd	xmm8,xmm12
543	pxor	xmm7,xmm11
544	pxor	xmm6,xmm10
545	pxor	xmm5,xmm9
546	pxor	xmm4,xmm8
547	movdqa	XMMWORD[(160+80)+rbp],xmm8
548	movdqa	xmm8,xmm7
549	psrld	xmm8,20
550	pslld	xmm7,32-20
551	pxor	xmm7,xmm8
552	movdqa	xmm8,xmm6
553	psrld	xmm8,20
554	pslld	xmm6,32-20
555	pxor	xmm6,xmm8
556	movdqa	xmm8,xmm5
557	psrld	xmm8,20
558	pslld	xmm5,32-20
559	pxor	xmm5,xmm8
560	movdqa	xmm8,xmm4
561	psrld	xmm8,20
562	pslld	xmm4,32-20
563	pxor	xmm4,xmm8
564	movdqa	xmm8,XMMWORD[$L$rol8]
565	paddd	xmm3,xmm7
566	paddd	xmm2,xmm6
567	paddd	xmm1,xmm5
568	paddd	xmm0,xmm4
569	pxor	xmm15,xmm3
570	pxor	xmm14,xmm2
571	pxor	xmm13,xmm1
572	pxor	xmm12,xmm0
573DB	102,69,15,56,0,248
574DB	102,69,15,56,0,240
575DB	102,69,15,56,0,232
576DB	102,69,15,56,0,224
577	movdqa	xmm8,XMMWORD[((160+80))+rbp]
578	paddd	xmm11,xmm15
579	paddd	xmm10,xmm14
580	paddd	xmm9,xmm13
581	paddd	xmm8,xmm12
582	pxor	xmm7,xmm11
583	pxor	xmm6,xmm10
584	pxor	xmm5,xmm9
585	pxor	xmm4,xmm8
586	movdqa	XMMWORD[(160+80)+rbp],xmm8
587	movdqa	xmm8,xmm7
588	psrld	xmm8,25
589	pslld	xmm7,32-25
590	pxor	xmm7,xmm8
591	movdqa	xmm8,xmm6
592	psrld	xmm8,25
593	pslld	xmm6,32-25
594	pxor	xmm6,xmm8
595	movdqa	xmm8,xmm5
596	psrld	xmm8,25
597	pslld	xmm5,32-25
598	pxor	xmm5,xmm8
599	movdqa	xmm8,xmm4
600	psrld	xmm8,25
601	pslld	xmm4,32-25
602	pxor	xmm4,xmm8
603	movdqa	xmm8,XMMWORD[((160+80))+rbp]
604DB	102,15,58,15,255,12
605DB	102,69,15,58,15,219,8
606DB	102,69,15,58,15,255,4
607DB	102,15,58,15,246,12
608DB	102,69,15,58,15,210,8
609DB	102,69,15,58,15,246,4
610DB	102,15,58,15,237,12
611DB	102,69,15,58,15,201,8
612DB	102,69,15,58,15,237,4
613DB	102,15,58,15,228,12
614DB	102,69,15,58,15,192,8
615DB	102,69,15,58,15,228,4
616
617	dec	rcx
618	jge	NEAR $L$open_sse_main_loop_rounds
619	add	r10,QWORD[((0+0))+r8]
620	adc	r11,QWORD[((8+0))+r8]
621	adc	r12,1
622	mov	rax,QWORD[((0+160+0))+rbp]
623	mov	r15,rax
624	mul	r10
625	mov	r13,rax
626	mov	r14,rdx
627	mov	rax,QWORD[((0+160+0))+rbp]
628	mul	r11
629	imul	r15,r12
630	add	r14,rax
631	adc	r15,rdx
632	mov	rax,QWORD[((8+160+0))+rbp]
633	mov	r9,rax
634	mul	r10
635	add	r14,rax
636	adc	rdx,0
637	mov	r10,rdx
638	mov	rax,QWORD[((8+160+0))+rbp]
639	mul	r11
640	add	r15,rax
641	adc	rdx,0
642	imul	r9,r12
643	add	r15,r10
644	adc	r9,rdx
645	mov	r10,r13
646	mov	r11,r14
647	mov	r12,r15
648	and	r12,3
649	mov	r13,r15
650	and	r13,-4
651	mov	r14,r9
652	shrd	r15,r9,2
653	shr	r9,2
654	add	r15,r13
655	adc	r9,r14
656	add	r10,r15
657	adc	r11,r9
658	adc	r12,0
659
660	lea	r8,[16+r8]
661	cmp	rcx,-6
662	jg	NEAR $L$open_sse_main_loop_rounds
663	paddd	xmm3,XMMWORD[$L$chacha20_consts]
664	paddd	xmm7,XMMWORD[((160+48))+rbp]
665	paddd	xmm11,XMMWORD[((160+64))+rbp]
666	paddd	xmm15,XMMWORD[((160+144))+rbp]
667	paddd	xmm2,XMMWORD[$L$chacha20_consts]
668	paddd	xmm6,XMMWORD[((160+48))+rbp]
669	paddd	xmm10,XMMWORD[((160+64))+rbp]
670	paddd	xmm14,XMMWORD[((160+128))+rbp]
671	paddd	xmm1,XMMWORD[$L$chacha20_consts]
672	paddd	xmm5,XMMWORD[((160+48))+rbp]
673	paddd	xmm9,XMMWORD[((160+64))+rbp]
674	paddd	xmm13,XMMWORD[((160+112))+rbp]
675	paddd	xmm0,XMMWORD[$L$chacha20_consts]
676	paddd	xmm4,XMMWORD[((160+48))+rbp]
677	paddd	xmm8,XMMWORD[((160+64))+rbp]
678	paddd	xmm12,XMMWORD[((160+96))+rbp]
679	movdqa	XMMWORD[(160+80)+rbp],xmm12
680	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
681	pxor	xmm12,xmm3
682	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
683	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
684	pxor	xmm12,xmm7
685	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
686	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
687	pxor	xmm12,xmm11
688	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
689	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
690	pxor	xmm12,xmm15
691	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
692	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
693	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
694	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
695	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
696	pxor	xmm2,xmm3
697	pxor	xmm6,xmm7
698	pxor	xmm10,xmm11
699	pxor	xmm15,xmm14
700	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
701	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
702	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
703	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
704	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
705	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
706	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
707	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
708	pxor	xmm1,xmm3
709	pxor	xmm5,xmm7
710	pxor	xmm9,xmm11
711	pxor	xmm15,xmm13
712	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
713	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
714	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
715	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
716	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
717	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
718	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
719	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
720	pxor	xmm0,xmm3
721	pxor	xmm4,xmm7
722	pxor	xmm8,xmm11
723	pxor	xmm15,XMMWORD[((160+80))+rbp]
724	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
725	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
726	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
727	movdqu	XMMWORD[(48 + 192)+rdi],xmm15
728
729	lea	rsi,[256+rsi]
730	lea	rdi,[256+rdi]
731	sub	rbx,16*16
732	jmp	NEAR $L$open_sse_main_loop
733$L$open_sse_tail:
734
735	test	rbx,rbx
736	jz	NEAR $L$open_sse_finalize
737	cmp	rbx,12*16
738	ja	NEAR $L$open_sse_tail_256
739	cmp	rbx,8*16
740	ja	NEAR $L$open_sse_tail_192
741	cmp	rbx,4*16
742	ja	NEAR $L$open_sse_tail_128
743	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
744	movdqa	xmm4,XMMWORD[((160+48))+rbp]
745	movdqa	xmm8,XMMWORD[((160+64))+rbp]
746	movdqa	xmm12,XMMWORD[((160+96))+rbp]
747	paddd	xmm12,XMMWORD[$L$sse_inc]
748	movdqa	XMMWORD[(160+96)+rbp],xmm12
749
750	xor	r8,r8
751	mov	rcx,rbx
752	cmp	rcx,16
753	jb	NEAR $L$open_sse_tail_64_rounds
754$L$open_sse_tail_64_rounds_and_x1hash:
755	add	r10,QWORD[((0+0))+r8*1+rsi]
756	adc	r11,QWORD[((8+0))+r8*1+rsi]
757	adc	r12,1
758	mov	rax,QWORD[((0+160+0))+rbp]
759	mov	r15,rax
760	mul	r10
761	mov	r13,rax
762	mov	r14,rdx
763	mov	rax,QWORD[((0+160+0))+rbp]
764	mul	r11
765	imul	r15,r12
766	add	r14,rax
767	adc	r15,rdx
768	mov	rax,QWORD[((8+160+0))+rbp]
769	mov	r9,rax
770	mul	r10
771	add	r14,rax
772	adc	rdx,0
773	mov	r10,rdx
774	mov	rax,QWORD[((8+160+0))+rbp]
775	mul	r11
776	add	r15,rax
777	adc	rdx,0
778	imul	r9,r12
779	add	r15,r10
780	adc	r9,rdx
781	mov	r10,r13
782	mov	r11,r14
783	mov	r12,r15
784	and	r12,3
785	mov	r13,r15
786	and	r13,-4
787	mov	r14,r9
788	shrd	r15,r9,2
789	shr	r9,2
790	add	r15,r13
791	adc	r9,r14
792	add	r10,r15
793	adc	r11,r9
794	adc	r12,0
795
796	sub	rcx,16
797$L$open_sse_tail_64_rounds:
798	add	r8,16
799	paddd	xmm0,xmm4
800	pxor	xmm12,xmm0
801	pshufb	xmm12,XMMWORD[$L$rol16]
802	paddd	xmm8,xmm12
803	pxor	xmm4,xmm8
804	movdqa	xmm3,xmm4
805	pslld	xmm3,12
806	psrld	xmm4,20
807	pxor	xmm4,xmm3
808	paddd	xmm0,xmm4
809	pxor	xmm12,xmm0
810	pshufb	xmm12,XMMWORD[$L$rol8]
811	paddd	xmm8,xmm12
812	pxor	xmm4,xmm8
813	movdqa	xmm3,xmm4
814	pslld	xmm3,7
815	psrld	xmm4,25
816	pxor	xmm4,xmm3
817DB	102,15,58,15,228,4
818DB	102,69,15,58,15,192,8
819DB	102,69,15,58,15,228,12
820	paddd	xmm0,xmm4
821	pxor	xmm12,xmm0
822	pshufb	xmm12,XMMWORD[$L$rol16]
823	paddd	xmm8,xmm12
824	pxor	xmm4,xmm8
825	movdqa	xmm3,xmm4
826	pslld	xmm3,12
827	psrld	xmm4,20
828	pxor	xmm4,xmm3
829	paddd	xmm0,xmm4
830	pxor	xmm12,xmm0
831	pshufb	xmm12,XMMWORD[$L$rol8]
832	paddd	xmm8,xmm12
833	pxor	xmm4,xmm8
834	movdqa	xmm3,xmm4
835	pslld	xmm3,7
836	psrld	xmm4,25
837	pxor	xmm4,xmm3
838DB	102,15,58,15,228,12
839DB	102,69,15,58,15,192,8
840DB	102,69,15,58,15,228,4
841
842	cmp	rcx,16
843	jae	NEAR $L$open_sse_tail_64_rounds_and_x1hash
844	cmp	r8,10*16
845	jne	NEAR $L$open_sse_tail_64_rounds
846	paddd	xmm0,XMMWORD[$L$chacha20_consts]
847	paddd	xmm4,XMMWORD[((160+48))+rbp]
848	paddd	xmm8,XMMWORD[((160+64))+rbp]
849	paddd	xmm12,XMMWORD[((160+96))+rbp]
850
851	jmp	NEAR $L$open_sse_tail_64_dec_loop
852
853$L$open_sse_tail_128:
854	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
855	movdqa	xmm4,XMMWORD[((160+48))+rbp]
856	movdqa	xmm8,XMMWORD[((160+64))+rbp]
857	movdqa	xmm1,xmm0
858	movdqa	xmm5,xmm4
859	movdqa	xmm9,xmm8
860	movdqa	xmm13,XMMWORD[((160+96))+rbp]
861	paddd	xmm13,XMMWORD[$L$sse_inc]
862	movdqa	xmm12,xmm13
863	paddd	xmm12,XMMWORD[$L$sse_inc]
864	movdqa	XMMWORD[(160+96)+rbp],xmm12
865	movdqa	XMMWORD[(160+112)+rbp],xmm13
866
867	mov	rcx,rbx
868	and	rcx,-16
869	xor	r8,r8
870$L$open_sse_tail_128_rounds_and_x1hash:
871	add	r10,QWORD[((0+0))+r8*1+rsi]
872	adc	r11,QWORD[((8+0))+r8*1+rsi]
873	adc	r12,1
874	mov	rax,QWORD[((0+160+0))+rbp]
875	mov	r15,rax
876	mul	r10
877	mov	r13,rax
878	mov	r14,rdx
879	mov	rax,QWORD[((0+160+0))+rbp]
880	mul	r11
881	imul	r15,r12
882	add	r14,rax
883	adc	r15,rdx
884	mov	rax,QWORD[((8+160+0))+rbp]
885	mov	r9,rax
886	mul	r10
887	add	r14,rax
888	adc	rdx,0
889	mov	r10,rdx
890	mov	rax,QWORD[((8+160+0))+rbp]
891	mul	r11
892	add	r15,rax
893	adc	rdx,0
894	imul	r9,r12
895	add	r15,r10
896	adc	r9,rdx
897	mov	r10,r13
898	mov	r11,r14
899	mov	r12,r15
900	and	r12,3
901	mov	r13,r15
902	and	r13,-4
903	mov	r14,r9
904	shrd	r15,r9,2
905	shr	r9,2
906	add	r15,r13
907	adc	r9,r14
908	add	r10,r15
909	adc	r11,r9
910	adc	r12,0
911
912$L$open_sse_tail_128_rounds:
913	add	r8,16
914	paddd	xmm0,xmm4
915	pxor	xmm12,xmm0
916	pshufb	xmm12,XMMWORD[$L$rol16]
917	paddd	xmm8,xmm12
918	pxor	xmm4,xmm8
919	movdqa	xmm3,xmm4
920	pslld	xmm3,12
921	psrld	xmm4,20
922	pxor	xmm4,xmm3
923	paddd	xmm0,xmm4
924	pxor	xmm12,xmm0
925	pshufb	xmm12,XMMWORD[$L$rol8]
926	paddd	xmm8,xmm12
927	pxor	xmm4,xmm8
928	movdqa	xmm3,xmm4
929	pslld	xmm3,7
930	psrld	xmm4,25
931	pxor	xmm4,xmm3
932DB	102,15,58,15,228,4
933DB	102,69,15,58,15,192,8
934DB	102,69,15,58,15,228,12
935	paddd	xmm1,xmm5
936	pxor	xmm13,xmm1
937	pshufb	xmm13,XMMWORD[$L$rol16]
938	paddd	xmm9,xmm13
939	pxor	xmm5,xmm9
940	movdqa	xmm3,xmm5
941	pslld	xmm3,12
942	psrld	xmm5,20
943	pxor	xmm5,xmm3
944	paddd	xmm1,xmm5
945	pxor	xmm13,xmm1
946	pshufb	xmm13,XMMWORD[$L$rol8]
947	paddd	xmm9,xmm13
948	pxor	xmm5,xmm9
949	movdqa	xmm3,xmm5
950	pslld	xmm3,7
951	psrld	xmm5,25
952	pxor	xmm5,xmm3
953DB	102,15,58,15,237,4
954DB	102,69,15,58,15,201,8
955DB	102,69,15,58,15,237,12
956	paddd	xmm0,xmm4
957	pxor	xmm12,xmm0
958	pshufb	xmm12,XMMWORD[$L$rol16]
959	paddd	xmm8,xmm12
960	pxor	xmm4,xmm8
961	movdqa	xmm3,xmm4
962	pslld	xmm3,12
963	psrld	xmm4,20
964	pxor	xmm4,xmm3
965	paddd	xmm0,xmm4
966	pxor	xmm12,xmm0
967	pshufb	xmm12,XMMWORD[$L$rol8]
968	paddd	xmm8,xmm12
969	pxor	xmm4,xmm8
970	movdqa	xmm3,xmm4
971	pslld	xmm3,7
972	psrld	xmm4,25
973	pxor	xmm4,xmm3
974DB	102,15,58,15,228,12
975DB	102,69,15,58,15,192,8
976DB	102,69,15,58,15,228,4
977	paddd	xmm1,xmm5
978	pxor	xmm13,xmm1
979	pshufb	xmm13,XMMWORD[$L$rol16]
980	paddd	xmm9,xmm13
981	pxor	xmm5,xmm9
982	movdqa	xmm3,xmm5
983	pslld	xmm3,12
984	psrld	xmm5,20
985	pxor	xmm5,xmm3
986	paddd	xmm1,xmm5
987	pxor	xmm13,xmm1
988	pshufb	xmm13,XMMWORD[$L$rol8]
989	paddd	xmm9,xmm13
990	pxor	xmm5,xmm9
991	movdqa	xmm3,xmm5
992	pslld	xmm3,7
993	psrld	xmm5,25
994	pxor	xmm5,xmm3
995DB	102,15,58,15,237,12
996DB	102,69,15,58,15,201,8
997DB	102,69,15,58,15,237,4
998
999	cmp	r8,rcx
1000	jb	NEAR $L$open_sse_tail_128_rounds_and_x1hash
1001	cmp	r8,10*16
1002	jne	NEAR $L$open_sse_tail_128_rounds
1003	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1004	paddd	xmm5,XMMWORD[((160+48))+rbp]
1005	paddd	xmm9,XMMWORD[((160+64))+rbp]
1006	paddd	xmm13,XMMWORD[((160+112))+rbp]
1007	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1008	paddd	xmm4,XMMWORD[((160+48))+rbp]
1009	paddd	xmm8,XMMWORD[((160+64))+rbp]
1010	paddd	xmm12,XMMWORD[((160+96))+rbp]
1011	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
1012	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
1013	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
1014	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
1015	pxor	xmm1,xmm3
1016	pxor	xmm5,xmm7
1017	pxor	xmm9,xmm11
1018	pxor	xmm15,xmm13
1019	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
1020	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
1021	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
1022	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
1023
1024	sub	rbx,4*16
1025	lea	rsi,[64+rsi]
1026	lea	rdi,[64+rdi]
1027	jmp	NEAR $L$open_sse_tail_64_dec_loop
1028
1029$L$open_sse_tail_192:
1030	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
1031	movdqa	xmm4,XMMWORD[((160+48))+rbp]
1032	movdqa	xmm8,XMMWORD[((160+64))+rbp]
1033	movdqa	xmm1,xmm0
1034	movdqa	xmm5,xmm4
1035	movdqa	xmm9,xmm8
1036	movdqa	xmm2,xmm0
1037	movdqa	xmm6,xmm4
1038	movdqa	xmm10,xmm8
1039	movdqa	xmm14,XMMWORD[((160+96))+rbp]
1040	paddd	xmm14,XMMWORD[$L$sse_inc]
1041	movdqa	xmm13,xmm14
1042	paddd	xmm13,XMMWORD[$L$sse_inc]
1043	movdqa	xmm12,xmm13
1044	paddd	xmm12,XMMWORD[$L$sse_inc]
1045	movdqa	XMMWORD[(160+96)+rbp],xmm12
1046	movdqa	XMMWORD[(160+112)+rbp],xmm13
1047	movdqa	XMMWORD[(160+128)+rbp],xmm14
1048
1049	mov	rcx,rbx
1050	mov	r8,10*16
1051	cmp	rcx,10*16
1052	cmovg	rcx,r8
1053	and	rcx,-16
1054	xor	r8,r8
1055$L$open_sse_tail_192_rounds_and_x1hash:
1056	add	r10,QWORD[((0+0))+r8*1+rsi]
1057	adc	r11,QWORD[((8+0))+r8*1+rsi]
1058	adc	r12,1
1059	mov	rax,QWORD[((0+160+0))+rbp]
1060	mov	r15,rax
1061	mul	r10
1062	mov	r13,rax
1063	mov	r14,rdx
1064	mov	rax,QWORD[((0+160+0))+rbp]
1065	mul	r11
1066	imul	r15,r12
1067	add	r14,rax
1068	adc	r15,rdx
1069	mov	rax,QWORD[((8+160+0))+rbp]
1070	mov	r9,rax
1071	mul	r10
1072	add	r14,rax
1073	adc	rdx,0
1074	mov	r10,rdx
1075	mov	rax,QWORD[((8+160+0))+rbp]
1076	mul	r11
1077	add	r15,rax
1078	adc	rdx,0
1079	imul	r9,r12
1080	add	r15,r10
1081	adc	r9,rdx
1082	mov	r10,r13
1083	mov	r11,r14
1084	mov	r12,r15
1085	and	r12,3
1086	mov	r13,r15
1087	and	r13,-4
1088	mov	r14,r9
1089	shrd	r15,r9,2
1090	shr	r9,2
1091	add	r15,r13
1092	adc	r9,r14
1093	add	r10,r15
1094	adc	r11,r9
1095	adc	r12,0
1096
1097$L$open_sse_tail_192_rounds:
1098	add	r8,16
1099	paddd	xmm0,xmm4
1100	pxor	xmm12,xmm0
1101	pshufb	xmm12,XMMWORD[$L$rol16]
1102	paddd	xmm8,xmm12
1103	pxor	xmm4,xmm8
1104	movdqa	xmm3,xmm4
1105	pslld	xmm3,12
1106	psrld	xmm4,20
1107	pxor	xmm4,xmm3
1108	paddd	xmm0,xmm4
1109	pxor	xmm12,xmm0
1110	pshufb	xmm12,XMMWORD[$L$rol8]
1111	paddd	xmm8,xmm12
1112	pxor	xmm4,xmm8
1113	movdqa	xmm3,xmm4
1114	pslld	xmm3,7
1115	psrld	xmm4,25
1116	pxor	xmm4,xmm3
1117DB	102,15,58,15,228,4
1118DB	102,69,15,58,15,192,8
1119DB	102,69,15,58,15,228,12
1120	paddd	xmm1,xmm5
1121	pxor	xmm13,xmm1
1122	pshufb	xmm13,XMMWORD[$L$rol16]
1123	paddd	xmm9,xmm13
1124	pxor	xmm5,xmm9
1125	movdqa	xmm3,xmm5
1126	pslld	xmm3,12
1127	psrld	xmm5,20
1128	pxor	xmm5,xmm3
1129	paddd	xmm1,xmm5
1130	pxor	xmm13,xmm1
1131	pshufb	xmm13,XMMWORD[$L$rol8]
1132	paddd	xmm9,xmm13
1133	pxor	xmm5,xmm9
1134	movdqa	xmm3,xmm5
1135	pslld	xmm3,7
1136	psrld	xmm5,25
1137	pxor	xmm5,xmm3
1138DB	102,15,58,15,237,4
1139DB	102,69,15,58,15,201,8
1140DB	102,69,15,58,15,237,12
1141	paddd	xmm2,xmm6
1142	pxor	xmm14,xmm2
1143	pshufb	xmm14,XMMWORD[$L$rol16]
1144	paddd	xmm10,xmm14
1145	pxor	xmm6,xmm10
1146	movdqa	xmm3,xmm6
1147	pslld	xmm3,12
1148	psrld	xmm6,20
1149	pxor	xmm6,xmm3
1150	paddd	xmm2,xmm6
1151	pxor	xmm14,xmm2
1152	pshufb	xmm14,XMMWORD[$L$rol8]
1153	paddd	xmm10,xmm14
1154	pxor	xmm6,xmm10
1155	movdqa	xmm3,xmm6
1156	pslld	xmm3,7
1157	psrld	xmm6,25
1158	pxor	xmm6,xmm3
1159DB	102,15,58,15,246,4
1160DB	102,69,15,58,15,210,8
1161DB	102,69,15,58,15,246,12
1162	paddd	xmm0,xmm4
1163	pxor	xmm12,xmm0
1164	pshufb	xmm12,XMMWORD[$L$rol16]
1165	paddd	xmm8,xmm12
1166	pxor	xmm4,xmm8
1167	movdqa	xmm3,xmm4
1168	pslld	xmm3,12
1169	psrld	xmm4,20
1170	pxor	xmm4,xmm3
1171	paddd	xmm0,xmm4
1172	pxor	xmm12,xmm0
1173	pshufb	xmm12,XMMWORD[$L$rol8]
1174	paddd	xmm8,xmm12
1175	pxor	xmm4,xmm8
1176	movdqa	xmm3,xmm4
1177	pslld	xmm3,7
1178	psrld	xmm4,25
1179	pxor	xmm4,xmm3
1180DB	102,15,58,15,228,12
1181DB	102,69,15,58,15,192,8
1182DB	102,69,15,58,15,228,4
1183	paddd	xmm1,xmm5
1184	pxor	xmm13,xmm1
1185	pshufb	xmm13,XMMWORD[$L$rol16]
1186	paddd	xmm9,xmm13
1187	pxor	xmm5,xmm9
1188	movdqa	xmm3,xmm5
1189	pslld	xmm3,12
1190	psrld	xmm5,20
1191	pxor	xmm5,xmm3
1192	paddd	xmm1,xmm5
1193	pxor	xmm13,xmm1
1194	pshufb	xmm13,XMMWORD[$L$rol8]
1195	paddd	xmm9,xmm13
1196	pxor	xmm5,xmm9
1197	movdqa	xmm3,xmm5
1198	pslld	xmm3,7
1199	psrld	xmm5,25
1200	pxor	xmm5,xmm3
1201DB	102,15,58,15,237,12
1202DB	102,69,15,58,15,201,8
1203DB	102,69,15,58,15,237,4
1204	paddd	xmm2,xmm6
1205	pxor	xmm14,xmm2
1206	pshufb	xmm14,XMMWORD[$L$rol16]
1207	paddd	xmm10,xmm14
1208	pxor	xmm6,xmm10
1209	movdqa	xmm3,xmm6
1210	pslld	xmm3,12
1211	psrld	xmm6,20
1212	pxor	xmm6,xmm3
1213	paddd	xmm2,xmm6
1214	pxor	xmm14,xmm2
1215	pshufb	xmm14,XMMWORD[$L$rol8]
1216	paddd	xmm10,xmm14
1217	pxor	xmm6,xmm10
1218	movdqa	xmm3,xmm6
1219	pslld	xmm3,7
1220	psrld	xmm6,25
1221	pxor	xmm6,xmm3
1222DB	102,15,58,15,246,12
1223DB	102,69,15,58,15,210,8
1224DB	102,69,15,58,15,246,4
1225
1226	cmp	r8,rcx
1227	jb	NEAR $L$open_sse_tail_192_rounds_and_x1hash
1228	cmp	r8,10*16
1229	jne	NEAR $L$open_sse_tail_192_rounds
1230	cmp	rbx,11*16
1231	jb	NEAR $L$open_sse_tail_192_finish
1232	add	r10,QWORD[((0+160))+rsi]
1233	adc	r11,QWORD[((8+160))+rsi]
1234	adc	r12,1
1235	mov	rax,QWORD[((0+160+0))+rbp]
1236	mov	r15,rax
1237	mul	r10
1238	mov	r13,rax
1239	mov	r14,rdx
1240	mov	rax,QWORD[((0+160+0))+rbp]
1241	mul	r11
1242	imul	r15,r12
1243	add	r14,rax
1244	adc	r15,rdx
1245	mov	rax,QWORD[((8+160+0))+rbp]
1246	mov	r9,rax
1247	mul	r10
1248	add	r14,rax
1249	adc	rdx,0
1250	mov	r10,rdx
1251	mov	rax,QWORD[((8+160+0))+rbp]
1252	mul	r11
1253	add	r15,rax
1254	adc	rdx,0
1255	imul	r9,r12
1256	add	r15,r10
1257	adc	r9,rdx
1258	mov	r10,r13
1259	mov	r11,r14
1260	mov	r12,r15
1261	and	r12,3
1262	mov	r13,r15
1263	and	r13,-4
1264	mov	r14,r9
1265	shrd	r15,r9,2
1266	shr	r9,2
1267	add	r15,r13
1268	adc	r9,r14
1269	add	r10,r15
1270	adc	r11,r9
1271	adc	r12,0
1272
1273	cmp	rbx,12*16
1274	jb	NEAR $L$open_sse_tail_192_finish
1275	add	r10,QWORD[((0+176))+rsi]
1276	adc	r11,QWORD[((8+176))+rsi]
1277	adc	r12,1
1278	mov	rax,QWORD[((0+160+0))+rbp]
1279	mov	r15,rax
1280	mul	r10
1281	mov	r13,rax
1282	mov	r14,rdx
1283	mov	rax,QWORD[((0+160+0))+rbp]
1284	mul	r11
1285	imul	r15,r12
1286	add	r14,rax
1287	adc	r15,rdx
1288	mov	rax,QWORD[((8+160+0))+rbp]
1289	mov	r9,rax
1290	mul	r10
1291	add	r14,rax
1292	adc	rdx,0
1293	mov	r10,rdx
1294	mov	rax,QWORD[((8+160+0))+rbp]
1295	mul	r11
1296	add	r15,rax
1297	adc	rdx,0
1298	imul	r9,r12
1299	add	r15,r10
1300	adc	r9,rdx
1301	mov	r10,r13
1302	mov	r11,r14
1303	mov	r12,r15
1304	and	r12,3
1305	mov	r13,r15
1306	and	r13,-4
1307	mov	r14,r9
1308	shrd	r15,r9,2
1309	shr	r9,2
1310	add	r15,r13
1311	adc	r9,r14
1312	add	r10,r15
1313	adc	r11,r9
1314	adc	r12,0
1315
1316$L$open_sse_tail_192_finish:
1317	paddd	xmm2,XMMWORD[$L$chacha20_consts]
1318	paddd	xmm6,XMMWORD[((160+48))+rbp]
1319	paddd	xmm10,XMMWORD[((160+64))+rbp]
1320	paddd	xmm14,XMMWORD[((160+128))+rbp]
1321	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1322	paddd	xmm5,XMMWORD[((160+48))+rbp]
1323	paddd	xmm9,XMMWORD[((160+64))+rbp]
1324	paddd	xmm13,XMMWORD[((160+112))+rbp]
1325	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1326	paddd	xmm4,XMMWORD[((160+48))+rbp]
1327	paddd	xmm8,XMMWORD[((160+64))+rbp]
1328	paddd	xmm12,XMMWORD[((160+96))+rbp]
1329	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
1330	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
1331	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
1332	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
1333	pxor	xmm2,xmm3
1334	pxor	xmm6,xmm7
1335	pxor	xmm10,xmm11
1336	pxor	xmm15,xmm14
1337	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
1338	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
1339	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
1340	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
1341	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
1342	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
1343	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
1344	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
1345	pxor	xmm1,xmm3
1346	pxor	xmm5,xmm7
1347	pxor	xmm9,xmm11
1348	pxor	xmm15,xmm13
1349	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
1350	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
1351	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
1352	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
1353
1354	sub	rbx,8*16
1355	lea	rsi,[128+rsi]
1356	lea	rdi,[128+rdi]
1357	jmp	NEAR $L$open_sse_tail_64_dec_loop
1358
; $L$open_sse_tail_256: build the starting state for four ChaCha20 blocks.
; Register layout after this block (one block per column of registers):
;   row a: xmm0..xmm3   = $L$chacha20_consts
;   row b: xmm4..xmm7   = [rbp+160+48]
;   row c: xmm8..xmm11  = [rbp+160+64]
;   row d: xmm12..xmm15 = counter rows, see below
; The counter row is read from [rbp+160+96] and incremented with $L$sse_inc
; (adds 1 to the lowest dword): xmm15 = ctr+1, xmm14 = ctr+2, xmm13 = ctr+3,
; xmm12 = ctr+4. All four are saved for the later feed-forward, and
; [rbp+160+96] now holds the newest counter (ctr+4).
1359$L$open_sse_tail_256:
1360	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
1361	movdqa	xmm4,XMMWORD[((160+48))+rbp]
1362	movdqa	xmm8,XMMWORD[((160+64))+rbp]
1363	movdqa	xmm1,xmm0
1364	movdqa	xmm5,xmm4
1365	movdqa	xmm9,xmm8
1366	movdqa	xmm2,xmm0
1367	movdqa	xmm6,xmm4
1368	movdqa	xmm10,xmm8
1369	movdqa	xmm3,xmm0
1370	movdqa	xmm7,xmm4
1371	movdqa	xmm11,xmm8
1372	movdqa	xmm15,XMMWORD[((160+96))+rbp]
1373	paddd	xmm15,XMMWORD[$L$sse_inc]
1374	movdqa	xmm14,xmm15
1375	paddd	xmm14,XMMWORD[$L$sse_inc]
1376	movdqa	xmm13,xmm14
1377	paddd	xmm13,XMMWORD[$L$sse_inc]
1378	movdqa	xmm12,xmm13
1379	paddd	xmm12,XMMWORD[$L$sse_inc]
1380	movdqa	XMMWORD[(160+96)+rbp],xmm12
1381	movdqa	XMMWORD[(160+112)+rbp],xmm13
1382	movdqa	XMMWORD[(160+128)+rbp],xmm14
1383	movdqa	XMMWORD[(160+144)+rbp],xmm15
1384
; r8 = byte offset into the input used by the hashing in the loops below.
1385	xor	r8,r8
; $L$open_sse_tail_256_rounds_and_x1hash: one ChaCha20 double round (column
; round + diagonal round) over four blocks, interleaved with absorbing one
; 16-byte input block at [rsi+r8] into the Poly1305-style accumulator.
; r8 advances by 16 per pass and the loop exits at r8 = 10*16, i.e. 10 passes
; (20 rounds, 160 bytes hashed) when entered with r8 = 0.
;
; Vector registers: xmm0-3 = row a, xmm4-7 = row b, xmm8-11 = row c,
; xmm12-15 = row d of blocks 0..3. All 16 are live, so xmm11 (then xmm9) is
; spilled to [rbp+160+80] to obtain a scratch register for the rotates.
; Scalar registers: accumulator = r12:r11:r10, multiplier r = the two qwords
; at [rbp+160] and [rbp+168]; r9, r13-r15, rax, rdx are scratch.
; The DB lines are hand-encoded PALIGNR instructions (decoded at each line).
1386$L$open_sse_tail_256_rounds_and_x1hash:
; acc += 16 input bytes at [rsi+r8], plus 2^128 (the "adc r12,1").
1387	add	r10,QWORD[((0+0))+r8*1+rsi]
1388	adc	r11,QWORD[((8+0))+r8*1+rsi]
1389	adc	r12,1
; --- column round, block 0 (xmm11 spilled, used as scratch) ---
1390	movdqa	XMMWORD[(160+80)+rbp],xmm11
1391	paddd	xmm0,xmm4
1392	pxor	xmm12,xmm0
1393	pshufb	xmm12,XMMWORD[$L$rol16]
1394	paddd	xmm8,xmm12
1395	pxor	xmm4,xmm8
; b = rotl32(b, 12)
1396	movdqa	xmm11,xmm4
1397	pslld	xmm11,12
1398	psrld	xmm4,20
1399	pxor	xmm4,xmm11
1400	paddd	xmm0,xmm4
1401	pxor	xmm12,xmm0
1402	pshufb	xmm12,XMMWORD[$L$rol8]
1403	paddd	xmm8,xmm12
1404	pxor	xmm4,xmm8
; b = rotl32(b, 7)
1405	movdqa	xmm11,xmm4
1406	pslld	xmm11,7
1407	psrld	xmm4,25
1408	pxor	xmm4,xmm11
; Rotate rows b/c/d by 1/2/3 dwords so the next round works on diagonals.
1409DB	102,15,58,15,228,4	; palignr xmm4,xmm4,4
1410DB	102,69,15,58,15,192,8	; palignr xmm8,xmm8,8
1411DB	102,69,15,58,15,228,12	; palignr xmm12,xmm12,12
; --- column round, block 1 ---
1412	paddd	xmm1,xmm5
1413	pxor	xmm13,xmm1
1414	pshufb	xmm13,XMMWORD[$L$rol16]
1415	paddd	xmm9,xmm13
1416	pxor	xmm5,xmm9
1417	movdqa	xmm11,xmm5
1418	pslld	xmm11,12
1419	psrld	xmm5,20
1420	pxor	xmm5,xmm11
1421	paddd	xmm1,xmm5
1422	pxor	xmm13,xmm1
1423	pshufb	xmm13,XMMWORD[$L$rol8]
1424	paddd	xmm9,xmm13
1425	pxor	xmm5,xmm9
1426	movdqa	xmm11,xmm5
1427	pslld	xmm11,7
1428	psrld	xmm5,25
1429	pxor	xmm5,xmm11
1430DB	102,15,58,15,237,4	; palignr xmm5,xmm5,4
1431DB	102,69,15,58,15,201,8	; palignr xmm9,xmm9,8
1432DB	102,69,15,58,15,237,12	; palignr xmm13,xmm13,12
; --- column round, block 2 ---
1433	paddd	xmm2,xmm6
1434	pxor	xmm14,xmm2
1435	pshufb	xmm14,XMMWORD[$L$rol16]
1436	paddd	xmm10,xmm14
1437	pxor	xmm6,xmm10
1438	movdqa	xmm11,xmm6
1439	pslld	xmm11,12
1440	psrld	xmm6,20
1441	pxor	xmm6,xmm11
1442	paddd	xmm2,xmm6
1443	pxor	xmm14,xmm2
1444	pshufb	xmm14,XMMWORD[$L$rol8]
1445	paddd	xmm10,xmm14
1446	pxor	xmm6,xmm10
1447	movdqa	xmm11,xmm6
1448	pslld	xmm11,7
1449	psrld	xmm6,25
1450	pxor	xmm6,xmm11
1451DB	102,15,58,15,246,4	; palignr xmm6,xmm6,4
1452DB	102,69,15,58,15,210,8	; palignr xmm10,xmm10,8
1453DB	102,69,15,58,15,246,12	; palignr xmm14,xmm14,12
; Restore xmm11 (row c of block 3).
1454	movdqa	xmm11,XMMWORD[((160+80))+rbp]
; Multiply, part 1: r15:r14:r13 = r.lo * acc (r.lo = [rbp+160]).
1455	mov	rax,QWORD[((0+160+0))+rbp]
1456	mov	r15,rax
1457	mul	r10
1458	mov	r13,rax
1459	mov	r14,rdx
1460	mov	rax,QWORD[((0+160+0))+rbp]
1461	mul	r11
1462	imul	r15,r12
1463	add	r14,rax
1464	adc	r15,rdx
; --- column round, block 3 (xmm9 spilled, used as scratch) ---
1465	movdqa	XMMWORD[(160+80)+rbp],xmm9
1466	paddd	xmm3,xmm7
1467	pxor	xmm15,xmm3
1468	pshufb	xmm15,XMMWORD[$L$rol16]
1469	paddd	xmm11,xmm15
1470	pxor	xmm7,xmm11
1471	movdqa	xmm9,xmm7
1472	pslld	xmm9,12
1473	psrld	xmm7,20
1474	pxor	xmm7,xmm9
1475	paddd	xmm3,xmm7
1476	pxor	xmm15,xmm3
1477	pshufb	xmm15,XMMWORD[$L$rol8]
1478	paddd	xmm11,xmm15
1479	pxor	xmm7,xmm11
1480	movdqa	xmm9,xmm7
1481	pslld	xmm9,7
1482	psrld	xmm7,25
1483	pxor	xmm7,xmm9
1484DB	102,15,58,15,255,4	; palignr xmm7,xmm7,4
1485DB	102,69,15,58,15,219,8	; palignr xmm11,xmm11,8
1486DB	102,69,15,58,15,255,12	; palignr xmm15,xmm15,12
1487	movdqa	xmm9,XMMWORD[((160+80))+rbp]
; Multiply, part 2: add r.hi * acc (r.hi = [rbp+168]), shifted up one limb.
1488	mov	rax,QWORD[((8+160+0))+rbp]
1489	mov	r9,rax
1490	mul	r10
1491	add	r14,rax
1492	adc	rdx,0
1493	mov	r10,rdx
1494	mov	rax,QWORD[((8+160+0))+rbp]
1495	mul	r11
1496	add	r15,rax
1497	adc	rdx,0
; --- diagonal round, block 0 ---
1498	movdqa	XMMWORD[(160+80)+rbp],xmm11
1499	paddd	xmm0,xmm4
1500	pxor	xmm12,xmm0
1501	pshufb	xmm12,XMMWORD[$L$rol16]
1502	paddd	xmm8,xmm12
1503	pxor	xmm4,xmm8
1504	movdqa	xmm11,xmm4
1505	pslld	xmm11,12
1506	psrld	xmm4,20
1507	pxor	xmm4,xmm11
1508	paddd	xmm0,xmm4
1509	pxor	xmm12,xmm0
1510	pshufb	xmm12,XMMWORD[$L$rol8]
1511	paddd	xmm8,xmm12
1512	pxor	xmm4,xmm8
1513	movdqa	xmm11,xmm4
1514	pslld	xmm11,7
1515	psrld	xmm4,25
1516	pxor	xmm4,xmm11
; Undo the diagonal rotation (rows b/c/d rotated by 3/2/1 dwords).
1517DB	102,15,58,15,228,12	; palignr xmm4,xmm4,12
1518DB	102,69,15,58,15,192,8	; palignr xmm8,xmm8,8
1519DB	102,69,15,58,15,228,4	; palignr xmm12,xmm12,4
; --- diagonal round, block 1 ---
1520	paddd	xmm1,xmm5
1521	pxor	xmm13,xmm1
1522	pshufb	xmm13,XMMWORD[$L$rol16]
1523	paddd	xmm9,xmm13
1524	pxor	xmm5,xmm9
1525	movdqa	xmm11,xmm5
1526	pslld	xmm11,12
1527	psrld	xmm5,20
1528	pxor	xmm5,xmm11
1529	paddd	xmm1,xmm5
1530	pxor	xmm13,xmm1
1531	pshufb	xmm13,XMMWORD[$L$rol8]
1532	paddd	xmm9,xmm13
1533	pxor	xmm5,xmm9
1534	movdqa	xmm11,xmm5
1535	pslld	xmm11,7
1536	psrld	xmm5,25
1537	pxor	xmm5,xmm11
1538DB	102,15,58,15,237,12	; palignr xmm5,xmm5,12
1539DB	102,69,15,58,15,201,8	; palignr xmm9,xmm9,8
1540DB	102,69,15,58,15,237,4	; palignr xmm13,xmm13,4
; Multiply, part 3: finish the 4-limb product in r9:r15:r14:r13.
1541	imul	r9,r12
1542	add	r15,r10
1543	adc	r9,rdx
; --- diagonal round, block 2 ---
1544	paddd	xmm2,xmm6
1545	pxor	xmm14,xmm2
1546	pshufb	xmm14,XMMWORD[$L$rol16]
1547	paddd	xmm10,xmm14
1548	pxor	xmm6,xmm10
1549	movdqa	xmm11,xmm6
1550	pslld	xmm11,12
1551	psrld	xmm6,20
1552	pxor	xmm6,xmm11
1553	paddd	xmm2,xmm6
1554	pxor	xmm14,xmm2
1555	pshufb	xmm14,XMMWORD[$L$rol8]
1556	paddd	xmm10,xmm14
1557	pxor	xmm6,xmm10
1558	movdqa	xmm11,xmm6
1559	pslld	xmm11,7
1560	psrld	xmm6,25
1561	pxor	xmm6,xmm11
1562DB	102,15,58,15,246,12	; palignr xmm6,xmm6,12
1563DB	102,69,15,58,15,210,8	; palignr xmm10,xmm10,8
1564DB	102,69,15,58,15,246,4	; palignr xmm14,xmm14,4
1565	movdqa	xmm11,XMMWORD[((160+80))+rbp]
; Reduce: acc = (product mod 2^130) + 5*(product >> 130), i.e. a partial
; reduction modulo 2^130-5. r12 keeps bits 128..129; (r9:r15 & ~3) is
; 4*high and (r9:r15 >> 2) is high, and their sum is added to the low limbs.
1566	mov	r10,r13
1567	mov	r11,r14
1568	mov	r12,r15
1569	and	r12,3
1570	mov	r13,r15
1571	and	r13,-4
1572	mov	r14,r9
1573	shrd	r15,r9,2
1574	shr	r9,2
1575	add	r15,r13
1576	adc	r9,r14
1577	add	r10,r15
1578	adc	r11,r9
1579	adc	r12,0
; --- diagonal round, block 3 ---
1580	movdqa	XMMWORD[(160+80)+rbp],xmm9
1581	paddd	xmm3,xmm7
1582	pxor	xmm15,xmm3
1583	pshufb	xmm15,XMMWORD[$L$rol16]
1584	paddd	xmm11,xmm15
1585	pxor	xmm7,xmm11
1586	movdqa	xmm9,xmm7
1587	pslld	xmm9,12
1588	psrld	xmm7,20
1589	pxor	xmm7,xmm9
1590	paddd	xmm3,xmm7
1591	pxor	xmm15,xmm3
1592	pshufb	xmm15,XMMWORD[$L$rol8]
1593	paddd	xmm11,xmm15
1594	pxor	xmm7,xmm11
1595	movdqa	xmm9,xmm7
1596	pslld	xmm9,7
1597	psrld	xmm7,25
1598	pxor	xmm7,xmm9
1599DB	102,15,58,15,255,12	; palignr xmm7,xmm7,12
1600DB	102,69,15,58,15,219,8	; palignr xmm11,xmm11,8
1601DB	102,69,15,58,15,255,4	; palignr xmm15,xmm15,4
1602	movdqa	xmm9,XMMWORD[((160+80))+rbp]
1603
; Next 16-byte input block; stop after 10 passes.
1604	add	r8,16
1605	cmp	r8,10*16
1606	jb	NEAR $L$open_sse_tail_256_rounds_and_x1hash
1607
; Hash the rest of the full 16-byte input blocks, then finish the four
; ChaCha20 blocks and XOR 192 bytes of input with three of them.
; rcx = rbx rounded down to a multiple of 16 (rbx = bytes remaining);
; r8 continues from the offset reached by the rounds loop above.
1608	mov	rcx,rbx
1609	and	rcx,-16
; $L$open_sse_tail_256_hash: absorb input blocks at [rsi+r8] until r8 >= rcx.
; The test is at the bottom, so at least one block is always absorbed.
1610$L$open_sse_tail_256_hash:
; acc (r12:r11:r10) += 16 input bytes + 2^128.
1611	add	r10,QWORD[((0+0))+r8*1+rsi]
1612	adc	r11,QWORD[((8+0))+r8*1+rsi]
1613	adc	r12,1
; acc *= r, where r = [rbp+168]:[rbp+160]; 4-limb product in r9:r15:r14:r13.
1614	mov	rax,QWORD[((0+160+0))+rbp]
1615	mov	r15,rax
1616	mul	r10
1617	mov	r13,rax
1618	mov	r14,rdx
1619	mov	rax,QWORD[((0+160+0))+rbp]
1620	mul	r11
1621	imul	r15,r12
1622	add	r14,rax
1623	adc	r15,rdx
1624	mov	rax,QWORD[((8+160+0))+rbp]
1625	mov	r9,rax
1626	mul	r10
1627	add	r14,rax
1628	adc	rdx,0
1629	mov	r10,rdx
1630	mov	rax,QWORD[((8+160+0))+rbp]
1631	mul	r11
1632	add	r15,rax
1633	adc	rdx,0
1634	imul	r9,r12
1635	add	r15,r10
1636	adc	r9,rdx
; Partial reduction mod 2^130-5: low 130 bits + 5*(product >> 130).
1637	mov	r10,r13
1638	mov	r11,r14
1639	mov	r12,r15
1640	and	r12,3
1641	mov	r13,r15
1642	and	r13,-4
1643	mov	r14,r9
1644	shrd	r15,r9,2
1645	shr	r9,2
1646	add	r15,r13
1647	adc	r9,r14
1648	add	r10,r15
1649	adc	r11,r9
1650	adc	r12,0
1651
1652	add	r8,16
1653	cmp	r8,rcx
1654	jb	NEAR $L$open_sse_tail_256_hash
; Feed-forward: add the saved starting state (constants, rows at
; [rbp+160+48] / [rbp+160+64], and each block's own counter row) back in.
1655	paddd	xmm3,XMMWORD[$L$chacha20_consts]
1656	paddd	xmm7,XMMWORD[((160+48))+rbp]
1657	paddd	xmm11,XMMWORD[((160+64))+rbp]
1658	paddd	xmm15,XMMWORD[((160+144))+rbp]
1659	paddd	xmm2,XMMWORD[$L$chacha20_consts]
1660	paddd	xmm6,XMMWORD[((160+48))+rbp]
1661	paddd	xmm10,XMMWORD[((160+64))+rbp]
1662	paddd	xmm14,XMMWORD[((160+128))+rbp]
1663	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1664	paddd	xmm5,XMMWORD[((160+48))+rbp]
1665	paddd	xmm9,XMMWORD[((160+64))+rbp]
1666	paddd	xmm13,XMMWORD[((160+112))+rbp]
1667	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1668	paddd	xmm4,XMMWORD[((160+48))+rbp]
1669	paddd	xmm8,XMMWORD[((160+64))+rbp]
1670	paddd	xmm12,XMMWORD[((160+96))+rbp]
; All 16 xmm registers hold keystream, so xmm12 is parked in the scratch slot
; and reused as the load temporary for the first 64 bytes.
1671	movdqa	XMMWORD[(160+80)+rbp],xmm12
; Input bytes 0..63 XOR block (xmm3,xmm7,xmm11,xmm15).
1672	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
1673	pxor	xmm12,xmm3
1674	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
1675	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
1676	pxor	xmm12,xmm7
1677	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
1678	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
1679	pxor	xmm12,xmm11
1680	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
1681	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
1682	pxor	xmm12,xmm15
1683	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
; Input bytes 64..127 XOR block (xmm2,xmm6,xmm10,xmm14); xmm3/7/11/15 are now
; free and serve as load temporaries.
1684	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
1685	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
1686	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
1687	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
1688	pxor	xmm2,xmm3
1689	pxor	xmm6,xmm7
1690	pxor	xmm10,xmm11
1691	pxor	xmm15,xmm14
1692	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
1693	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
1694	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
1695	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
; Input bytes 128..191 XOR block (xmm1,xmm5,xmm9,xmm13).
1696	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
1697	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
1698	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
1699	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
1700	pxor	xmm1,xmm3
1701	pxor	xmm5,xmm7
1702	pxor	xmm9,xmm11
1703	pxor	xmm15,xmm13
1704	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
1705	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
1706	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
1707	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
1708
; Restore xmm12 so (xmm0,xmm4,xmm8,xmm12) is the last keystream block, then
; account for the 192 bytes just processed and fall through to the
; 16-bytes-at-a-time loop that follows.
1709	movdqa	xmm12,XMMWORD[((160+80))+rbp]
1710	sub	rbx,12*16
1711	lea	rsi,[192+rsi]
1712	lea	rdi,[192+rdi]
1713
1714
; $L$open_sse_tail_64_dec_loop: XOR the input 16 bytes at a time with the
; keystream block held in xmm0, xmm4, xmm8, xmm12 (in that order).
; rbx = bytes remaining, rsi = input, rdi = output. After each 16 bytes the
; keystream registers are shifted down one slot so xmm0 is always the next
; 16 bytes of keystream. No hashing is done in this loop.
1715$L$open_sse_tail_64_dec_loop:
1716	cmp	rbx,16
1717	jb	NEAR $L$open_sse_tail_16_init
1718	sub	rbx,16
1719	movdqu	xmm3,XMMWORD[rsi]
1720	pxor	xmm0,xmm3
1721	movdqu	XMMWORD[rdi],xmm0
1722	lea	rsi,[16+rsi]
1723	lea	rdi,[16+rdi]
1724	movdqa	xmm0,xmm4
1725	movdqa	xmm4,xmm8
1726	movdqa	xmm8,xmm12
1727	jmp	NEAR $L$open_sse_tail_64_dec_loop
; Fewer than 16 bytes left: $L$open_sse_tail_16 expects its keystream in xmm1.
1728$L$open_sse_tail_16_init:
1729	movdqa	xmm1,xmm0
1730
1731
; $L$open_sse_tail_16: handle a final partial block of rbx (< 16) bytes.
; In:  rbx = bytes remaining, rsi = input, rdi = output,
;      xmm1 = next 16 bytes of keystream, r12:r11:r10 = hash accumulator.
; The input bytes are gathered one at a time into xmm3 (zero-padded to 16
; bytes), so nothing is read past the end of the input. The padded input is
; absorbed into the accumulator, and the XORed bytes are written out one at
; a time, so nothing is written past the end of the output.
1732$L$open_sse_tail_16:
1733	test	rbx,rbx
1734	jz	NEAR $L$open_sse_finalize
1735
1736
1737
; Build xmm3 from the last input byte backwards: each pass shifts xmm3 up by
; one byte and inserts the next-earlier byte at position 0.
1738	pxor	xmm3,xmm3
1739	lea	rsi,[((-1))+rbx*1+rsi]
1740	mov	r8,rbx
1741$L$open_sse_tail_16_compose:
1742	pslldq	xmm3,1
1743	pinsrb	xmm3,BYTE[rsi],0
1744	sub	rsi,1
1745	sub	r8,1
1746	jnz	NEAR $L$open_sse_tail_16_compose
1747
; r14:r13 = the zero-padded input block, captured before it is XORed.
1748DB	102,73,15,126,221	; movq r13,xmm3
1749	pextrq	r14,xmm3,1
1750
1751	pxor	xmm3,xmm1
1752
1753
; Write the rbx result bytes one at a time (XMMWORD expands to nothing, so
; this is a plain one-byte pextrb store).
1754$L$open_sse_tail_16_extract:
1755	pextrb	XMMWORD[rdi],xmm3,0
1756	psrldq	xmm3,1
1757	add	rdi,1
1758	sub	rbx,1
1759	jne	NEAR $L$open_sse_tail_16_extract
1760
; acc += padded input block + 2^128.
1761	add	r10,r13
1762	adc	r11,r14
1763	adc	r12,1
; acc *= r, where r = [rbp+168]:[rbp+160]; 4-limb product in r9:r15:r14:r13.
1764	mov	rax,QWORD[((0+160+0))+rbp]
1765	mov	r15,rax
1766	mul	r10
1767	mov	r13,rax
1768	mov	r14,rdx
1769	mov	rax,QWORD[((0+160+0))+rbp]
1770	mul	r11
1771	imul	r15,r12
1772	add	r14,rax
1773	adc	r15,rdx
1774	mov	rax,QWORD[((8+160+0))+rbp]
1775	mov	r9,rax
1776	mul	r10
1777	add	r14,rax
1778	adc	rdx,0
1779	mov	r10,rdx
1780	mov	rax,QWORD[((8+160+0))+rbp]
1781	mul	r11
1782	add	r15,rax
1783	adc	rdx,0
1784	imul	r9,r12
1785	add	r15,r10
1786	adc	r9,rdx
; Partial reduction mod 2^130-5: low 130 bits + 5*(product >> 130).
1787	mov	r10,r13
1788	mov	r11,r14
1789	mov	r12,r15
1790	and	r12,3
1791	mov	r13,r15
1792	and	r13,-4
1793	mov	r14,r9
1794	shrd	r15,r9,2
1795	shr	r9,2
1796	add	r15,r13
1797	adc	r9,r14
1798	add	r10,r15
1799	adc	r11,r9
1800	adc	r12,0
1801
1802
; $L$open_sse_finalize: finish the hash, write the 16-byte result and return.
; Steps: absorb the 16 bytes stored at [rbp+160+32] as one last block, fully
; reduce the accumulator modulo 2^130-5, add the 128-bit value stored at
; [rbp+160+16], store the low 128 bits through the pointer saved on the
; stack, then restore callee-saved registers.
; NOTE(review): [rbp+160+32] is written by the function prologue, which is
; outside this block; presumably it holds the two length words -- confirm.
1803$L$open_sse_finalize:
; acc (r12:r11:r10) += block at [rbp+160+32] + 2^128.
1804	add	r10,QWORD[((0+160+32))+rbp]
1805	adc	r11,QWORD[((8+160+32))+rbp]
1806	adc	r12,1
; acc *= r, where r = [rbp+168]:[rbp+160]; 4-limb product in r9:r15:r14:r13.
1807	mov	rax,QWORD[((0+160+0))+rbp]
1808	mov	r15,rax
1809	mul	r10
1810	mov	r13,rax
1811	mov	r14,rdx
1812	mov	rax,QWORD[((0+160+0))+rbp]
1813	mul	r11
1814	imul	r15,r12
1815	add	r14,rax
1816	adc	r15,rdx
1817	mov	rax,QWORD[((8+160+0))+rbp]
1818	mov	r9,rax
1819	mul	r10
1820	add	r14,rax
1821	adc	rdx,0
1822	mov	r10,rdx
1823	mov	rax,QWORD[((8+160+0))+rbp]
1824	mul	r11
1825	add	r15,rax
1826	adc	rdx,0
1827	imul	r9,r12
1828	add	r15,r10
1829	adc	r9,rdx
; Partial reduction mod 2^130-5: low 130 bits + 5*(product >> 130).
1830	mov	r10,r13
1831	mov	r11,r14
1832	mov	r12,r15
1833	and	r12,3
1834	mov	r13,r15
1835	and	r13,-4
1836	mov	r14,r9
1837	shrd	r15,r9,2
1838	shr	r9,2
1839	add	r15,r13
1840	adc	r9,r14
1841	add	r10,r15
1842	adc	r11,r9
1843	adc	r12,0
1844
1845
; Final reduction: try acc - (2^130-5). The three subtrahends are the limbs
; of 2^130-5 (-5 and -1 as unsigned 64-bit values, then 3). If the
; subtraction borrows, acc was already below 2^130-5, so the saved copy is
; restored with cmovc (no branch).
1846	mov	r13,r10
1847	mov	r14,r11
1848	mov	r15,r12
1849	sub	r10,-5
1850	sbb	r11,-1
1851	sbb	r12,3
1852	cmovc	r10,r13
1853	cmovc	r11,r14
1854	cmovc	r12,r15
1855
; Add the 128-bit value at [rbp+160+16]; only the low 128 bits (r11:r10) are
; kept, the carry out is discarded.
1856	add	r10,QWORD[((0+160+16))+rbp]
1857	adc	r11,QWORD[((8+160+16))+rbp]
1858
; Restore xmm6-xmm15 from the save area at the bottom of the frame.
1859	movaps	xmm6,XMMWORD[((0+0))+rbp]
1860	movaps	xmm7,XMMWORD[((16+0))+rbp]
1861	movaps	xmm8,XMMWORD[((32+0))+rbp]
1862	movaps	xmm9,XMMWORD[((48+0))+rbp]
1863	movaps	xmm10,XMMWORD[((64+0))+rbp]
1864	movaps	xmm11,XMMWORD[((80+0))+rbp]
1865	movaps	xmm12,XMMWORD[((96+0))+rbp]
1866	movaps	xmm13,XMMWORD[((112+0))+rbp]
1867	movaps	xmm14,XMMWORD[((128+0))+rbp]
1868	movaps	xmm15,XMMWORD[((144+0))+rbp]
1869
1870
; Release the frame (same 288 + 160 + 32 bytes the seal prologue allocates).
1871	add	rsp,288 + 160 + 32
1872
1873
; Pop the pointer that sits on top of the saved registers and store the
; 16-byte result through it.
1874	pop	r9
1875
1876	mov	QWORD[r9],r10
1877	mov	QWORD[8+r9],r11
1878	pop	r15
1879
1880	pop	r14
1881
1882	pop	r13
1883
1884	pop	r12
1885
1886	pop	rbx
1887
1888	pop	rbp
1889
; rdi/rsi are callee-saved on Win64; reload them from the caller's home
; space (return address is at [rsp] now).
1890	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1891	mov	rsi,QWORD[16+rsp]
1892	ret
1893
; $L$open_sse_128: short-input path (by the label name, at most 128 bytes).
; Builds the starting state for three ChaCha20 blocks from the 48 bytes at r9:
;   row a: xmm0,xmm1,xmm2    = $L$chacha20_consts
;   row b: xmm4,xmm5,xmm6    = [r9]
;   row c: xmm8,xmm9,xmm10   = [r9+16]
;   row d: xmm12,xmm13,xmm14 = [r9+32], +1, +2 (lowest dword via $L$sse_inc)
; Copies kept for the feed-forward after the rounds: xmm7 = row b,
; xmm11 = row c, xmm15 = row d of the second block (counter+1).
; r10 = number of double rounds to run.
1894$L$open_sse_128:
1895
1896	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
1897	movdqa	xmm1,xmm0
1898	movdqa	xmm2,xmm0
1899	movdqu	xmm4,XMMWORD[r9]
1900	movdqa	xmm5,xmm4
1901	movdqa	xmm6,xmm4
1902	movdqu	xmm8,XMMWORD[16+r9]
1903	movdqa	xmm9,xmm8
1904	movdqa	xmm10,xmm8
1905	movdqu	xmm12,XMMWORD[32+r9]
1906	movdqa	xmm13,xmm12
1907	paddd	xmm13,XMMWORD[$L$sse_inc]
1908	movdqa	xmm14,xmm13
1909	paddd	xmm14,XMMWORD[$L$sse_inc]
1910	movdqa	xmm7,xmm4
1911	movdqa	xmm11,xmm8
1912	movdqa	xmm15,xmm13
1913	mov	r10,10
1914
; $L$open_sse_128_rounds: 10 ChaCha20 double rounds (r10 counts down from 10)
; over three blocks: (xmm0,4,8,12), (xmm1,5,9,13), (xmm2,6,10,14).
; xmm3 is the scratch register for the rotates; xmm7/xmm11/xmm15 hold saved
; input rows and are not touched inside the loop.
; The DB lines are hand-encoded PALIGNR instructions (decoded at each line).
; After the loop the first block supplies the one-time hash key material and
; the other two blocks are the keystream for the data.
1915$L$open_sse_128_rounds:
; --- column round, block 0 ---
1916	paddd	xmm0,xmm4
1917	pxor	xmm12,xmm0
1918	pshufb	xmm12,XMMWORD[$L$rol16]
1919	paddd	xmm8,xmm12
1920	pxor	xmm4,xmm8
; b = rotl32(b, 12)
1921	movdqa	xmm3,xmm4
1922	pslld	xmm3,12
1923	psrld	xmm4,20
1924	pxor	xmm4,xmm3
1925	paddd	xmm0,xmm4
1926	pxor	xmm12,xmm0
1927	pshufb	xmm12,XMMWORD[$L$rol8]
1928	paddd	xmm8,xmm12
1929	pxor	xmm4,xmm8
; b = rotl32(b, 7)
1930	movdqa	xmm3,xmm4
1931	pslld	xmm3,7
1932	psrld	xmm4,25
1933	pxor	xmm4,xmm3
; Rotate rows b/c/d by 1/2/3 dwords so the next round works on diagonals.
1934DB	102,15,58,15,228,4	; palignr xmm4,xmm4,4
1935DB	102,69,15,58,15,192,8	; palignr xmm8,xmm8,8
1936DB	102,69,15,58,15,228,12	; palignr xmm12,xmm12,12
; --- column round, block 1 ---
1937	paddd	xmm1,xmm5
1938	pxor	xmm13,xmm1
1939	pshufb	xmm13,XMMWORD[$L$rol16]
1940	paddd	xmm9,xmm13
1941	pxor	xmm5,xmm9
1942	movdqa	xmm3,xmm5
1943	pslld	xmm3,12
1944	psrld	xmm5,20
1945	pxor	xmm5,xmm3
1946	paddd	xmm1,xmm5
1947	pxor	xmm13,xmm1
1948	pshufb	xmm13,XMMWORD[$L$rol8]
1949	paddd	xmm9,xmm13
1950	pxor	xmm5,xmm9
1951	movdqa	xmm3,xmm5
1952	pslld	xmm3,7
1953	psrld	xmm5,25
1954	pxor	xmm5,xmm3
1955DB	102,15,58,15,237,4	; palignr xmm5,xmm5,4
1956DB	102,69,15,58,15,201,8	; palignr xmm9,xmm9,8
1957DB	102,69,15,58,15,237,12	; palignr xmm13,xmm13,12
; --- column round, block 2 ---
1958	paddd	xmm2,xmm6
1959	pxor	xmm14,xmm2
1960	pshufb	xmm14,XMMWORD[$L$rol16]
1961	paddd	xmm10,xmm14
1962	pxor	xmm6,xmm10
1963	movdqa	xmm3,xmm6
1964	pslld	xmm3,12
1965	psrld	xmm6,20
1966	pxor	xmm6,xmm3
1967	paddd	xmm2,xmm6
1968	pxor	xmm14,xmm2
1969	pshufb	xmm14,XMMWORD[$L$rol8]
1970	paddd	xmm10,xmm14
1971	pxor	xmm6,xmm10
1972	movdqa	xmm3,xmm6
1973	pslld	xmm3,7
1974	psrld	xmm6,25
1975	pxor	xmm6,xmm3
1976DB	102,15,58,15,246,4	; palignr xmm6,xmm6,4
1977DB	102,69,15,58,15,210,8	; palignr xmm10,xmm10,8
1978DB	102,69,15,58,15,246,12	; palignr xmm14,xmm14,12
; --- diagonal round, block 0 ---
1979	paddd	xmm0,xmm4
1980	pxor	xmm12,xmm0
1981	pshufb	xmm12,XMMWORD[$L$rol16]
1982	paddd	xmm8,xmm12
1983	pxor	xmm4,xmm8
1984	movdqa	xmm3,xmm4
1985	pslld	xmm3,12
1986	psrld	xmm4,20
1987	pxor	xmm4,xmm3
1988	paddd	xmm0,xmm4
1989	pxor	xmm12,xmm0
1990	pshufb	xmm12,XMMWORD[$L$rol8]
1991	paddd	xmm8,xmm12
1992	pxor	xmm4,xmm8
1993	movdqa	xmm3,xmm4
1994	pslld	xmm3,7
1995	psrld	xmm4,25
1996	pxor	xmm4,xmm3
; Undo the diagonal rotation (rows b/c/d rotated by 3/2/1 dwords).
1997DB	102,15,58,15,228,12	; palignr xmm4,xmm4,12
1998DB	102,69,15,58,15,192,8	; palignr xmm8,xmm8,8
1999DB	102,69,15,58,15,228,4	; palignr xmm12,xmm12,4
; --- diagonal round, block 1 ---
2000	paddd	xmm1,xmm5
2001	pxor	xmm13,xmm1
2002	pshufb	xmm13,XMMWORD[$L$rol16]
2003	paddd	xmm9,xmm13
2004	pxor	xmm5,xmm9
2005	movdqa	xmm3,xmm5
2006	pslld	xmm3,12
2007	psrld	xmm5,20
2008	pxor	xmm5,xmm3
2009	paddd	xmm1,xmm5
2010	pxor	xmm13,xmm1
2011	pshufb	xmm13,XMMWORD[$L$rol8]
2012	paddd	xmm9,xmm13
2013	pxor	xmm5,xmm9
2014	movdqa	xmm3,xmm5
2015	pslld	xmm3,7
2016	psrld	xmm5,25
2017	pxor	xmm5,xmm3
2018DB	102,15,58,15,237,12	; palignr xmm5,xmm5,12
2019DB	102,69,15,58,15,201,8	; palignr xmm9,xmm9,8
2020DB	102,69,15,58,15,237,4	; palignr xmm13,xmm13,4
; --- diagonal round, block 2 ---
2021	paddd	xmm2,xmm6
2022	pxor	xmm14,xmm2
2023	pshufb	xmm14,XMMWORD[$L$rol16]
2024	paddd	xmm10,xmm14
2025	pxor	xmm6,xmm10
2026	movdqa	xmm3,xmm6
2027	pslld	xmm3,12
2028	psrld	xmm6,20
2029	pxor	xmm6,xmm3
2030	paddd	xmm2,xmm6
2031	pxor	xmm14,xmm2
2032	pshufb	xmm14,XMMWORD[$L$rol8]
2033	paddd	xmm10,xmm14
2034	pxor	xmm6,xmm10
2035	movdqa	xmm3,xmm6
2036	pslld	xmm3,7
2037	psrld	xmm6,25
2038	pxor	xmm6,xmm3
2039DB	102,15,58,15,246,12	; palignr xmm6,xmm6,12
2040DB	102,69,15,58,15,210,8	; palignr xmm10,xmm10,8
2041DB	102,69,15,58,15,246,4	; palignr xmm14,xmm14,4
2042
2043	dec	r10
2044	jnz	NEAR $L$open_sse_128_rounds
; Feed-forward. Block 0 only needs rows a and b (its rows c and d are never
; used again); blocks 1 and 2 get all four rows. xmm15 holds counter+1 for
; block 1 and is bumped to counter+2 before being added to block 2.
2045	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2046	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2047	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2048	paddd	xmm4,xmm7
2049	paddd	xmm5,xmm7
2050	paddd	xmm6,xmm7
2051	paddd	xmm9,xmm11
2052	paddd	xmm10,xmm11
2053	paddd	xmm13,xmm15
2054	paddd	xmm15,XMMWORD[$L$sse_inc]
2055	paddd	xmm14,xmm15
2056
; Hash key: row a of block 0, masked with $L$clamp, becomes the multiplier r
; at [rbp+160]; row b is stored at [rbp+160+16] and is added to the
; accumulator at the very end ($L$open_sse_finalize).
2057	pand	xmm0,XMMWORD[$L$clamp]
2058	movdqa	XMMWORD[(160+0)+rbp],xmm0
2059	movdqa	XMMWORD[(160+16)+rbp],xmm4
2060
; "mov r8,r8" changes nothing; r8 is passed through unchanged to
; poly_hash_ad_internal (defined outside this chunk).
; NOTE(review): presumably the helper hashes the additional data and leaves
; the accumulator in r12:r11:r10 -- confirm against its definition.
2061	mov	r8,r8
2062	call	poly_hash_ad_internal
; $L$open_sse_128_xor_hash: process the input 16 bytes at a time.
; Each pass absorbs the 16 input bytes at [rsi] into the accumulator
; (r12:r11:r10), XORs them with the next keystream register (xmm1) and writes
; the result to [rdi]. The keystream is consumed as a queue in the order
; xmm1, xmm5, xmm9, xmm13, xmm2, xmm6, xmm10, xmm14 (two 64-byte blocks).
; With fewer than 16 bytes left (including zero), control goes to
; $L$open_sse_tail_16 with the next keystream still in xmm1.
2063$L$open_sse_128_xor_hash:
2064	cmp	rbx,16
2065	jb	NEAR $L$open_sse_tail_16
2066	sub	rbx,16
; acc += 16 input bytes + 2^128 (hash is taken over the input, before XOR).
2067	add	r10,QWORD[((0+0))+rsi]
2068	adc	r11,QWORD[((8+0))+rsi]
2069	adc	r12,1
2070
2071
2072	movdqu	xmm3,XMMWORD[rsi]
2073	pxor	xmm1,xmm3
2074	movdqu	XMMWORD[rdi],xmm1
2075	lea	rsi,[16+rsi]
2076	lea	rdi,[16+rdi]
; acc *= r, where r = [rbp+168]:[rbp+160]; 4-limb product in r9:r15:r14:r13.
2077	mov	rax,QWORD[((0+160+0))+rbp]
2078	mov	r15,rax
2079	mul	r10
2080	mov	r13,rax
2081	mov	r14,rdx
2082	mov	rax,QWORD[((0+160+0))+rbp]
2083	mul	r11
2084	imul	r15,r12
2085	add	r14,rax
2086	adc	r15,rdx
2087	mov	rax,QWORD[((8+160+0))+rbp]
2088	mov	r9,rax
2089	mul	r10
2090	add	r14,rax
2091	adc	rdx,0
2092	mov	r10,rdx
2093	mov	rax,QWORD[((8+160+0))+rbp]
2094	mul	r11
2095	add	r15,rax
2096	adc	rdx,0
2097	imul	r9,r12
2098	add	r15,r10
2099	adc	r9,rdx
; Partial reduction mod 2^130-5: low 130 bits + 5*(product >> 130).
2100	mov	r10,r13
2101	mov	r11,r14
2102	mov	r12,r15
2103	and	r12,3
2104	mov	r13,r15
2105	and	r13,-4
2106	mov	r14,r9
2107	shrd	r15,r9,2
2108	shr	r9,2
2109	add	r15,r13
2110	adc	r9,r14
2111	add	r10,r15
2112	adc	r11,r9
2113	adc	r12,0
2114
2115
; Advance the keystream queue by one register.
2116	movdqa	xmm1,xmm5
2117	movdqa	xmm5,xmm9
2118	movdqa	xmm9,xmm13
2119	movdqa	xmm13,xmm2
2120	movdqa	xmm2,xmm6
2121	movdqa	xmm6,xmm10
2122	movdqa	xmm10,xmm14
2123	jmp	NEAR $L$open_sse_128_xor_hash
2124$L$SEH_end_chacha20_poly1305_open:
2125
2126
2127
2128
2129
2130
2131
2132
; chacha20_poly1305_seal: entry point, Win64 prologue, CPU dispatch and
; initial four-block ChaCha20 state for the SSE path.
;
; The six Win64 arguments (rcx, rdx, r8, r9, [rsp+40], [rsp+48]) are moved
; into rdi, rsi, rdx, rcx, r8, r9, so the body can use one register
; assignment. As used in the code that follows:
;   rdi = output pointer (stores go to [rdi])
;   rsi = input pointer  (loads come from [rsi])
;   rdx = input length in bytes (copied to rbx, the "bytes remaining" count)
;   r8  = stored as the first length word at [rbp+160+32]
;   r9  = pointer to a data block: [r9], [r9+16], [r9+32] become ChaCha20
;         rows b, c, d, and the qword at [r9+56] is added to rdx to form the
;         second length word at [rbp+160+40]
; NOTE(review): rcx (4th argument) is not read in this block; presumably it
; is the additional-data pointer consumed by poly_hash_ad_internal -- confirm.
;
; Frame: rbp = 32-byte aligned base inside a 288+160+32 byte allocation.
;   [rbp+0   .. +159] saved xmm6-xmm15 (callee-saved on Win64)
;   [rbp+160 .. +175] hash multiplier r    [rbp+176 .. +191] final addend
;   [rbp+192 .. +207] length words         [rbp+208], [rbp+224] rows b, c
;   [rbp+240]         16-byte scratch      [rbp+256 .. +319] 4 counter rows
2133global	chacha20_poly1305_seal
2134
2135ALIGN	64
2136chacha20_poly1305_seal:
; rdi/rsi are callee-saved on Win64: park them in the caller's home space.
2137	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2138	mov	QWORD[16+rsp],rsi
2139	mov	rax,rsp
2140$L$SEH_begin_chacha20_poly1305_seal:
2141	mov	rdi,rcx
2142	mov	rsi,rdx
2143	mov	rdx,r8
2144	mov	rcx,r9
2145	mov	r8,QWORD[40+rsp]
2146	mov	r9,QWORD[48+rsp]
2147
2148
2149
2150_CET_ENDBR
2151	push	rbp
2152
2153	push	rbx
2154
2155	push	r12
2156
2157	push	r13
2158
2159	push	r14
2160
2161	push	r15
2162
2163
2164
; r9 is pushed last so the epilogue can pop it first.
2165	push	r9
2166
2167	sub	rsp,288 + 160 + 32
2168
2169	lea	rbp,[32+rsp]
2170	and	rbp,-32
2171
2172	movaps	XMMWORD[(0+0)+rbp],xmm6
2173	movaps	XMMWORD[(16+0)+rbp],xmm7
2174	movaps	XMMWORD[(32+0)+rbp],xmm8
2175	movaps	XMMWORD[(48+0)+rbp],xmm9
2176	movaps	XMMWORD[(64+0)+rbp],xmm10
2177	movaps	XMMWORD[(80+0)+rbp],xmm11
2178	movaps	XMMWORD[(96+0)+rbp],xmm12
2179	movaps	XMMWORD[(112+0)+rbp],xmm13
2180	movaps	XMMWORD[(128+0)+rbp],xmm14
2181	movaps	XMMWORD[(144+0)+rbp],xmm15
2182
; Length words for the final hash block: r8 and ([r9+56] + rdx).
2183	mov	rbx,QWORD[56+r9]
2184	add	rbx,rdx
2185	mov	QWORD[((0+160+32))+rbp],r8
2186	mov	QWORD[((8+160+32))+rbp],rbx
2187	mov	rbx,rdx
2188
; Take the AVX2 implementation when bits 5 and 8 (mask 288 = 0x120) of the
; dword at OPENSSL_ia32cap_P+8 are both set.
; NOTE(review): presumably these are the AVX2 and BMI2 feature bits --
; confirm against the OPENSSL_ia32cap_P layout.
2189	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
2190	and	eax,288
2191	xor	eax,288
2192	jz	NEAR chacha20_poly1305_seal_avx2
2193
; Inputs of at most 128 bytes use the dedicated short path.
2194	cmp	rbx,128
2195	jbe	NEAR $L$seal_sse_128
2196
; Starting state for four blocks: rows a = constants, b = [r9], c = [r9+16],
; d = [r9+32] with the lowest dword incremented per block.
2197	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
2198	movdqu	xmm4,XMMWORD[r9]
2199	movdqu	xmm8,XMMWORD[16+r9]
2200	movdqu	xmm12,XMMWORD[32+r9]
2201
2202	movdqa	xmm1,xmm0
2203	movdqa	xmm2,xmm0
2204	movdqa	xmm3,xmm0
2205	movdqa	xmm5,xmm4
2206	movdqa	xmm6,xmm4
2207	movdqa	xmm7,xmm4
2208	movdqa	xmm9,xmm8
2209	movdqa	xmm10,xmm8
2210	movdqa	xmm11,xmm8
; Counter rows: xmm15 = ctr+0, xmm14 = ctr+1, xmm13 = ctr+2, xmm12 = ctr+3.
2211	movdqa	xmm15,xmm12
2212	paddd	xmm12,XMMWORD[$L$sse_inc]
2213	movdqa	xmm14,xmm12
2214	paddd	xmm12,XMMWORD[$L$sse_inc]
2215	movdqa	xmm13,xmm12
2216	paddd	xmm12,XMMWORD[$L$sse_inc]
2217
; Save rows b, c and all four counter rows for the feed-forward.
2218	movdqa	XMMWORD[(160+48)+rbp],xmm4
2219	movdqa	XMMWORD[(160+64)+rbp],xmm8
2220	movdqa	XMMWORD[(160+96)+rbp],xmm12
2221	movdqa	XMMWORD[(160+112)+rbp],xmm13
2222	movdqa	XMMWORD[(160+128)+rbp],xmm14
2223	movdqa	XMMWORD[(160+144)+rbp],xmm15
; r10 = number of double rounds for the loop that follows.
2224	mov	r10,10
; $L$seal_sse_init_rounds: 10 ChaCha20 double rounds (r10 counts down) over
; four blocks processed side by side, with no hashing interleaved.
; xmm0-3 = row a, xmm4-7 = row b, xmm8-11 = row c, xmm12-15 = row d.
; All 16 xmm registers are live, so xmm8 (row c of block 0) is repeatedly
; spilled to [rbp+160+80] and reused for the byte-shuffle mask or as the
; rotate scratch register.
; The DB lines are hand-encoded PSHUFB / PALIGNR instructions (decoded at
; each line). After the loop: feed-forward, derive the hash key from the
; counter+0 block, hash the additional data, and encrypt the first 128 bytes.
2225$L$seal_sse_init_rounds:
; --- column round, all four blocks ---
2226	movdqa	XMMWORD[(160+80)+rbp],xmm8
2227	movdqa	xmm8,XMMWORD[$L$rol16]
2228	paddd	xmm3,xmm7
2229	paddd	xmm2,xmm6
2230	paddd	xmm1,xmm5
2231	paddd	xmm0,xmm4
2232	pxor	xmm15,xmm3
2233	pxor	xmm14,xmm2
2234	pxor	xmm13,xmm1
2235	pxor	xmm12,xmm0
; d = rotl32(d, 16) via the $L$rol16 shuffle mask in xmm8.
2236DB	102,69,15,56,0,248	; pshufb xmm15,xmm8
2237DB	102,69,15,56,0,240	; pshufb xmm14,xmm8
2238DB	102,69,15,56,0,232	; pshufb xmm13,xmm8
2239DB	102,69,15,56,0,224	; pshufb xmm12,xmm8
2240	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2241	paddd	xmm11,xmm15
2242	paddd	xmm10,xmm14
2243	paddd	xmm9,xmm13
2244	paddd	xmm8,xmm12
2245	pxor	xmm7,xmm11
2246	pxor	xmm6,xmm10
2247	pxor	xmm5,xmm9
2248	pxor	xmm4,xmm8
; b = rotl32(b, 12), xmm8 as scratch.
2249	movdqa	XMMWORD[(160+80)+rbp],xmm8
2250	movdqa	xmm8,xmm7
2251	psrld	xmm8,20
2252	pslld	xmm7,32-20
2253	pxor	xmm7,xmm8
2254	movdqa	xmm8,xmm6
2255	psrld	xmm8,20
2256	pslld	xmm6,32-20
2257	pxor	xmm6,xmm8
2258	movdqa	xmm8,xmm5
2259	psrld	xmm8,20
2260	pslld	xmm5,32-20
2261	pxor	xmm5,xmm8
2262	movdqa	xmm8,xmm4
2263	psrld	xmm8,20
2264	pslld	xmm4,32-20
2265	pxor	xmm4,xmm8
2266	movdqa	xmm8,XMMWORD[$L$rol8]
2267	paddd	xmm3,xmm7
2268	paddd	xmm2,xmm6
2269	paddd	xmm1,xmm5
2270	paddd	xmm0,xmm4
2271	pxor	xmm15,xmm3
2272	pxor	xmm14,xmm2
2273	pxor	xmm13,xmm1
2274	pxor	xmm12,xmm0
; d = rotl32(d, 8) via the $L$rol8 shuffle mask in xmm8.
2275DB	102,69,15,56,0,248	; pshufb xmm15,xmm8
2276DB	102,69,15,56,0,240	; pshufb xmm14,xmm8
2277DB	102,69,15,56,0,232	; pshufb xmm13,xmm8
2278DB	102,69,15,56,0,224	; pshufb xmm12,xmm8
2279	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2280	paddd	xmm11,xmm15
2281	paddd	xmm10,xmm14
2282	paddd	xmm9,xmm13
2283	paddd	xmm8,xmm12
2284	pxor	xmm7,xmm11
2285	pxor	xmm6,xmm10
2286	pxor	xmm5,xmm9
2287	pxor	xmm4,xmm8
; b = rotl32(b, 7), xmm8 as scratch.
2288	movdqa	XMMWORD[(160+80)+rbp],xmm8
2289	movdqa	xmm8,xmm7
2290	psrld	xmm8,25
2291	pslld	xmm7,32-25
2292	pxor	xmm7,xmm8
2293	movdqa	xmm8,xmm6
2294	psrld	xmm8,25
2295	pslld	xmm6,32-25
2296	pxor	xmm6,xmm8
2297	movdqa	xmm8,xmm5
2298	psrld	xmm8,25
2299	pslld	xmm5,32-25
2300	pxor	xmm5,xmm8
2301	movdqa	xmm8,xmm4
2302	psrld	xmm8,25
2303	pslld	xmm4,32-25
2304	pxor	xmm4,xmm8
2305	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Rotate rows b/c/d by 1/2/3 dwords so the next round works on diagonals.
2306DB	102,15,58,15,255,4	; palignr xmm7,xmm7,4
2307DB	102,69,15,58,15,219,8	; palignr xmm11,xmm11,8
2308DB	102,69,15,58,15,255,12	; palignr xmm15,xmm15,12
2309DB	102,15,58,15,246,4	; palignr xmm6,xmm6,4
2310DB	102,69,15,58,15,210,8	; palignr xmm10,xmm10,8
2311DB	102,69,15,58,15,246,12	; palignr xmm14,xmm14,12
2312DB	102,15,58,15,237,4	; palignr xmm5,xmm5,4
2313DB	102,69,15,58,15,201,8	; palignr xmm9,xmm9,8
2314DB	102,69,15,58,15,237,12	; palignr xmm13,xmm13,12
2315DB	102,15,58,15,228,4	; palignr xmm4,xmm4,4
2316DB	102,69,15,58,15,192,8	; palignr xmm8,xmm8,8
2317DB	102,69,15,58,15,228,12	; palignr xmm12,xmm12,12
; --- diagonal round, all four blocks ---
2318	movdqa	XMMWORD[(160+80)+rbp],xmm8
2319	movdqa	xmm8,XMMWORD[$L$rol16]
2320	paddd	xmm3,xmm7
2321	paddd	xmm2,xmm6
2322	paddd	xmm1,xmm5
2323	paddd	xmm0,xmm4
2324	pxor	xmm15,xmm3
2325	pxor	xmm14,xmm2
2326	pxor	xmm13,xmm1
2327	pxor	xmm12,xmm0
2328DB	102,69,15,56,0,248	; pshufb xmm15,xmm8
2329DB	102,69,15,56,0,240	; pshufb xmm14,xmm8
2330DB	102,69,15,56,0,232	; pshufb xmm13,xmm8
2331DB	102,69,15,56,0,224	; pshufb xmm12,xmm8
2332	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2333	paddd	xmm11,xmm15
2334	paddd	xmm10,xmm14
2335	paddd	xmm9,xmm13
2336	paddd	xmm8,xmm12
2337	pxor	xmm7,xmm11
2338	pxor	xmm6,xmm10
2339	pxor	xmm5,xmm9
2340	pxor	xmm4,xmm8
2341	movdqa	XMMWORD[(160+80)+rbp],xmm8
2342	movdqa	xmm8,xmm7
2343	psrld	xmm8,20
2344	pslld	xmm7,32-20
2345	pxor	xmm7,xmm8
2346	movdqa	xmm8,xmm6
2347	psrld	xmm8,20
2348	pslld	xmm6,32-20
2349	pxor	xmm6,xmm8
2350	movdqa	xmm8,xmm5
2351	psrld	xmm8,20
2352	pslld	xmm5,32-20
2353	pxor	xmm5,xmm8
2354	movdqa	xmm8,xmm4
2355	psrld	xmm8,20
2356	pslld	xmm4,32-20
2357	pxor	xmm4,xmm8
2358	movdqa	xmm8,XMMWORD[$L$rol8]
2359	paddd	xmm3,xmm7
2360	paddd	xmm2,xmm6
2361	paddd	xmm1,xmm5
2362	paddd	xmm0,xmm4
2363	pxor	xmm15,xmm3
2364	pxor	xmm14,xmm2
2365	pxor	xmm13,xmm1
2366	pxor	xmm12,xmm0
2367DB	102,69,15,56,0,248	; pshufb xmm15,xmm8
2368DB	102,69,15,56,0,240	; pshufb xmm14,xmm8
2369DB	102,69,15,56,0,232	; pshufb xmm13,xmm8
2370DB	102,69,15,56,0,224	; pshufb xmm12,xmm8
2371	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2372	paddd	xmm11,xmm15
2373	paddd	xmm10,xmm14
2374	paddd	xmm9,xmm13
2375	paddd	xmm8,xmm12
2376	pxor	xmm7,xmm11
2377	pxor	xmm6,xmm10
2378	pxor	xmm5,xmm9
2379	pxor	xmm4,xmm8
2380	movdqa	XMMWORD[(160+80)+rbp],xmm8
2381	movdqa	xmm8,xmm7
2382	psrld	xmm8,25
2383	pslld	xmm7,32-25
2384	pxor	xmm7,xmm8
2385	movdqa	xmm8,xmm6
2386	psrld	xmm8,25
2387	pslld	xmm6,32-25
2388	pxor	xmm6,xmm8
2389	movdqa	xmm8,xmm5
2390	psrld	xmm8,25
2391	pslld	xmm5,32-25
2392	pxor	xmm5,xmm8
2393	movdqa	xmm8,xmm4
2394	psrld	xmm8,25
2395	pslld	xmm4,32-25
2396	pxor	xmm4,xmm8
2397	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Undo the diagonal rotation (rows b/c/d rotated by 3/2/1 dwords).
2398DB	102,15,58,15,255,12	; palignr xmm7,xmm7,12
2399DB	102,69,15,58,15,219,8	; palignr xmm11,xmm11,8
2400DB	102,69,15,58,15,255,4	; palignr xmm15,xmm15,4
2401DB	102,15,58,15,246,12	; palignr xmm6,xmm6,12
2402DB	102,69,15,58,15,210,8	; palignr xmm10,xmm10,8
2403DB	102,69,15,58,15,246,4	; palignr xmm14,xmm14,4
2404DB	102,15,58,15,237,12	; palignr xmm5,xmm5,12
2405DB	102,69,15,58,15,201,8	; palignr xmm9,xmm9,8
2406DB	102,69,15,58,15,237,4	; palignr xmm13,xmm13,4
2407DB	102,15,58,15,228,12	; palignr xmm4,xmm4,12
2408DB	102,69,15,58,15,192,8	; palignr xmm8,xmm8,8
2409DB	102,69,15,58,15,228,4	; palignr xmm12,xmm12,4
2410
2411	dec	r10
2412	jnz	NEAR $L$seal_sse_init_rounds
; Feed-forward: add the saved starting state back into every block.
2413	paddd	xmm3,XMMWORD[$L$chacha20_consts]
2414	paddd	xmm7,XMMWORD[((160+48))+rbp]
2415	paddd	xmm11,XMMWORD[((160+64))+rbp]
2416	paddd	xmm15,XMMWORD[((160+144))+rbp]
2417	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2418	paddd	xmm6,XMMWORD[((160+48))+rbp]
2419	paddd	xmm10,XMMWORD[((160+64))+rbp]
2420	paddd	xmm14,XMMWORD[((160+128))+rbp]
2421	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2422	paddd	xmm5,XMMWORD[((160+48))+rbp]
2423	paddd	xmm9,XMMWORD[((160+64))+rbp]
2424	paddd	xmm13,XMMWORD[((160+112))+rbp]
2425	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2426	paddd	xmm4,XMMWORD[((160+48))+rbp]
2427	paddd	xmm8,XMMWORD[((160+64))+rbp]
2428	paddd	xmm12,XMMWORD[((160+96))+rbp]
2429
2430
; Hash key from the counter+0 block: row a masked with $L$clamp becomes the
; multiplier r at [rbp+160]; row b is stored at [rbp+160+16] for the final
; addition. xmm3/xmm7/xmm11/xmm15 are free for reuse afterwards.
2431	pand	xmm3,XMMWORD[$L$clamp]
2432	movdqa	XMMWORD[(160+0)+rbp],xmm3
2433	movdqa	XMMWORD[(160+16)+rbp],xmm7
2434
; "mov r8,r8" changes nothing; r8 is passed through unchanged to
; poly_hash_ad_internal (defined outside this chunk).
; NOTE(review): presumably the helper hashes the additional data and leaves
; the accumulator in r12:r11:r10 -- confirm against its definition.
2435	mov	r8,r8
2436	call	poly_hash_ad_internal
; Input bytes 0..63 XOR block (xmm2,xmm6,xmm10,xmm14) -> [rdi+0..63].
2437	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
2438	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
2439	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
2440	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
2441	pxor	xmm2,xmm3
2442	pxor	xmm6,xmm7
2443	pxor	xmm10,xmm11
2444	pxor	xmm15,xmm14
2445	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
2446	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
2447	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
2448	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
; Input bytes 64..127 XOR block (xmm1,xmm5,xmm9,xmm13) -> [rdi+64..127].
2449	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
2450	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
2451	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
2452	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
2453	pxor	xmm1,xmm3
2454	pxor	xmm5,xmm7
2455	pxor	xmm9,xmm11
2456	pxor	xmm15,xmm13
2457	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
2458	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
2459	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
2460	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
2461
; More than 192 bytes in total: use the last block too and enter the main
; loop. Otherwise account for the 128 bytes written (rcx = 128, rsi advanced,
; rdi deliberately left pointing at the start of the output) and jump to
; $L$seal_sse_128_tail_hash, which is outside this chunk.
2462	cmp	rbx,12*16
2463	ja	NEAR $L$seal_sse_main_init
2464	mov	rcx,8*16
2465	sub	rbx,8*16
2466	lea	rsi,[128+rsi]
2467	jmp	NEAR $L$seal_sse_128_tail_hash
; $L$seal_sse_main_init: encrypt input bytes 128..191 with the remaining
; block (xmm0,xmm4,xmm8,xmm12), then choose the next stage by how many bytes
; are left.
; rsi is advanced past the 192 input bytes consumed so far, while rdi is NOT
; advanced: the output written so far has not been hashed yet, and the
; hashing code reads it back through rdi, advancing rdi as it goes.
2468$L$seal_sse_main_init:
2469	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
2470	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
2471	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
2472	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
2473	pxor	xmm0,xmm3
2474	pxor	xmm4,xmm7
2475	pxor	xmm8,xmm11
2476	pxor	xmm15,xmm12
2477	movdqu	XMMWORD[(0 + 128)+rdi],xmm0
2478	movdqu	XMMWORD[(16 + 128)+rdi],xmm4
2479	movdqu	XMMWORD[(32 + 128)+rdi],xmm8
2480	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
2481
; NOTE: this value of rcx is overwritten by "mov rcx,2" below before it is
; read (dead store, kept as emitted by the generator).
2482	mov	rcx,12*16
2483	sub	rbx,12*16
2484	lea	rsi,[192+rsi]
; Loop counters for $L$seal_sse_main_rounds: rcx = 2, r8 = 8.
2485	mov	rcx,2
2486	mov	r8,8
; Remaining length: <= 64, <= 128, <= 192 bytes go to the matching tail
; handler; anything larger falls through into the main loop.
2487	cmp	rbx,4*16
2488	jbe	NEAR $L$seal_sse_tail_64
2489	cmp	rbx,8*16
2490	jbe	NEAR $L$seal_sse_tail_128
2491	cmp	rbx,12*16
2492	jbe	NEAR $L$seal_sse_tail_192
2493
; $L$seal_sse_main_loop: top of the main loop; build the starting state for
; the next four ChaCha20 blocks.
;   row a: xmm0..xmm3   = $L$chacha20_consts
;   row b: xmm4..xmm7   = saved row at [rbp+160+48]
;   row c: xmm8..xmm11  = saved row at [rbp+160+64]
;   row d: xmm15 = ctr+1, xmm14 = ctr+2, xmm13 = ctr+3, xmm12 = ctr+4,
;          where ctr is the counter row read from [rbp+160+96]
; The four counter rows are saved for the later feed-forward, and
; [rbp+160+96] is updated to the newest counter (ctr+4).
2494$L$seal_sse_main_loop:
2495	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
2496	movdqa	xmm4,XMMWORD[((160+48))+rbp]
2497	movdqa	xmm8,XMMWORD[((160+64))+rbp]
2498	movdqa	xmm1,xmm0
2499	movdqa	xmm5,xmm4
2500	movdqa	xmm9,xmm8
2501	movdqa	xmm2,xmm0
2502	movdqa	xmm6,xmm4
2503	movdqa	xmm10,xmm8
2504	movdqa	xmm3,xmm0
2505	movdqa	xmm7,xmm4
2506	movdqa	xmm11,xmm8
2507	movdqa	xmm15,XMMWORD[((160+96))+rbp]
2508	paddd	xmm15,XMMWORD[$L$sse_inc]
2509	movdqa	xmm14,xmm15
2510	paddd	xmm14,XMMWORD[$L$sse_inc]
2511	movdqa	xmm13,xmm14
2512	paddd	xmm13,XMMWORD[$L$sse_inc]
2513	movdqa	xmm12,xmm13
2514	paddd	xmm12,XMMWORD[$L$sse_inc]
2515	movdqa	XMMWORD[(160+96)+rbp],xmm12
2516	movdqa	XMMWORD[(160+112)+rbp],xmm13
2517	movdqa	XMMWORD[(160+128)+rbp],xmm14
2518	movdqa	XMMWORD[(160+144)+rbp],xmm15
2519
; $L$seal_sse_main_rounds: one ChaCha20 double round over four blocks,
; interleaved with absorbing 16 bytes of already-written output at [rdi]
; into the hash accumulator (r12:r11:r10); rdi advances by 16 per absorb.
;
; Loop structure: "dec r8 / jge" repeats the round+absorb body; after it
; falls through, one more 16-byte block is absorbed without any rounds, and
; "dec rcx / jg" re-enters the body. With rcx = 2 and r8 = 8 (the values set
; in $L$seal_sse_main_init) this gives 10 double rounds and 12 absorbed
; blocks (192 bytes). Values used on later passes are set outside this chunk.
;
; xmm0-3 = row a, xmm4-7 = row b, xmm8-11 = row c, xmm12-15 = row d; xmm8 is
; spilled to [rbp+160+80] whenever a shuffle mask or scratch is needed.
; The DB lines are hand-encoded PSHUFB / PALIGNR instructions.
2520ALIGN	32
2521$L$seal_sse_main_rounds:
; --- column round, all four blocks ---
2522	movdqa	XMMWORD[(160+80)+rbp],xmm8
2523	movdqa	xmm8,XMMWORD[$L$rol16]
2524	paddd	xmm3,xmm7
2525	paddd	xmm2,xmm6
2526	paddd	xmm1,xmm5
2527	paddd	xmm0,xmm4
2528	pxor	xmm15,xmm3
2529	pxor	xmm14,xmm2
2530	pxor	xmm13,xmm1
2531	pxor	xmm12,xmm0
2532DB	102,69,15,56,0,248	; pshufb xmm15,xmm8
2533DB	102,69,15,56,0,240	; pshufb xmm14,xmm8
2534DB	102,69,15,56,0,232	; pshufb xmm13,xmm8
2535DB	102,69,15,56,0,224	; pshufb xmm12,xmm8
2536	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2537	paddd	xmm11,xmm15
2538	paddd	xmm10,xmm14
2539	paddd	xmm9,xmm13
2540	paddd	xmm8,xmm12
2541	pxor	xmm7,xmm11
; Hash: acc += 16 output bytes at [rdi] + 2^128.
2542	add	r10,QWORD[((0+0))+rdi]
2543	adc	r11,QWORD[((8+0))+rdi]
2544	adc	r12,1
2545	pxor	xmm6,xmm10
2546	pxor	xmm5,xmm9
2547	pxor	xmm4,xmm8
; b = rotl32(b, 12), xmm8 as scratch.
2548	movdqa	XMMWORD[(160+80)+rbp],xmm8
2549	movdqa	xmm8,xmm7
2550	psrld	xmm8,20
2551	pslld	xmm7,32-20
2552	pxor	xmm7,xmm8
2553	movdqa	xmm8,xmm6
2554	psrld	xmm8,20
2555	pslld	xmm6,32-20
2556	pxor	xmm6,xmm8
2557	movdqa	xmm8,xmm5
2558	psrld	xmm8,20
2559	pslld	xmm5,32-20
2560	pxor	xmm5,xmm8
2561	movdqa	xmm8,xmm4
2562	psrld	xmm8,20
2563	pslld	xmm4,32-20
2564	pxor	xmm4,xmm8
; Hash multiply, part 1: r15:r14:r13 = r.lo * acc (r.lo = [rbp+160]).
2565	mov	rax,QWORD[((0+160+0))+rbp]
2566	mov	r15,rax
2567	mul	r10
2568	mov	r13,rax
2569	mov	r14,rdx
2570	mov	rax,QWORD[((0+160+0))+rbp]
2571	mul	r11
2572	imul	r15,r12
2573	add	r14,rax
2574	adc	r15,rdx
2575	movdqa	xmm8,XMMWORD[$L$rol8]
2576	paddd	xmm3,xmm7
2577	paddd	xmm2,xmm6
2578	paddd	xmm1,xmm5
2579	paddd	xmm0,xmm4
2580	pxor	xmm15,xmm3
2581	pxor	xmm14,xmm2
2582	pxor	xmm13,xmm1
2583	pxor	xmm12,xmm0
2584DB	102,69,15,56,0,248	; pshufb xmm15,xmm8
2585DB	102,69,15,56,0,240	; pshufb xmm14,xmm8
2586DB	102,69,15,56,0,232	; pshufb xmm13,xmm8
2587DB	102,69,15,56,0,224	; pshufb xmm12,xmm8
2588	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2589	paddd	xmm11,xmm15
2590	paddd	xmm10,xmm14
2591	paddd	xmm9,xmm13
2592	paddd	xmm8,xmm12
2593	pxor	xmm7,xmm11
2594	pxor	xmm6,xmm10
; Hash multiply, part 2: add r.hi * acc (r.hi = [rbp+168]), one limb up.
2595	mov	rax,QWORD[((8+160+0))+rbp]
2596	mov	r9,rax
2597	mul	r10
2598	add	r14,rax
2599	adc	rdx,0
2600	mov	r10,rdx
2601	mov	rax,QWORD[((8+160+0))+rbp]
2602	mul	r11
2603	add	r15,rax
2604	adc	rdx,0
2605	pxor	xmm5,xmm9
2606	pxor	xmm4,xmm8
; b = rotl32(b, 7), xmm8 as scratch.
2607	movdqa	XMMWORD[(160+80)+rbp],xmm8
2608	movdqa	xmm8,xmm7
2609	psrld	xmm8,25
2610	pslld	xmm7,32-25
2611	pxor	xmm7,xmm8
2612	movdqa	xmm8,xmm6
2613	psrld	xmm8,25
2614	pslld	xmm6,32-25
2615	pxor	xmm6,xmm8
2616	movdqa	xmm8,xmm5
2617	psrld	xmm8,25
2618	pslld	xmm5,32-25
2619	pxor	xmm5,xmm8
2620	movdqa	xmm8,xmm4
2621	psrld	xmm8,25
2622	pslld	xmm4,32-25
2623	pxor	xmm4,xmm8
2624	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Hash multiply, part 3: finish the 4-limb product in r9:r15:r14:r13.
2625	imul	r9,r12
2626	add	r15,r10
2627	adc	r9,rdx
; Rotate rows b/c/d by 1/2/3 dwords so the next round works on diagonals.
2628DB	102,15,58,15,255,4	; palignr xmm7,xmm7,4
2629DB	102,69,15,58,15,219,8	; palignr xmm11,xmm11,8
2630DB	102,69,15,58,15,255,12	; palignr xmm15,xmm15,12
2631DB	102,15,58,15,246,4	; palignr xmm6,xmm6,4
2632DB	102,69,15,58,15,210,8	; palignr xmm10,xmm10,8
2633DB	102,69,15,58,15,246,12	; palignr xmm14,xmm14,12
2634DB	102,15,58,15,237,4	; palignr xmm5,xmm5,4
2635DB	102,69,15,58,15,201,8	; palignr xmm9,xmm9,8
2636DB	102,69,15,58,15,237,12	; palignr xmm13,xmm13,12
2637DB	102,15,58,15,228,4	; palignr xmm4,xmm4,4
2638DB	102,69,15,58,15,192,8	; palignr xmm8,xmm8,8
2639DB	102,69,15,58,15,228,12	; palignr xmm12,xmm12,12
; --- diagonal round, all four blocks ---
2640	movdqa	XMMWORD[(160+80)+rbp],xmm8
2641	movdqa	xmm8,XMMWORD[$L$rol16]
2642	paddd	xmm3,xmm7
2643	paddd	xmm2,xmm6
2644	paddd	xmm1,xmm5
2645	paddd	xmm0,xmm4
2646	pxor	xmm15,xmm3
2647	pxor	xmm14,xmm2
; Hash reduce: acc = (product mod 2^130) + 5*(product >> 130), a partial
; reduction modulo 2^130-5.
2648	mov	r10,r13
2649	mov	r11,r14
2650	mov	r12,r15
2651	and	r12,3
2652	mov	r13,r15
2653	and	r13,-4
2654	mov	r14,r9
2655	shrd	r15,r9,2
2656	shr	r9,2
2657	add	r15,r13
2658	adc	r9,r14
2659	add	r10,r15
2660	adc	r11,r9
2661	adc	r12,0
2662	pxor	xmm13,xmm1
2663	pxor	xmm12,xmm0
2664DB	102,69,15,56,0,248	; pshufb xmm15,xmm8
2665DB	102,69,15,56,0,240	; pshufb xmm14,xmm8
2666DB	102,69,15,56,0,232	; pshufb xmm13,xmm8
2667DB	102,69,15,56,0,224	; pshufb xmm12,xmm8
2668	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2669	paddd	xmm11,xmm15
2670	paddd	xmm10,xmm14
2671	paddd	xmm9,xmm13
2672	paddd	xmm8,xmm12
2673	pxor	xmm7,xmm11
2674	pxor	xmm6,xmm10
2675	pxor	xmm5,xmm9
2676	pxor	xmm4,xmm8
2677	movdqa	XMMWORD[(160+80)+rbp],xmm8
2678	movdqa	xmm8,xmm7
2679	psrld	xmm8,20
2680	pslld	xmm7,32-20
2681	pxor	xmm7,xmm8
2682	movdqa	xmm8,xmm6
2683	psrld	xmm8,20
2684	pslld	xmm6,32-20
2685	pxor	xmm6,xmm8
2686	movdqa	xmm8,xmm5
2687	psrld	xmm8,20
2688	pslld	xmm5,32-20
2689	pxor	xmm5,xmm8
2690	movdqa	xmm8,xmm4
2691	psrld	xmm8,20
2692	pslld	xmm4,32-20
2693	pxor	xmm4,xmm8
2694	movdqa	xmm8,XMMWORD[$L$rol8]
2695	paddd	xmm3,xmm7
2696	paddd	xmm2,xmm6
2697	paddd	xmm1,xmm5
2698	paddd	xmm0,xmm4
2699	pxor	xmm15,xmm3
2700	pxor	xmm14,xmm2
2701	pxor	xmm13,xmm1
2702	pxor	xmm12,xmm0
2703DB	102,69,15,56,0,248	; pshufb xmm15,xmm8
2704DB	102,69,15,56,0,240	; pshufb xmm14,xmm8
2705DB	102,69,15,56,0,232	; pshufb xmm13,xmm8
2706DB	102,69,15,56,0,224	; pshufb xmm12,xmm8
2707	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2708	paddd	xmm11,xmm15
2709	paddd	xmm10,xmm14
2710	paddd	xmm9,xmm13
2711	paddd	xmm8,xmm12
2712	pxor	xmm7,xmm11
2713	pxor	xmm6,xmm10
2714	pxor	xmm5,xmm9
2715	pxor	xmm4,xmm8
2716	movdqa	XMMWORD[(160+80)+rbp],xmm8
2717	movdqa	xmm8,xmm7
2718	psrld	xmm8,25
2719	pslld	xmm7,32-25
2720	pxor	xmm7,xmm8
2721	movdqa	xmm8,xmm6
2722	psrld	xmm8,25
2723	pslld	xmm6,32-25
2724	pxor	xmm6,xmm8
2725	movdqa	xmm8,xmm5
2726	psrld	xmm8,25
2727	pslld	xmm5,32-25
2728	pxor	xmm5,xmm8
2729	movdqa	xmm8,xmm4
2730	psrld	xmm8,25
2731	pslld	xmm4,32-25
2732	pxor	xmm4,xmm8
2733	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Undo the diagonal rotation (rows b/c/d rotated by 3/2/1 dwords).
2734DB	102,15,58,15,255,12	; palignr xmm7,xmm7,12
2735DB	102,69,15,58,15,219,8	; palignr xmm11,xmm11,8
2736DB	102,69,15,58,15,255,4	; palignr xmm15,xmm15,4
2737DB	102,15,58,15,246,12	; palignr xmm6,xmm6,12
2738DB	102,69,15,58,15,210,8	; palignr xmm10,xmm10,8
2739DB	102,69,15,58,15,246,4	; palignr xmm14,xmm14,4
2740DB	102,15,58,15,237,12	; palignr xmm5,xmm5,12
2741DB	102,69,15,58,15,201,8	; palignr xmm9,xmm9,8
2742DB	102,69,15,58,15,237,4	; palignr xmm13,xmm13,4
2743DB	102,15,58,15,228,12	; palignr xmm4,xmm4,12
2744DB	102,69,15,58,15,192,8	; palignr xmm8,xmm8,8
2745DB	102,69,15,58,15,228,4	; palignr xmm12,xmm12,4
2746
; Inner loop control: advance past the block just hashed and repeat while
; r8 is still >= 0 after the decrement (signed test).
2747	lea	rdi,[16+rdi]
2748	dec	r8
2749	jge	NEAR $L$seal_sse_main_rounds
; Hash-only step: absorb one more 16-byte output block without any rounds.
2750	add	r10,QWORD[((0+0))+rdi]
2751	adc	r11,QWORD[((8+0))+rdi]
2752	adc	r12,1
2753	mov	rax,QWORD[((0+160+0))+rbp]
2754	mov	r15,rax
2755	mul	r10
2756	mov	r13,rax
2757	mov	r14,rdx
2758	mov	rax,QWORD[((0+160+0))+rbp]
2759	mul	r11
2760	imul	r15,r12
2761	add	r14,rax
2762	adc	r15,rdx
2763	mov	rax,QWORD[((8+160+0))+rbp]
2764	mov	r9,rax
2765	mul	r10
2766	add	r14,rax
2767	adc	rdx,0
2768	mov	r10,rdx
2769	mov	rax,QWORD[((8+160+0))+rbp]
2770	mul	r11
2771	add	r15,rax
2772	adc	rdx,0
2773	imul	r9,r12
2774	add	r15,r10
2775	adc	r9,rdx
2776	mov	r10,r13
2777	mov	r11,r14
2778	mov	r12,r15
2779	and	r12,3
2780	mov	r13,r15
2781	and	r13,-4
2782	mov	r14,r9
2783	shrd	r15,r9,2
2784	shr	r9,2
2785	add	r15,r13
2786	adc	r9,r14
2787	add	r10,r15
2788	adc	r11,r9
2789	adc	r12,0
2790
; Outer loop control: re-enter the round body while rcx is still > 0 after
; the decrement. r8 is already negative then, so the body runs exactly once
; per re-entry.
2791	lea	rdi,[16+rdi]
2792	dec	rcx
2793	jg	NEAR $L$seal_sse_main_rounds
2794	paddd	xmm3,XMMWORD[$L$chacha20_consts]
2795	paddd	xmm7,XMMWORD[((160+48))+rbp]
2796	paddd	xmm11,XMMWORD[((160+64))+rbp]
2797	paddd	xmm15,XMMWORD[((160+144))+rbp]
2798	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2799	paddd	xmm6,XMMWORD[((160+48))+rbp]
2800	paddd	xmm10,XMMWORD[((160+64))+rbp]
2801	paddd	xmm14,XMMWORD[((160+128))+rbp]
2802	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2803	paddd	xmm5,XMMWORD[((160+48))+rbp]
2804	paddd	xmm9,XMMWORD[((160+64))+rbp]
2805	paddd	xmm13,XMMWORD[((160+112))+rbp]
2806	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2807	paddd	xmm4,XMMWORD[((160+48))+rbp]
2808	paddd	xmm8,XMMWORD[((160+64))+rbp]
2809	paddd	xmm12,XMMWORD[((160+96))+rbp]
2810
2811	movdqa	XMMWORD[(160+80)+rbp],xmm14
2812	movdqa	XMMWORD[(160+80)+rbp],xmm14
2813	movdqu	xmm14,XMMWORD[((0 + 0))+rsi]
2814	pxor	xmm14,xmm3
2815	movdqu	XMMWORD[(0 + 0)+rdi],xmm14
2816	movdqu	xmm14,XMMWORD[((16 + 0))+rsi]
2817	pxor	xmm14,xmm7
2818	movdqu	XMMWORD[(16 + 0)+rdi],xmm14
2819	movdqu	xmm14,XMMWORD[((32 + 0))+rsi]
2820	pxor	xmm14,xmm11
2821	movdqu	XMMWORD[(32 + 0)+rdi],xmm14
2822	movdqu	xmm14,XMMWORD[((48 + 0))+rsi]
2823	pxor	xmm14,xmm15
2824	movdqu	XMMWORD[(48 + 0)+rdi],xmm14
2825
2826	movdqa	xmm14,XMMWORD[((160+80))+rbp]
2827	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
2828	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
2829	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
2830	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
2831	pxor	xmm2,xmm3
2832	pxor	xmm6,xmm7
2833	pxor	xmm10,xmm11
2834	pxor	xmm15,xmm14
2835	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
2836	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
2837	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
2838	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
2839	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
2840	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
2841	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
2842	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
2843	pxor	xmm1,xmm3
2844	pxor	xmm5,xmm7
2845	pxor	xmm9,xmm11
2846	pxor	xmm15,xmm13
2847	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
2848	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
2849	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
2850	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
2851
2852	cmp	rbx,16*16
2853	ja	NEAR $L$seal_sse_main_loop_xor
2854
2855	mov	rcx,12*16
2856	sub	rbx,12*16
2857	lea	rsi,[192+rsi]
2858	jmp	NEAR $L$seal_sse_128_tail_hash
; Finish a 256-byte chunk: XOR the input at rsi+192..rsi+255 with the rows
; xmm0/xmm4/xmm8/xmm12 and store the result at rdi+192, then decide how the
; remaining rbx bytes are handled.
$L$seal_sse_main_loop_xor:
	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
	pxor	xmm0,xmm3
	pxor	xmm4,xmm7
	pxor	xmm8,xmm11
	; The fourth row is XORed into the loaded input (xmm15), so xmm15 is the
	; register stored below rather than xmm12.
	pxor	xmm15,xmm12
	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
	movdqu	XMMWORD[(48 + 192)+rdi],xmm15

	; Consume 256 input bytes. rdi is not advanced here; the hashing code
	; moves it forward 16 bytes per absorbed block.
	lea	rsi,[256+rsi]
	sub	rbx,16*16
	; Loop counters used by the round loops: rcx = 6, r8 = 4.
	mov	rcx,6
	mov	r8,4
	; More than 192 bytes left (signed compare): run the main loop again.
	cmp	rbx,12*16
	jg	NEAR $L$seal_sse_main_loop
	; Nothing left: enter the tail hash with rcx = rbx = 0.
	mov	rcx,rbx
	test	rbx,rbx
	je	NEAR $L$seal_sse_128_tail_hash
	; 1..192 bytes left: reload rcx and pick the tail by size
	; (>128 -> tail_192, >64 -> tail_128, otherwise fall through to tail_64).
	mov	rcx,6
	cmp	rbx,8*16
	ja	NEAR $L$seal_sse_tail_192
	cmp	rbx,4*16
	ja	NEAR $L$seal_sse_tail_128
2887
; Tail with one keystream block. Reached by fall-through from the size
; dispatch above when neither the >128 nor the >64 byte branch is taken.
; Builds one ChaCha state in xmm0/xmm4/xmm8/xmm12, runs the rounds while
; absorbing 16-byte blocks at [rdi] into the Poly1305 accumulator
; r12:r11:r10, then jumps to $L$seal_sse_128_tail_xor to do the XOR.
; rcx and r8 are the loop counters set by the code that branches here.
$L$seal_sse_tail_64:
	; Row 0 = constants, rows 1/2 = saved state at [rbp+160+48]/[rbp+160+64],
	; row 3 = counter row at [rbp+160+96] advanced by $L$sse_inc and saved back.
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm4,XMMWORD[((160+48))+rbp]
	movdqa	xmm8,XMMWORD[((160+64))+rbp]
	movdqa	xmm12,XMMWORD[((160+96))+rbp]
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	XMMWORD[(160+96)+rbp],xmm12

; Iterations entering here absorb two 16-byte blocks per double round.
$L$seal_sse_tail_64_rounds_and_x2hash:
	; acc += 16 bytes at [rdi], plus the 2^128 bit (adc r12,1).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	; acc *= 128-bit multiplier at [rbp+160] (limbs at +0 and +8);
	; product limbs end up in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: keep the low 130 bits and add
	; 5*(x >> 130), formed as 4*(x >> 130) (the -4 mask) plus (x >> 130)
	; (the 2-bit shift).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
; Iterations entering here absorb one 16-byte block per double round.
$L$seal_sse_tail_64_rounds_and_x1hash:
	; Quarter-round on rows xmm0/xmm4/xmm8/xmm12 (xmm3 is scratch):
	; add, xor, rotate left by 16, 12, 8 and 7 bits.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; Hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12:
	; rotate rows 1..3 by 1/2/3 dwords for the second quarter-round.
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
	; Second quarter-round on the rotated rows.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; Hand-encoded palignr xmm4,xmm4,12 / xmm8,xmm8,8 / xmm12,xmm12,4:
	; undo the row rotation.
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
	; Absorb the next 16 bytes at [rdi]: same add / multiply / reduce
	; sequence as at the top of the loop.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
	; While rcx stays positive, loop through the two-block entry; after that,
	; loop through the one-block entry until r8 goes negative.
	dec	rcx
	jg	NEAR $L$seal_sse_tail_64_rounds_and_x2hash
	dec	r8
	jge	NEAR $L$seal_sse_tail_64_rounds_and_x1hash
	; Add the input state back to the round output to form the keystream.
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,XMMWORD[((160+48))+rbp]
	paddd	xmm8,XMMWORD[((160+64))+rbp]
	paddd	xmm12,XMMWORD[((160+96))+rbp]

	jmp	NEAR $L$seal_sse_128_tail_xor
3034
; Tail with two keystream blocks (branched to when more than 64 bytes remain
; in rbx). State A is xmm0/xmm4/xmm8/xmm12, state B is xmm1/xmm5/xmm9/xmm13.
; Rounds are interleaved with absorbing 16-byte blocks at [rdi] into the
; Poly1305 accumulator r12:r11:r10. State B encrypts the first 64 bytes here;
; state A is left in registers for $L$seal_sse_128_tail_xor.
; rcx and r8 are the loop counters set by the code that branches here.
$L$seal_sse_tail_128:
	; Rows 0..2 are shared; the counter row at [rbp+160+96] is advanced by
	; $L$sse_inc once for B (xmm13) and twice for A (xmm12), then both are saved.
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm4,XMMWORD[((160+48))+rbp]
	movdqa	xmm8,XMMWORD[((160+64))+rbp]
	movdqa	xmm1,xmm0
	movdqa	xmm5,xmm4
	movdqa	xmm9,xmm8
	movdqa	xmm13,XMMWORD[((160+96))+rbp]
	paddd	xmm13,XMMWORD[$L$sse_inc]
	movdqa	xmm12,xmm13
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	XMMWORD[(160+96)+rbp],xmm12
	movdqa	XMMWORD[(160+112)+rbp],xmm13

; Iterations entering here absorb two 16-byte blocks per double round.
$L$seal_sse_tail_128_rounds_and_x2hash:
	; acc += 16 bytes at [rdi], plus the 2^128 bit (adc r12,1).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	; acc *= 128-bit multiplier at [rbp+160]; product limbs in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: low 130 bits + 5*(x >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
; Iterations entering here absorb one 16-byte block per double round.
$L$seal_sse_tail_128_rounds_and_x1hash:
	; First quarter-round, state A (xmm3 is scratch): add, xor, rotate left
	; by 16, 12, 8 and 7 bits.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; Hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12.
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
	; First quarter-round, state B.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
	; Hand-encoded palignr xmm5,xmm5,4 / xmm9,xmm9,8 / xmm13,xmm13,12.
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
	; Absorb the next 16 bytes at [rdi] (same add / multiply / reduce sequence).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	; Second quarter-round, state A (rows are in rotated position).
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; Hand-encoded palignr xmm4,xmm4,12 / xmm8,xmm8,8 / xmm12,xmm12,4
	; (undo the row rotation).
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
	; Second quarter-round, state B.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
	; Hand-encoded palignr xmm5,xmm5,12 / xmm9,xmm9,8 / xmm13,xmm13,4.
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4

	lea	rdi,[16+rdi]
	; While rcx stays positive, loop through the two-block entry; after that,
	; loop through the one-block entry until r8 goes negative.
	dec	rcx
	jg	NEAR $L$seal_sse_tail_128_rounds_and_x2hash
	dec	r8
	jge	NEAR $L$seal_sse_tail_128_rounds_and_x1hash
	; Add the input state back to both blocks (B uses the counter saved at
	; +112, A the one at +96).
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm5,XMMWORD[((160+48))+rbp]
	paddd	xmm9,XMMWORD[((160+64))+rbp]
	paddd	xmm13,XMMWORD[((160+112))+rbp]
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,XMMWORD[((160+48))+rbp]
	paddd	xmm8,XMMWORD[((160+64))+rbp]
	paddd	xmm12,XMMWORD[((160+96))+rbp]
	; XOR 64 input bytes at rsi with block B and store them at rdi.
	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
	pxor	xmm1,xmm3
	pxor	xmm5,xmm7
	pxor	xmm9,xmm11
	pxor	xmm15,xmm13
	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
	movdqu	XMMWORD[(48 + 0)+rdi],xmm15

	; 64 bytes just written still need hashing (rcx); 64 fewer bytes remain.
	mov	rcx,4*16
	sub	rbx,4*16
	lea	rsi,[64+rsi]
	jmp	NEAR $L$seal_sse_128_tail_hash
3248
; Tail with three keystream blocks (branched to when more than 128 bytes
; remain in rbx). State A is xmm0/xmm4/xmm8/xmm12, state B is
; xmm1/xmm5/xmm9/xmm13, state C is xmm2/xmm6/xmm10/xmm14. Rounds are
; interleaved with absorbing 16-byte blocks at [rdi] into the Poly1305
; accumulator r12:r11:r10. C and B encrypt the first 128 bytes here; A is left
; in registers, and execution falls through to $L$seal_sse_128_tail_hash.
; rcx and r8 are the loop counters set by the code that branches here.
$L$seal_sse_tail_192:
	; Rows 0..2 are shared; the counter row at [rbp+160+96] is advanced by
	; $L$sse_inc once for C (xmm14), twice for B (xmm13), three times for A
	; (xmm12), and all three are saved.
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm4,XMMWORD[((160+48))+rbp]
	movdqa	xmm8,XMMWORD[((160+64))+rbp]
	movdqa	xmm1,xmm0
	movdqa	xmm5,xmm4
	movdqa	xmm9,xmm8
	movdqa	xmm2,xmm0
	movdqa	xmm6,xmm4
	movdqa	xmm10,xmm8
	movdqa	xmm14,XMMWORD[((160+96))+rbp]
	paddd	xmm14,XMMWORD[$L$sse_inc]
	movdqa	xmm13,xmm14
	paddd	xmm13,XMMWORD[$L$sse_inc]
	movdqa	xmm12,xmm13
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	XMMWORD[(160+96)+rbp],xmm12
	movdqa	XMMWORD[(160+112)+rbp],xmm13
	movdqa	XMMWORD[(160+128)+rbp],xmm14

; Iterations entering here absorb two 16-byte blocks per double round.
$L$seal_sse_tail_192_rounds_and_x2hash:
	; acc += 16 bytes at [rdi], plus the 2^128 bit (adc r12,1).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	; acc *= 128-bit multiplier at [rbp+160]; product limbs in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: low 130 bits + 5*(x >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
; Iterations entering here absorb one 16-byte block per double round.
$L$seal_sse_tail_192_rounds_and_x1hash:
	; First quarter-round, state A (xmm3 is scratch): add, xor, rotate left
	; by 16, 12, 8 and 7 bits.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; Hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12.
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
	; First quarter-round, state B.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
	; Hand-encoded palignr xmm5,xmm5,4 / xmm9,xmm9,8 / xmm13,xmm13,12.
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
	; First quarter-round, state C.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
	; Hand-encoded palignr xmm6,xmm6,4 / xmm10,xmm10,8 / xmm14,xmm14,12.
DB	102,15,58,15,246,4
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,12
	; Absorb the next 16 bytes at [rdi] (same add / multiply / reduce sequence).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	; Second quarter-round, state A (rows are in rotated position).
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; Hand-encoded palignr xmm4,xmm4,12 / xmm8,xmm8,8 / xmm12,xmm12,4
	; (undo the row rotation).
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
	; Second quarter-round, state B.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
	; Hand-encoded palignr xmm5,xmm5,12 / xmm9,xmm9,8 / xmm13,xmm13,4.
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4
	; Second quarter-round, state C.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
	; Hand-encoded palignr xmm6,xmm6,12 / xmm10,xmm10,8 / xmm14,xmm14,4.
DB	102,15,58,15,246,12
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,4

	lea	rdi,[16+rdi]
	; While rcx stays positive, loop through the two-block entry; after that,
	; loop through the one-block entry until r8 goes negative.
	dec	rcx
	jg	NEAR $L$seal_sse_tail_192_rounds_and_x2hash
	dec	r8
	jge	NEAR $L$seal_sse_tail_192_rounds_and_x1hash
	; Add the input state back to all three blocks (counters saved at
	; +128 for C, +112 for B, +96 for A).
	paddd	xmm2,XMMWORD[$L$chacha20_consts]
	paddd	xmm6,XMMWORD[((160+48))+rbp]
	paddd	xmm10,XMMWORD[((160+64))+rbp]
	paddd	xmm14,XMMWORD[((160+128))+rbp]
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm5,XMMWORD[((160+48))+rbp]
	paddd	xmm9,XMMWORD[((160+64))+rbp]
	paddd	xmm13,XMMWORD[((160+112))+rbp]
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,XMMWORD[((160+48))+rbp]
	paddd	xmm8,XMMWORD[((160+64))+rbp]
	paddd	xmm12,XMMWORD[((160+96))+rbp]
	; Bytes 0..63: input at rsi XOR block C, stored at rdi.
	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
	pxor	xmm2,xmm3
	pxor	xmm6,xmm7
	pxor	xmm10,xmm11
	pxor	xmm15,xmm14
	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
	; Bytes 64..127: input XOR block B.
	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
	pxor	xmm1,xmm3
	pxor	xmm5,xmm7
	pxor	xmm9,xmm11
	pxor	xmm15,xmm13
	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
	movdqu	XMMWORD[(48 + 64)+rdi],xmm15

	; 128 bytes just written still need hashing (rcx); 128 fewer bytes remain.
	; Falls through into $L$seal_sse_128_tail_hash.
	mov	rcx,8*16
	sub	rbx,8*16
	lea	rsi,[128+rsi]
3525
; Hash loop: while rcx >= 16, absorb the 16 bytes at [rdi] into the Poly1305
; accumulator r12:r11:r10, then advance rdi by 16 and decrease rcx by 16.
; Exits to $L$seal_sse_128_tail_xor once fewer than 16 bytes are pending.
$L$seal_sse_128_tail_hash:
	cmp	rcx,16
	jb	NEAR $L$seal_sse_128_tail_xor
	; acc += 16 bytes at [rdi], plus the 2^128 bit (adc r12,1).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	; acc *= 128-bit multiplier at [rbp+160] (limbs at +0 and +8);
	; product limbs end up in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: keep the low 130 bits and add
	; 5*(x >> 130), formed as 4*(x >> 130) (the -4 mask) plus (x >> 130)
	; (the 2-bit shift).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	sub	rcx,16
	lea	rdi,[16+rdi]
	jmp	NEAR $L$seal_sse_128_tail_hash
3573
; Encrypt-and-hash loop, 16 bytes per iteration while rbx >= 16:
; XOR input at rsi with xmm0, store at rdi, absorb the stored bytes into the
; Poly1305 accumulator r12:r11:r10, then shift the remaining keystream
; registers down so the next 16 keystream bytes are in xmm0.
; Exits to $L$seal_sse_tail_16 when fewer than 16 bytes remain.
$L$seal_sse_128_tail_xor:
	cmp	rbx,16
	jb	NEAR $L$seal_sse_tail_16
	sub	rbx,16

	movdqu	xmm3,XMMWORD[rsi]
	pxor	xmm0,xmm3
	movdqu	XMMWORD[rdi],xmm0

	; acc += the 16 bytes just written, plus the 2^128 bit (adc r12,1).
	add	r10,QWORD[rdi]
	adc	r11,QWORD[8+rdi]
	adc	r12,1
	lea	rsi,[16+rsi]
	lea	rdi,[16+rdi]
	; acc *= 128-bit multiplier at [rbp+160]; product limbs in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: low 130 bits + 5*(x >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0


	; Keystream queue shift:
	; xmm0 <- xmm4 <- xmm8 <- xmm12 <- xmm1 <- xmm5 <- xmm9 <- xmm13.
	movdqa	xmm0,xmm4
	movdqa	xmm4,xmm8
	movdqa	xmm8,xmm12
	movdqa	xmm12,xmm1
	movdqa	xmm1,xmm5
	movdqa	xmm5,xmm9
	movdqa	xmm9,xmm13
	jmp	NEAR $L$seal_sse_128_tail_xor
3635
; Final partial block: rbx (< 16) input bytes remain and xmm0 holds the next
; keystream bytes. The bytes are read and written one at a time, never as a
; full 16-byte access. The resulting partial block is then topped up with
; bytes from the extra-input buffer (if any) and absorbed into the Poly1305
; accumulator r12:r11:r10.
$L$seal_sse_tail_16:
	test	rbx,rbx
	jz	NEAR $L$process_blocks_of_extra_in

	; Gather the rbx input bytes into xmm15, last byte first, so byte 0 of
	; the input ends up in byte 0 of xmm15.
	mov	r8,rbx
	mov	rcx,rbx
	lea	rsi,[((-1))+rbx*1+rsi]
	pxor	xmm15,xmm15
$L$seal_sse_tail_16_compose:
	pslldq	xmm15,1
	pinsrb	xmm15,BYTE[rsi],0
	lea	rsi,[((-1))+rsi]
	dec	rcx
	jne	NEAR $L$seal_sse_tail_16_compose


	; XOR with the keystream.
	pxor	xmm15,xmm0


	; Write the low rbx bytes out one at a time; xmm15 keeps a copy for hashing.
	mov	rcx,rbx
	movdqu	xmm0,xmm15
$L$seal_sse_tail_16_extract:
	pextrb	XMMWORD[rdi],xmm0,0
	psrldq	xmm0,1
	add	rdi,1
	sub	rcx,1
	jnz	NEAR $L$seal_sse_tail_16_extract








	; r9 = pointer saved at [rsp+288+160+32]; its fields at +48 / +56 are read
	; as an extra-input pointer (r13) and byte count (r14).
	mov	r9,QWORD[((288 + 160 + 32))+rsp]
	mov	r14,QWORD[56+r9]
	mov	r13,QWORD[48+r9]
	test	r14,r14
	jz	NEAR $L$process_partial_block

	; r15 = min(16 - rbx, r14): number of extra-input bytes that fit in the
	; block (the comparison is signed).
	mov	r15,16
	sub	r15,rbx
	cmp	r14,r15

	jge	NEAR $L$load_extra_in
	mov	r15,r14

$L$load_extra_in:


	; rsi = last of the r15 bytes to take.
	lea	rsi,[((-1))+r15*1+r13]


	; Advance the stored pointer and shrink the stored count by r15.
	add	r13,r15
	sub	r14,r15
	mov	QWORD[48+r9],r13
	mov	QWORD[56+r9],r14



	add	r8,r15


	; Gather the r15 extra-input bytes into xmm11, last byte first.
	pxor	xmm11,xmm11
$L$load_extra_load_loop:
	pslldq	xmm11,1
	pinsrb	xmm11,BYTE[rsi],0
	lea	rsi,[((-1))+rsi]
	sub	r15,1
	jnz	NEAR $L$load_extra_load_loop




	; Shift xmm11 up by rbx bytes, one byte per iteration, so it sits just
	; above the rbx bytes already held in xmm15.
	mov	r15,rbx

$L$load_extra_shift_loop:
	pslldq	xmm11,1
	sub	r15,1
	jnz	NEAR $L$load_extra_shift_loop




	; Keep only the low rbx bytes of xmm15: entry (rbx-1) of $L$and_masks,
	; 16 bytes per entry.
	lea	r15,[$L$and_masks]
	shl	rbx,4
	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]


	; Merge the two disjoint byte ranges.
	por	xmm15,xmm11



	; Hand-encoded movq r13,xmm15 (low qword); pextrq fetches the high qword.
DB	102,77,15,126,253
	pextrq	r14,xmm15,1
	; acc += block, plus the 2^128 bit (adc r12,1).
	add	r10,r13
	adc	r11,r14
	adc	r12,1
	; acc *= 128-bit multiplier at [rbp+160]; product limbs in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: low 130 bits + 5*(x >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
3772
3773
; Absorb whatever is left of the extra-input buffer into the Poly1305
; accumulator r12:r11:r10: whole 16-byte blocks in the hash loop, then the
; trailing 1..15 bytes are gathered into xmm15 for $L$process_partial_block.
$L$process_blocks_of_extra_in:

	; r9 = pointer saved at [rsp+288+32+160]; +48 is read as the data pointer
	; (rsi) and +56 as the byte count (r8, copied to rcx).
	mov	r9,QWORD[((288+32+160 ))+rsp]
	mov	rsi,QWORD[48+r9]
	mov	r8,QWORD[56+r9]
	mov	rcx,r8
	; r8 = number of whole 16-byte blocks; ZF from this shift feeds the first
	; jz below.
	shr	r8,4

$L$process_extra_hash_loop:
	; ZF comes from "shr r8,4" on entry and from "sub r8,1" on later passes
	; (lea and jmp leave the flags alone).
	jz	NEAR process_extra_in_trailer
	; acc += 16 bytes at [rsi], plus the 2^128 bit (adc r12,1).
	add	r10,QWORD[((0+0))+rsi]
	adc	r11,QWORD[((8+0))+rsi]
	adc	r12,1
	; acc *= 128-bit multiplier at [rbp+160]; product limbs in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: low 130 bits + 5*(x >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rsi,[16+rsi]
	sub	r8,1
	jmp	NEAR $L$process_extra_hash_loop
; NOTE(review): unlike its neighbours this label has no $L$ prefix.
process_extra_in_trailer:
	; rcx = rbx = leftover byte count (0..15); ZF from the "and" survives the
	; mov and decides the jz.
	and	rcx,15
	mov	rbx,rcx
	jz	NEAR $L$do_length_block
	lea	rsi,[((-1))+rcx*1+rsi]

; Gather the leftover bytes into the low end of xmm15, last byte first.
; xmm15 is not cleared first; older contents are shifted upward and are
; removed by the mask applied in $L$process_partial_block.
$L$process_extra_in_trailer_load:
	pslldq	xmm15,1
	pinsrb	xmm15,BYTE[rsi],0
	lea	rsi,[((-1))+rsi]
	sub	rcx,1
	jnz	NEAR $L$process_extra_in_trailer_load
3840
; Absorb a final partial block: xmm15 holds rbx (1..15) valid low bytes.
; Bytes above rbx are zeroed with the $L$and_masks table, then the block is
; added to the Poly1305 accumulator r12:r11:r10 and multiplied / reduced.
$L$process_partial_block:

	; Entry (rbx-1) of $L$and_masks, 16 bytes per entry, keeps the low rbx bytes.
	lea	r15,[$L$and_masks]
	shl	rbx,4
	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]
	; Hand-encoded movq r13,xmm15 (low qword); pextrq fetches the high qword.
DB	102,77,15,126,253
	pextrq	r14,xmm15,1
	; acc += block, plus the 2^128 bit (adc r12,1).
	add	r10,r13
	adc	r11,r14
	adc	r12,1
	; acc *= 128-bit multiplier at [rbp+160] (limbs at +0 and +8);
	; product limbs end up in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: keep the low 130 bits and add
	; 5*(x >> 130), formed as 4*(x >> 130) (the -4 mask) plus (x >> 130)
	; (the 2-bit shift).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
3888
3889
; Finish the MAC and return: absorb the 16-byte block stored at [rbp+160+32]
; (the label name indicates it is the length block), fully reduce the
; accumulator mod 2^130-5, add the 128-bit value at [rbp+160+16], restore the
; saved registers, write the 16-byte result through the pointer popped into
; r9, and return.
$L$do_length_block:
	; acc += 16 bytes at [rbp+160+32], plus the 2^128 bit (adc r12,1).
	add	r10,QWORD[((0+160+32))+rbp]
	adc	r11,QWORD[((8+160+32))+rbp]
	adc	r12,1
	; acc *= 128-bit multiplier at [rbp+160]; product limbs in r13, r14, r15, r9.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction mod 2^130-5: low 130 bits + 5*(x >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0


	; Final reduction: compute acc - (2^130 - 5), whose limbs are -5, -1, 3.
	; If that borrows (acc was already below the modulus) keep the saved
	; copy instead; cmov avoids a data-dependent branch.
	mov	r13,r10
	mov	r14,r11
	mov	r15,r12
	sub	r10,-5
	sbb	r11,-1
	sbb	r12,3
	cmovc	r10,r13
	cmovc	r11,r14
	cmovc	r12,r15

	; Add the 128-bit value at [rbp+160+16]; only the low 128 bits are kept.
	add	r10,QWORD[((0+160+16))+rbp]
	adc	r11,QWORD[((8+160+16))+rbp]

	; Restore xmm6..xmm15 (callee-saved on Win64) from the save area at [rbp].
	movaps	xmm6,XMMWORD[((0+0))+rbp]
	movaps	xmm7,XMMWORD[((16+0))+rbp]
	movaps	xmm8,XMMWORD[((32+0))+rbp]
	movaps	xmm9,XMMWORD[((48+0))+rbp]
	movaps	xmm10,XMMWORD[((64+0))+rbp]
	movaps	xmm11,XMMWORD[((80+0))+rbp]
	movaps	xmm12,XMMWORD[((96+0))+rbp]
	movaps	xmm13,XMMWORD[((112+0))+rbp]
	movaps	xmm14,XMMWORD[((128+0))+rbp]
	movaps	xmm15,XMMWORD[((144+0))+rbp]


	; Release the local frame.
	add	rsp,288 + 160 + 32


	; The first popped value is used as the output pointer: the 16-byte
	; result is written to [r9] and [r9+8].
	pop	r9

	mov	QWORD[r9],r10
	mov	QWORD[8+r9],r11
	pop	r15

	pop	r14

	pop	r13

	pop	r12

	pop	rbx

	pop	rbp

	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret
3980
; Short-input path: compute three ChaCha blocks at once from the 48 bytes of
; state at [r9].
;   state A = xmm0/xmm4/xmm8/xmm12  (counter row = base + 1)
;   state B = xmm1/xmm5/xmm9/xmm13  (counter row = base + 2)
;   state C = xmm2/xmm6/xmm10/xmm14 (counter row = base, as loaded)
; After the rounds, rows 0 and 1 of block C become the Poly1305 key material
; stored at [rbp+160] (clamped) and [rbp+160+16]; blocks A and B are the
; keystream consumed by $L$seal_sse_128_tail_xor.
$L$seal_sse_128:

	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm1,xmm0
	movdqa	xmm2,xmm0
	movdqu	xmm4,XMMWORD[r9]
	movdqa	xmm5,xmm4
	movdqa	xmm6,xmm4
	movdqu	xmm8,XMMWORD[16+r9]
	movdqa	xmm9,xmm8
	movdqa	xmm10,xmm8
	movdqu	xmm14,XMMWORD[32+r9]
	movdqa	xmm12,xmm14
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	xmm13,xmm12
	paddd	xmm13,XMMWORD[$L$sse_inc]
	; Keep the input rows for the feed-forward addition after the rounds:
	; xmm7 = row 1, xmm11 = row 2, xmm15 = counter row of state A.
	movdqa	xmm7,xmm4
	movdqa	xmm11,xmm8
	movdqa	xmm15,xmm12
	; r10 counts 10 passes of the loop below (two quarter-round sets each).
	mov	r10,10

$L$seal_sse_128_rounds:
	; First quarter-round, state A (xmm3 is scratch): add, xor, rotate left
	; by 16, 12, 8 and 7 bits.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; Hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12.
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
	; First quarter-round, state B.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
	; Hand-encoded palignr xmm5,xmm5,4 / xmm9,xmm9,8 / xmm13,xmm13,12.
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
	; First quarter-round, state C.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
	; Hand-encoded palignr xmm6,xmm6,4 / xmm10,xmm10,8 / xmm14,xmm14,12.
DB	102,15,58,15,246,4
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,12
	; Second quarter-round, state A (rows are in rotated position).
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; Hand-encoded palignr xmm4,xmm4,12 / xmm8,xmm8,8 / xmm12,xmm12,4
	; (undo the row rotation).
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
	; Second quarter-round, state B.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
	; Hand-encoded palignr xmm5,xmm5,12 / xmm9,xmm9,8 / xmm13,xmm13,4.
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4
	; Second quarter-round, state C.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
	; Hand-encoded palignr xmm6,xmm6,12 / xmm10,xmm10,8 / xmm14,xmm14,4.
DB	102,15,58,15,246,12
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,4

	dec	r10
	jnz	NEAR $L$seal_sse_128_rounds
	; Feed-forward: add the input state back. Rows 2 and 3 of block C
	; (xmm10, xmm14) are not finalized because they are not used below.
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm2,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,xmm7
	paddd	xmm5,xmm7
	paddd	xmm6,xmm7
	paddd	xmm8,xmm11
	paddd	xmm9,xmm11
	paddd	xmm12,xmm15
	; xmm15 held state A's counter row; one more increment gives state B's.
	paddd	xmm15,XMMWORD[$L$sse_inc]
	paddd	xmm13,xmm15

	; Block C row 0, masked with $L$clamp, becomes the hash multiplier at
	; [rbp+160]; row 1 is stored at [rbp+160+16].
	pand	xmm2,XMMWORD[$L$clamp]
	movdqa	XMMWORD[(160+0)+rbp],xmm2
	movdqa	XMMWORD[(160+16)+rbp],xmm6

	; "mov r8,r8" is a no-op as written (same source and destination).
	; poly_hash_ad_internal is defined outside this section; presumably it
	; hashes the additional data with r8 as its length. TODO confirm.
	mov	r8,r8
	call	poly_hash_ad_internal
	jmp	NEAR $L$seal_sse_128_tail_xor
4151$L$SEH_end_chacha20_poly1305_seal:
4152
4153
4154
4155
4156ALIGN	64
; AVX2 entry of the "open" path: builds the initial two-block ChaCha state and
; picks a code path by length. The caller and the full register contract are
; outside this view; the roles below are only what the visible code shows.
;   r9  : base of three consecutive 16-byte rows, each broadcast to both
;         128-bit lanes (presumably key rows and counter/nonce row - TODO
;         confirm against the caller).
;   rbx : byte count, compared against 6*32 and 10*32 below.
;   rbp : base of a scratch area; rows are saved at 160+64, 160+96, 160+160.
4157chacha20_poly1305_open_avx2:
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170	vzeroupper
4171	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4172	vbroadcasti128	ymm4,XMMWORD[r9]
4173	vbroadcasti128	ymm8,XMMWORD[16+r9]
4174	vbroadcasti128	ymm12,XMMWORD[32+r9]
; The 32-byte read at $L$avx2_init spans its four zero dwords and the
; $L$sse_inc row {1,0,0,0} that follows it, so only the upper lane's first
; dword is incremented: the two lanes hold consecutive counter values.
4175	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
; rbx <= 192 and rbx <= 320 are handled by dedicated paths (labels are
; outside this view).
4176	cmp	rbx,6*32
4177	jbe	NEAR $L$open_avx2_192
4178	cmp	rbx,10*32
4179	jbe	NEAR $L$open_avx2_320
4180
; Save the three non-constant state rows; they are re-added after the rounds
; and reloaded as the starting state of later iterations.
4181	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
4182	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
4183	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
; r10 = iteration count for the round loop that follows.
4184	mov	r10,10
; Ten iterations of one ChaCha double round on a single two-block state
; (rows ymm0/ymm4/ymm8/ymm12, ymm3 is scratch). Each half is: add, xor,
; rotate 16 (byte shuffle), add, xor, rotate 12, add, xor, rotate 8 (byte
; shuffle), add, xor, rotate 7. The vpalignr triples rotate the rows between
; the two halves and back again.
4185$L$open_avx2_init_rounds:
4186	vpaddd	ymm0,ymm0,ymm4
4187	vpxor	ymm12,ymm12,ymm0
4188	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4189	vpaddd	ymm8,ymm8,ymm12
4190	vpxor	ymm4,ymm4,ymm8
4191	vpsrld	ymm3,ymm4,20
4192	vpslld	ymm4,ymm4,12
4193	vpxor	ymm4,ymm4,ymm3
4194	vpaddd	ymm0,ymm0,ymm4
4195	vpxor	ymm12,ymm12,ymm0
4196	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4197	vpaddd	ymm8,ymm8,ymm12
4198	vpxor	ymm4,ymm4,ymm8
4199	vpslld	ymm3,ymm4,7
4200	vpsrld	ymm4,ymm4,25
4201	vpxor	ymm4,ymm4,ymm3
4202	vpalignr	ymm12,ymm12,ymm12,12
4203	vpalignr	ymm8,ymm8,ymm8,8
4204	vpalignr	ymm4,ymm4,ymm4,4
4205	vpaddd	ymm0,ymm0,ymm4
4206	vpxor	ymm12,ymm12,ymm0
4207	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4208	vpaddd	ymm8,ymm8,ymm12
4209	vpxor	ymm4,ymm4,ymm8
4210	vpsrld	ymm3,ymm4,20
4211	vpslld	ymm4,ymm4,12
4212	vpxor	ymm4,ymm4,ymm3
4213	vpaddd	ymm0,ymm0,ymm4
4214	vpxor	ymm12,ymm12,ymm0
4215	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4216	vpaddd	ymm8,ymm8,ymm12
4217	vpxor	ymm4,ymm4,ymm8
4218	vpslld	ymm3,ymm4,7
4219	vpsrld	ymm4,ymm4,25
4220	vpxor	ymm4,ymm4,ymm3
4221	vpalignr	ymm12,ymm12,ymm12,4
4222	vpalignr	ymm8,ymm8,ymm8,8
4223	vpalignr	ymm4,ymm4,ymm4,12
4224
4225	dec	r10
4226	jne	NEAR $L$open_avx2_init_rounds
; Feed-forward: add the initial state back (constants plus the saved rows).
4227	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4228	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4229	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4230	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4231
; ymm3 = { low lane of ymm0, low lane of ymm4 }: the first 32 output bytes of
; the lower-lane block.
4232	vperm2i128	ymm3,ymm4,ymm0,0x02
4233
; Mask with $L$clamp and store at 160+0: the hash steps below multiply by the
; qwords at 160+0 and 160+8 of this slot.
4234	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
4235	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
4236
; Gather the upper-lane block into ymm0 = {ymm0.hi, ymm4.hi} and
; ymm4 = {ymm8.hi, ymm12.hi}: 64 bytes that are XORed with the input after
; the hash loop that follows.
4237	vperm2i128	ymm0,ymm4,ymm0,0x13
4238	vperm2i128	ymm4,ymm12,ymm8,0x13
4239
; Register-to-itself move (no effect); poly_hash_ad_internal is defined
; outside this view. The code after the call uses r10:r11:r12 as a running
; accumulator, so the helper presumably leaves it initialised there - TODO
; confirm.
4240	mov	r8,r8
4241	call	poly_hash_ad_internal
4242
; rcx = byte offset into the input for the hash loop that follows.
4243	xor	rcx,rcx
; Hash the first 2*32 input bytes, 16 bytes per iteration, then decrypt them.
; Per-block step on the accumulator r10:r11:r12 (low to high limb):
;   1. add the 16-byte block at rsi+rcx, plus 1 in the third limb (bit 128);
;   2. multiply by the 128-bit value stored at 160+0 / 160+8 (rbp-relative);
;   3. partially reduce modulo 2^130-5: keep the low 130 bits and add 5x the
;      bits above them (added once as 4*hi via the -4 mask and once as hi via
;      the 2-bit right shift).
4244$L$open_avx2_init_hash:
4245	add	r10,QWORD[((0+0))+rcx*1+rsi]
4246	adc	r11,QWORD[((8+0))+rcx*1+rsi]
4247	adc	r12,1
4248	mov	rax,QWORD[((0+160+0))+rbp]
4249	mov	r15,rax
4250	mul	r10
4251	mov	r13,rax
4252	mov	r14,rdx
4253	mov	rax,QWORD[((0+160+0))+rbp]
4254	mul	r11
4255	imul	r15,r12
4256	add	r14,rax
4257	adc	r15,rdx
4258	mov	rax,QWORD[((8+160+0))+rbp]
4259	mov	r9,rax
4260	mul	r10
4261	add	r14,rax
4262	adc	rdx,0
4263	mov	r10,rdx
4264	mov	rax,QWORD[((8+160+0))+rbp]
4265	mul	r11
4266	add	r15,rax
4267	adc	rdx,0
4268	imul	r9,r12
4269	add	r15,r10
4270	adc	r9,rdx
; Product is in r13:r14:r15:r9; fold it back into r10:r11:r12.
4271	mov	r10,r13
4272	mov	r11,r14
4273	mov	r12,r15
4274	and	r12,3
4275	mov	r13,r15
4276	and	r13,-4
4277	mov	r14,r9
4278	shrd	r15,r9,2
4279	shr	r9,2
4280	add	r15,r13
4281	adc	r9,r14
4282	add	r10,r15
4283	adc	r11,r9
4284	adc	r12,0
4285
4286	add	rcx,16
4287	cmp	rcx,2*32
4288	jne	NEAR $L$open_avx2_init_hash
4289
; XOR the 64 keystream bytes in ymm0/ymm4 with the input just hashed, write
; the result to rdi, and advance both pointers and the remaining count.
4290	vpxor	ymm0,ymm0,YMMWORD[rsi]
4291	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
4292
4293	vmovdqu	YMMWORD[rdi],ymm0
4294	vmovdqu	YMMWORD[32+rdi],ymm4
4295	lea	rsi,[64+rsi]
4296	lea	rdi,[64+rdi]
4297	sub	rbx,2*32
; Head of the bulk loop: runs while at least 16*32 = 512 bytes remain.
; Sets up four two-block states (eight blocks):
;   state 0: ymm0/ymm4/ymm8 /ymm12     state 1: ymm1/ymm5/ymm9 /ymm13
;   state 2: ymm2/ymm6/ymm10/ymm14     state 3: ymm3/ymm7/ymm11/ymm15
4298$L$open_avx2_main_loop:
4299
4300	cmp	rbx,16*32
4301	jb	NEAR $L$open_avx2_main_loop_done
4302	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4303	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4304	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4305	vmovdqa	ymm1,ymm0
4306	vmovdqa	ymm5,ymm4
4307	vmovdqa	ymm9,ymm8
4308	vmovdqa	ymm2,ymm0
4309	vmovdqa	ymm6,ymm4
4310	vmovdqa	ymm10,ymm8
4311	vmovdqa	ymm3,ymm0
4312	vmovdqa	ymm7,ymm4
4313	vmovdqa	ymm11,ymm8
; Counter rows: $L$avx2_inc adds 2 to the first dword of each lane.
; ymm15 = saved + inc, ymm14 = ymm15 + inc, ymm13 = ymm14 + inc,
; ymm12 = ymm13 + inc. Each is saved for the feed-forward; the slot at
; 160+160 (ymm12, the largest) is also what the next setup starts from.
4314	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4315	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
4316	vpaddd	ymm14,ymm12,ymm15
4317	vpaddd	ymm13,ymm12,ymm14
4318	vpaddd	ymm12,ymm12,ymm13
4319	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
4320	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
4321	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
4322	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4323
; rcx = byte offset of the next input block to hash inside the round loop.
4324	xor	rcx,rcx
; Bulk round loop. Each iteration performs one ChaCha double round on all four
; states, interleaved with three hash block steps on the input at rsi+rcx+0,
; +16 and +32 (accumulator r10:r11:r12, multiplier at 160+0 / 160+8). rcx
; advances by 48 per iteration and the loop ends at 10*6*8 = 480, i.e. after
; ten iterations. Inside the loop the multiply uses mulx (BMI2).
; All sixteen ymm registers hold state, so ymm8 doubles as a temporary: its
; state row is parked at 160+128 while ymm8 holds a shuffle mask or a shift
; intermediate.
4325$L$open_avx2_main_loop_rounds:
; Hash block 1 of 3: add the input block plus the bit-128 pad.
4326	add	r10,QWORD[((0+0))+rcx*1+rsi]
4327	adc	r11,QWORD[((8+0))+rcx*1+rsi]
4328	adc	r12,1
4329	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4330	vmovdqa	ymm8,YMMWORD[$L$rol16]
4331	vpaddd	ymm3,ymm3,ymm7
4332	vpaddd	ymm2,ymm2,ymm6
4333	vpaddd	ymm1,ymm1,ymm5
4334	vpaddd	ymm0,ymm0,ymm4
4335	vpxor	ymm15,ymm15,ymm3
4336	vpxor	ymm14,ymm14,ymm2
4337	vpxor	ymm13,ymm13,ymm1
4338	vpxor	ymm12,ymm12,ymm0
4339	mov	rdx,QWORD[((0+160+0))+rbp]
4340	mov	r15,rdx
4341	mulx	r14,r13,r10
4342	mulx	rdx,rax,r11
4343	imul	r15,r12
4344	add	r14,rax
4345	adc	r15,rdx
4346	vpshufb	ymm15,ymm15,ymm8
4347	vpshufb	ymm14,ymm14,ymm8
4348	vpshufb	ymm13,ymm13,ymm8
4349	vpshufb	ymm12,ymm12,ymm8
4350	vpaddd	ymm11,ymm11,ymm15
4351	vpaddd	ymm10,ymm10,ymm14
4352	vpaddd	ymm9,ymm9,ymm13
4353	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4354	vpxor	ymm7,ymm7,ymm11
4355	mov	rdx,QWORD[((8+160+0))+rbp]
4356	mulx	rax,r10,r10
4357	add	r14,r10
4358	mulx	r9,r11,r11
4359	adc	r15,r11
4360	adc	r9,0
4361	imul	rdx,r12
4362	vpxor	ymm6,ymm6,ymm10
4363	vpxor	ymm5,ymm5,ymm9
4364	vpxor	ymm4,ymm4,ymm8
4365	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; Rotate left by 12 (srl 20 | sll 12) on the four second rows.
4366	vpsrld	ymm8,ymm7,20
4367	vpslld	ymm7,ymm7,32-20
4368	vpxor	ymm7,ymm7,ymm8
4369	vpsrld	ymm8,ymm6,20
4370	vpslld	ymm6,ymm6,32-20
4371	vpxor	ymm6,ymm6,ymm8
4372	vpsrld	ymm8,ymm5,20
4373	vpslld	ymm5,ymm5,32-20
4374	add	r15,rax
4375	adc	r9,rdx
4376	vpxor	ymm5,ymm5,ymm8
4377	vpsrld	ymm8,ymm4,20
4378	vpslld	ymm4,ymm4,32-20
4379	vpxor	ymm4,ymm4,ymm8
4380	vmovdqa	ymm8,YMMWORD[$L$rol8]
4381	vpaddd	ymm3,ymm3,ymm7
4382	vpaddd	ymm2,ymm2,ymm6
4383	vpaddd	ymm1,ymm1,ymm5
4384	vpaddd	ymm0,ymm0,ymm4
4385	vpxor	ymm15,ymm15,ymm3
; Hash block 1: fold the product r13:r14:r15:r9 back into r10:r11:r12.
4386	mov	r10,r13
4387	mov	r11,r14
4388	mov	r12,r15
4389	and	r12,3
4390	mov	r13,r15
4391	and	r13,-4
4392	mov	r14,r9
4393	shrd	r15,r9,2
4394	shr	r9,2
4395	add	r15,r13
4396	adc	r9,r14
4397	add	r10,r15
4398	adc	r11,r9
4399	adc	r12,0
4400	vpxor	ymm14,ymm14,ymm2
4401	vpxor	ymm13,ymm13,ymm1
4402	vpxor	ymm12,ymm12,ymm0
4403	vpshufb	ymm15,ymm15,ymm8
4404	vpshufb	ymm14,ymm14,ymm8
4405	vpshufb	ymm13,ymm13,ymm8
4406	vpshufb	ymm12,ymm12,ymm8
4407	vpaddd	ymm11,ymm11,ymm15
4408	vpaddd	ymm10,ymm10,ymm14
; Hash block 2 of 3 (input offset +16).
4409	add	r10,QWORD[((0+16))+rcx*1+rsi]
4410	adc	r11,QWORD[((8+16))+rcx*1+rsi]
4411	adc	r12,1
4412	vpaddd	ymm9,ymm9,ymm13
4413	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4414	vpxor	ymm7,ymm7,ymm11
4415	vpxor	ymm6,ymm6,ymm10
4416	vpxor	ymm5,ymm5,ymm9
4417	vpxor	ymm4,ymm4,ymm8
4418	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; Rotate left by 7 (srl 25 | sll 7) on the four second rows.
4419	vpsrld	ymm8,ymm7,25
4420	mov	rdx,QWORD[((0+160+0))+rbp]
4421	mov	r15,rdx
4422	mulx	r14,r13,r10
4423	mulx	rdx,rax,r11
4424	imul	r15,r12
4425	add	r14,rax
4426	adc	r15,rdx
4427	vpslld	ymm7,ymm7,32-25
4428	vpxor	ymm7,ymm7,ymm8
4429	vpsrld	ymm8,ymm6,25
4430	vpslld	ymm6,ymm6,32-25
4431	vpxor	ymm6,ymm6,ymm8
4432	vpsrld	ymm8,ymm5,25
4433	vpslld	ymm5,ymm5,32-25
4434	vpxor	ymm5,ymm5,ymm8
4435	vpsrld	ymm8,ymm4,25
4436	vpslld	ymm4,ymm4,32-25
4437	vpxor	ymm4,ymm4,ymm8
4438	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
; Rotate rows 2/3/4 of every state by 4/8/12 bytes for the second half of
; the double round.
4439	vpalignr	ymm7,ymm7,ymm7,4
4440	vpalignr	ymm11,ymm11,ymm11,8
4441	vpalignr	ymm15,ymm15,ymm15,12
4442	vpalignr	ymm6,ymm6,ymm6,4
4443	vpalignr	ymm10,ymm10,ymm10,8
4444	vpalignr	ymm14,ymm14,ymm14,12
4445	mov	rdx,QWORD[((8+160+0))+rbp]
4446	mulx	rax,r10,r10
4447	add	r14,r10
4448	mulx	r9,r11,r11
4449	adc	r15,r11
4450	adc	r9,0
4451	imul	rdx,r12
4452	vpalignr	ymm5,ymm5,ymm5,4
4453	vpalignr	ymm9,ymm9,ymm9,8
4454	vpalignr	ymm13,ymm13,ymm13,12
4455	vpalignr	ymm4,ymm4,ymm4,4
4456	vpalignr	ymm8,ymm8,ymm8,8
4457	vpalignr	ymm12,ymm12,ymm12,12
4458	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4459	vmovdqa	ymm8,YMMWORD[$L$rol16]
4460	vpaddd	ymm3,ymm3,ymm7
4461	vpaddd	ymm2,ymm2,ymm6
4462	vpaddd	ymm1,ymm1,ymm5
4463	vpaddd	ymm0,ymm0,ymm4
4464	vpxor	ymm15,ymm15,ymm3
4465	vpxor	ymm14,ymm14,ymm2
4466	vpxor	ymm13,ymm13,ymm1
4467	vpxor	ymm12,ymm12,ymm0
4468	vpshufb	ymm15,ymm15,ymm8
4469	vpshufb	ymm14,ymm14,ymm8
4470	add	r15,rax
4471	adc	r9,rdx
4472	vpshufb	ymm13,ymm13,ymm8
4473	vpshufb	ymm12,ymm12,ymm8
4474	vpaddd	ymm11,ymm11,ymm15
4475	vpaddd	ymm10,ymm10,ymm14
4476	vpaddd	ymm9,ymm9,ymm13
4477	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4478	vpxor	ymm7,ymm7,ymm11
4479	vpxor	ymm6,ymm6,ymm10
4480	vpxor	ymm5,ymm5,ymm9
; Hash block 2: fold the product back into r10:r11:r12.
4481	mov	r10,r13
4482	mov	r11,r14
4483	mov	r12,r15
4484	and	r12,3
4485	mov	r13,r15
4486	and	r13,-4
4487	mov	r14,r9
4488	shrd	r15,r9,2
4489	shr	r9,2
4490	add	r15,r13
4491	adc	r9,r14
4492	add	r10,r15
4493	adc	r11,r9
4494	adc	r12,0
4495	vpxor	ymm4,ymm4,ymm8
4496	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4497	vpsrld	ymm8,ymm7,20
4498	vpslld	ymm7,ymm7,32-20
4499	vpxor	ymm7,ymm7,ymm8
4500	vpsrld	ymm8,ymm6,20
4501	vpslld	ymm6,ymm6,32-20
4502	vpxor	ymm6,ymm6,ymm8
; Hash block 3 of 3 (input offset +32), then advance rcx by 48.
4503	add	r10,QWORD[((0+32))+rcx*1+rsi]
4504	adc	r11,QWORD[((8+32))+rcx*1+rsi]
4505	adc	r12,1
4506
4507	lea	rcx,[48+rcx]
4508	vpsrld	ymm8,ymm5,20
4509	vpslld	ymm5,ymm5,32-20
4510	vpxor	ymm5,ymm5,ymm8
4511	vpsrld	ymm8,ymm4,20
4512	vpslld	ymm4,ymm4,32-20
4513	vpxor	ymm4,ymm4,ymm8
4514	vmovdqa	ymm8,YMMWORD[$L$rol8]
4515	vpaddd	ymm3,ymm3,ymm7
4516	vpaddd	ymm2,ymm2,ymm6
4517	vpaddd	ymm1,ymm1,ymm5
4518	vpaddd	ymm0,ymm0,ymm4
4519	vpxor	ymm15,ymm15,ymm3
4520	vpxor	ymm14,ymm14,ymm2
4521	vpxor	ymm13,ymm13,ymm1
4522	vpxor	ymm12,ymm12,ymm0
4523	vpshufb	ymm15,ymm15,ymm8
4524	vpshufb	ymm14,ymm14,ymm8
4525	vpshufb	ymm13,ymm13,ymm8
4526	mov	rdx,QWORD[((0+160+0))+rbp]
4527	mov	r15,rdx
4528	mulx	r14,r13,r10
4529	mulx	rdx,rax,r11
4530	imul	r15,r12
4531	add	r14,rax
4532	adc	r15,rdx
4533	vpshufb	ymm12,ymm12,ymm8
4534	vpaddd	ymm11,ymm11,ymm15
4535	vpaddd	ymm10,ymm10,ymm14
4536	vpaddd	ymm9,ymm9,ymm13
4537	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4538	vpxor	ymm7,ymm7,ymm11
4539	vpxor	ymm6,ymm6,ymm10
4540	vpxor	ymm5,ymm5,ymm9
4541	mov	rdx,QWORD[((8+160+0))+rbp]
4542	mulx	rax,r10,r10
4543	add	r14,r10
4544	mulx	r9,r11,r11
4545	adc	r15,r11
4546	adc	r9,0
4547	imul	rdx,r12
4548	vpxor	ymm4,ymm4,ymm8
4549	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4550	vpsrld	ymm8,ymm7,25
4551	vpslld	ymm7,ymm7,32-25
4552	vpxor	ymm7,ymm7,ymm8
4553	vpsrld	ymm8,ymm6,25
4554	vpslld	ymm6,ymm6,32-25
4555	vpxor	ymm6,ymm6,ymm8
4556	add	r15,rax
4557	adc	r9,rdx
4558	vpsrld	ymm8,ymm5,25
4559	vpslld	ymm5,ymm5,32-25
4560	vpxor	ymm5,ymm5,ymm8
4561	vpsrld	ymm8,ymm4,25
4562	vpslld	ymm4,ymm4,32-25
4563	vpxor	ymm4,ymm4,ymm8
4564	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
; Undo the row rotation (12/8/4 bytes); ymm12 is rotated after the scalar
; fold below.
4565	vpalignr	ymm7,ymm7,ymm7,12
4566	vpalignr	ymm11,ymm11,ymm11,8
4567	vpalignr	ymm15,ymm15,ymm15,4
4568	vpalignr	ymm6,ymm6,ymm6,12
4569	vpalignr	ymm10,ymm10,ymm10,8
4570	vpalignr	ymm14,ymm14,ymm14,4
4571	vpalignr	ymm5,ymm5,ymm5,12
4572	vpalignr	ymm9,ymm9,ymm9,8
4573	vpalignr	ymm13,ymm13,ymm13,4
4574	vpalignr	ymm4,ymm4,ymm4,12
4575	vpalignr	ymm8,ymm8,ymm8,8
; Hash block 3: fold the product back into r10:r11:r12.
4576	mov	r10,r13
4577	mov	r11,r14
4578	mov	r12,r15
4579	and	r12,3
4580	mov	r13,r15
4581	and	r13,-4
4582	mov	r14,r9
4583	shrd	r15,r9,2
4584	shr	r9,2
4585	add	r15,r13
4586	adc	r9,r14
4587	add	r10,r15
4588	adc	r11,r9
4589	adc	r12,0
4590	vpalignr	ymm12,ymm12,ymm12,4
4591
4592	cmp	rcx,10*6*8
4593	jne	NEAR $L$open_avx2_main_loop_rounds
; Rounds finished. Feed-forward: add the constants, the saved rows and each
; state's own saved counter row.
4594	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
4595	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
4596	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
4597	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
4598	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
4599	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
4600	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
4601	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
4602	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
4603	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
4604	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
4605	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
4606	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4607	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4608	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4609	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4610
; Park ymm0 so it can be used as a temporary while state 3 is written out.
; The loop hashed input bytes 0..479; the two remaining 16-byte blocks
; (offsets 480 and 496) are hashed here, interleaved with the output stores.
4611	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
4612	add	r10,QWORD[((0+480))+rsi]
4613	adc	r11,QWORD[((8+480))+rsi]
4614	adc	r12,1
; vperm2i128 0x02 / 0x13 regroup the lane-interleaved rows into two
; contiguous 64-byte blocks (lower-lane block first), which are XORed with
; 128 input bytes and stored.
4615	vperm2i128	ymm0,ymm7,ymm3,0x02
4616	vperm2i128	ymm7,ymm7,ymm3,0x13
4617	vperm2i128	ymm3,ymm15,ymm11,0x02
4618	vperm2i128	ymm11,ymm15,ymm11,0x13
4619	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
4620	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
4621	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
4622	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
4623	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
4624	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
4625	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
4626	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
4627
4628	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
; Multiply/reduce for the block at offset 480 (plain mul form).
4629	mov	rax,QWORD[((0+160+0))+rbp]
4630	mov	r15,rax
4631	mul	r10
4632	mov	r13,rax
4633	mov	r14,rdx
4634	mov	rax,QWORD[((0+160+0))+rbp]
4635	mul	r11
4636	imul	r15,r12
4637	add	r14,rax
4638	adc	r15,rdx
4639	mov	rax,QWORD[((8+160+0))+rbp]
4640	mov	r9,rax
4641	mul	r10
4642	add	r14,rax
4643	adc	rdx,0
4644	mov	r10,rdx
4645	mov	rax,QWORD[((8+160+0))+rbp]
4646	mul	r11
4647	add	r15,rax
4648	adc	rdx,0
4649	imul	r9,r12
4650	add	r15,r10
4651	adc	r9,rdx
4652	mov	r10,r13
4653	mov	r11,r14
4654	mov	r12,r15
4655	and	r12,3
4656	mov	r13,r15
4657	and	r13,-4
4658	mov	r14,r9
4659	shrd	r15,r9,2
4660	shr	r9,2
4661	add	r15,r13
4662	adc	r9,r14
4663	add	r10,r15
4664	adc	r11,r9
4665	adc	r12,0
; State 2 -> output bytes 128..255.
4666	vperm2i128	ymm3,ymm6,ymm2,0x02
4667	vperm2i128	ymm6,ymm6,ymm2,0x13
4668	vperm2i128	ymm2,ymm14,ymm10,0x02
4669	vperm2i128	ymm10,ymm14,ymm10,0x13
4670	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
4671	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
4672	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
4673	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
4674	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
4675	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
4676	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
4677	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; Add the last input block of this 512-byte chunk (offset 496).
4678	add	r10,QWORD[((0+480+16))+rsi]
4679	adc	r11,QWORD[((8+480+16))+rsi]
4680	adc	r12,1
; State 1 -> output bytes 256..383.
4681	vperm2i128	ymm3,ymm5,ymm1,0x02
4682	vperm2i128	ymm5,ymm5,ymm1,0x13
4683	vperm2i128	ymm1,ymm13,ymm9,0x02
4684	vperm2i128	ymm9,ymm13,ymm9,0x13
4685	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
4686	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
4687	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
4688	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
4689	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
4690	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
4691	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
4692	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; Multiply/reduce for the block at offset 496.
4693	mov	rax,QWORD[((0+160+0))+rbp]
4694	mov	r15,rax
4695	mul	r10
4696	mov	r13,rax
4697	mov	r14,rdx
4698	mov	rax,QWORD[((0+160+0))+rbp]
4699	mul	r11
4700	imul	r15,r12
4701	add	r14,rax
4702	adc	r15,rdx
4703	mov	rax,QWORD[((8+160+0))+rbp]
4704	mov	r9,rax
4705	mul	r10
4706	add	r14,rax
4707	adc	rdx,0
4708	mov	r10,rdx
4709	mov	rax,QWORD[((8+160+0))+rbp]
4710	mul	r11
4711	add	r15,rax
4712	adc	rdx,0
4713	imul	r9,r12
4714	add	r15,r10
4715	adc	r9,rdx
4716	mov	r10,r13
4717	mov	r11,r14
4718	mov	r12,r15
4719	and	r12,3
4720	mov	r13,r15
4721	and	r13,-4
4722	mov	r14,r9
4723	shrd	r15,r9,2
4724	shr	r9,2
4725	add	r15,r13
4726	adc	r9,r14
4727	add	r10,r15
4728	adc	r11,r9
4729	adc	r12,0
; State 0 -> output bytes 384..511.
4730	vperm2i128	ymm3,ymm4,ymm0,0x02
4731	vperm2i128	ymm4,ymm4,ymm0,0x13
4732	vperm2i128	ymm0,ymm12,ymm8,0x02
4733	vperm2i128	ymm8,ymm12,ymm8,0x13
4734	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
4735	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
4736	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
4737	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
4738	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
4739	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
4740	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
4741	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
4742
; Advance input/output by 512 bytes and go back to the loop head.
4743	lea	rsi,[512+rsi]
4744	lea	rdi,[512+rdi]
4745	sub	rbx,16*32
4746	jmp	NEAR $L$open_avx2_main_loop
; Fewer than 512 bytes remain. If nothing is left, go to the finalisation
; label (defined outside this view); the je consumes the flags set by the
; test, with vzeroupper placed in between. Otherwise dispatch on rbx:
;   rbx > 12*32 -> tail_512,  rbx > 8*32 -> tail_384,  rbx > 4*32 -> tail_256,
;   else fall through to the <= 128-byte tail set up below.
4747$L$open_avx2_main_loop_done:
4748	test	rbx,rbx
4749	vzeroupper
4750	je	NEAR $L$open_sse_finalize
4751
4752	cmp	rbx,12*32
4753	ja	NEAR $L$open_avx2_tail_512
4754	cmp	rbx,8*32
4755	ja	NEAR $L$open_avx2_tail_384
4756	cmp	rbx,4*32
4757	ja	NEAR $L$open_avx2_tail_256
; <= 128-byte tail: one two-block state; the counter row is advanced by
; $L$avx2_inc and written back to its slot.
4758	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4759	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4760	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4761	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4762	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4763	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4764
; r8 = running offset (advanced by 16 per double round in the loop below);
; rcx = rbx rounded down to a multiple of 16, i.e. the number of bytes that
; form whole 16-byte blocks. With no whole block, skip the hash step.
4765	xor	r8,r8
4766	mov	rcx,rbx
4767	and	rcx,-16
4768	test	rcx,rcx
4769	je	NEAR $L$open_avx2_tail_128_rounds
; Hash one 16-byte input block at rsi+r8 into the accumulator r10:r11:r12
; (add block plus bit-128 pad, multiply by the value at 160+0 / 160+8,
; partially reduce modulo 2^130-5), then fall through into the round body.
4770$L$open_avx2_tail_128_rounds_and_x1hash:
4771	add	r10,QWORD[((0+0))+r8*1+rsi]
4772	adc	r11,QWORD[((8+0))+r8*1+rsi]
4773	adc	r12,1
4774	mov	rax,QWORD[((0+160+0))+rbp]
4775	mov	r15,rax
4776	mul	r10
4777	mov	r13,rax
4778	mov	r14,rdx
4779	mov	rax,QWORD[((0+160+0))+rbp]
4780	mul	r11
4781	imul	r15,r12
4782	add	r14,rax
4783	adc	r15,rdx
4784	mov	rax,QWORD[((8+160+0))+rbp]
4785	mov	r9,rax
4786	mul	r10
4787	add	r14,rax
4788	adc	rdx,0
4789	mov	r10,rdx
4790	mov	rax,QWORD[((8+160+0))+rbp]
4791	mul	r11
4792	add	r15,rax
4793	adc	rdx,0
4794	imul	r9,r12
4795	add	r15,r10
4796	adc	r9,rdx
; Fold the product r13:r14:r15:r9 back into r10:r11:r12.
4797	mov	r10,r13
4798	mov	r11,r14
4799	mov	r12,r15
4800	and	r12,3
4801	mov	r13,r15
4802	and	r13,-4
4803	mov	r14,r9
4804	shrd	r15,r9,2
4805	shr	r9,2
4806	add	r15,r13
4807	adc	r9,r14
4808	add	r10,r15
4809	adc	r11,r9
4810	adc	r12,0
4811
; One ChaCha double round on the single tail state (ymm0/ymm4/ymm8/ymm12,
; ymm3 scratch). r8 advances by 16 per iteration and serves both as the hash
; offset and as the round counter: while r8 < rcx the next iteration enters
; through the hashing label, and the loop ends when r8 reaches 160 (ten
; double rounds).
4812$L$open_avx2_tail_128_rounds:
4813	add	r8,16
4814	vpaddd	ymm0,ymm0,ymm4
4815	vpxor	ymm12,ymm12,ymm0
4816	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4817	vpaddd	ymm8,ymm8,ymm12
4818	vpxor	ymm4,ymm4,ymm8
4819	vpsrld	ymm3,ymm4,20
4820	vpslld	ymm4,ymm4,12
4821	vpxor	ymm4,ymm4,ymm3
4822	vpaddd	ymm0,ymm0,ymm4
4823	vpxor	ymm12,ymm12,ymm0
4824	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4825	vpaddd	ymm8,ymm8,ymm12
4826	vpxor	ymm4,ymm4,ymm8
4827	vpslld	ymm3,ymm4,7
4828	vpsrld	ymm4,ymm4,25
4829	vpxor	ymm4,ymm4,ymm3
4830	vpalignr	ymm12,ymm12,ymm12,12
4831	vpalignr	ymm8,ymm8,ymm8,8
4832	vpalignr	ymm4,ymm4,ymm4,4
4833	vpaddd	ymm0,ymm0,ymm4
4834	vpxor	ymm12,ymm12,ymm0
4835	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4836	vpaddd	ymm8,ymm8,ymm12
4837	vpxor	ymm4,ymm4,ymm8
4838	vpsrld	ymm3,ymm4,20
4839	vpslld	ymm4,ymm4,12
4840	vpxor	ymm4,ymm4,ymm3
4841	vpaddd	ymm0,ymm0,ymm4
4842	vpxor	ymm12,ymm12,ymm0
4843	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4844	vpaddd	ymm8,ymm8,ymm12
4845	vpxor	ymm4,ymm4,ymm8
4846	vpslld	ymm3,ymm4,7
4847	vpsrld	ymm4,ymm4,25
4848	vpxor	ymm4,ymm4,ymm3
4849	vpalignr	ymm12,ymm12,ymm12,4
4850	vpalignr	ymm8,ymm8,ymm8,8
4851	vpalignr	ymm4,ymm4,ymm4,12
4852
4853	cmp	r8,rcx
4854	jb	NEAR $L$open_avx2_tail_128_rounds_and_x1hash
4855	cmp	r8,160
4856	jne	NEAR $L$open_avx2_tail_128_rounds
; Feed-forward, then regroup lanes so the keystream is in the register order
; ymm0, ymm4, ymm8, ymm12:
;   ymm0  = {ymm0.lo, ymm4.lo}   ymm4  = {ymm8.lo, ymm12.lo}
;   ymm8  = {ymm0.hi, ymm4.hi}   ymm12 = {ymm8.hi, ymm12.hi}
4857	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4858	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4859	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4860	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4861	vperm2i128	ymm3,ymm4,ymm0,0x13
4862	vperm2i128	ymm0,ymm4,ymm0,0x02
4863	vperm2i128	ymm4,ymm12,ymm8,0x02
4864	vperm2i128	ymm12,ymm12,ymm8,0x13
4865	vmovdqa	ymm8,ymm3
4866
; The XOR of the remaining bytes happens at a label outside this view.
4867	jmp	NEAR $L$open_avx2_tail_128_xor
4868
; Tail for 128 < rbx <= 256 (per the dispatch compares that lead here): two
; two-block states, state 0 in ymm0/ymm4/ymm8/ymm12 and state 1 in
; ymm1/ymm5/ymm9/ymm13.
4869$L$open_avx2_tail_256:
4870	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4871	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4872	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4873	vmovdqa	ymm1,ymm0
4874	vmovdqa	ymm5,ymm4
4875	vmovdqa	ymm9,ymm8
; Counter rows: ymm13 = saved + inc, ymm12 = ymm13 + inc; both are stored for
; the feed-forward.
4876	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4877	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]
4878	vpaddd	ymm12,ymm12,ymm13
4879	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4880	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
4881
; rbx is about to be reused as the hash read pointer, so the byte count is
; parked in the scratch slot at 160+128.
; rcx = (rbx - 128) / 16: the number of loop iterations that also hash a
; block, capped at 10 by a signed compare/cmovg.
; r8 = round counter, starting at 0.
4882	mov	QWORD[((160+128))+rbp],rbx
4883	mov	rcx,rbx
4884	sub	rcx,4*32
4885	shr	rcx,4
4886	mov	r8,10
4887	cmp	rcx,10
4888	cmovg	rcx,r8
4889	mov	rbx,rsi
4890	xor	r8,r8
; Hash one 16-byte block at [rbx] into r10:r11:r12 (mulx form of the
; multiply by the value at 160+0 / 160+8, then the partial reduction modulo
; 2^130-5), advance rbx by 16 and fall through into the round body.
4891$L$open_avx2_tail_256_rounds_and_x1hash:
4892	add	r10,QWORD[((0+0))+rbx]
4893	adc	r11,QWORD[((8+0))+rbx]
4894	adc	r12,1
4895	mov	rdx,QWORD[((0+160+0))+rbp]
4896	mov	r15,rdx
4897	mulx	r14,r13,r10
4898	mulx	rdx,rax,r11
4899	imul	r15,r12
4900	add	r14,rax
4901	adc	r15,rdx
4902	mov	rdx,QWORD[((8+160+0))+rbp]
4903	mulx	rax,r10,r10
4904	add	r14,r10
4905	mulx	r9,r11,r11
4906	adc	r15,r11
4907	adc	r9,0
4908	imul	rdx,r12
4909	add	r15,rax
4910	adc	r9,rdx
; Fold the product r13:r14:r15:r9 back into r10:r11:r12.
4911	mov	r10,r13
4912	mov	r11,r14
4913	mov	r12,r15
4914	and	r12,3
4915	mov	r13,r15
4916	and	r13,-4
4917	mov	r14,r9
4918	shrd	r15,r9,2
4919	shr	r9,2
4920	add	r15,r13
4921	adc	r9,r14
4922	add	r10,r15
4923	adc	r11,r9
4924	adc	r12,0
4925
4926	lea	rbx,[16+rbx]
; One ChaCha double round on state 0 and state 1 (ymm3 scratch); r8 counts
; iterations. While r8 < rcx the next iteration enters through the hashing
; label; the loop ends when r8 reaches 10.
4927$L$open_avx2_tail_256_rounds:
; First half of the double round: state 0, then state 1.
4928	vpaddd	ymm0,ymm0,ymm4
4929	vpxor	ymm12,ymm12,ymm0
4930	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4931	vpaddd	ymm8,ymm8,ymm12
4932	vpxor	ymm4,ymm4,ymm8
4933	vpsrld	ymm3,ymm4,20
4934	vpslld	ymm4,ymm4,12
4935	vpxor	ymm4,ymm4,ymm3
4936	vpaddd	ymm0,ymm0,ymm4
4937	vpxor	ymm12,ymm12,ymm0
4938	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4939	vpaddd	ymm8,ymm8,ymm12
4940	vpxor	ymm4,ymm4,ymm8
4941	vpslld	ymm3,ymm4,7
4942	vpsrld	ymm4,ymm4,25
4943	vpxor	ymm4,ymm4,ymm3
4944	vpalignr	ymm12,ymm12,ymm12,12
4945	vpalignr	ymm8,ymm8,ymm8,8
4946	vpalignr	ymm4,ymm4,ymm4,4
4947	vpaddd	ymm1,ymm1,ymm5
4948	vpxor	ymm13,ymm13,ymm1
4949	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
4950	vpaddd	ymm9,ymm9,ymm13
4951	vpxor	ymm5,ymm5,ymm9
4952	vpsrld	ymm3,ymm5,20
4953	vpslld	ymm5,ymm5,12
4954	vpxor	ymm5,ymm5,ymm3
4955	vpaddd	ymm1,ymm1,ymm5
4956	vpxor	ymm13,ymm13,ymm1
4957	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
4958	vpaddd	ymm9,ymm9,ymm13
4959	vpxor	ymm5,ymm5,ymm9
4960	vpslld	ymm3,ymm5,7
4961	vpsrld	ymm5,ymm5,25
4962	vpxor	ymm5,ymm5,ymm3
4963	vpalignr	ymm13,ymm13,ymm13,12
4964	vpalignr	ymm9,ymm9,ymm9,8
4965	vpalignr	ymm5,ymm5,ymm5,4
4966
; Second half of the double round: state 0, then state 1.
4967	inc	r8
4968	vpaddd	ymm0,ymm0,ymm4
4969	vpxor	ymm12,ymm12,ymm0
4970	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4971	vpaddd	ymm8,ymm8,ymm12
4972	vpxor	ymm4,ymm4,ymm8
4973	vpsrld	ymm3,ymm4,20
4974	vpslld	ymm4,ymm4,12
4975	vpxor	ymm4,ymm4,ymm3
4976	vpaddd	ymm0,ymm0,ymm4
4977	vpxor	ymm12,ymm12,ymm0
4978	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4979	vpaddd	ymm8,ymm8,ymm12
4980	vpxor	ymm4,ymm4,ymm8
4981	vpslld	ymm3,ymm4,7
4982	vpsrld	ymm4,ymm4,25
4983	vpxor	ymm4,ymm4,ymm3
4984	vpalignr	ymm12,ymm12,ymm12,4
4985	vpalignr	ymm8,ymm8,ymm8,8
4986	vpalignr	ymm4,ymm4,ymm4,12
4987	vpaddd	ymm1,ymm1,ymm5
4988	vpxor	ymm13,ymm13,ymm1
4989	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
4990	vpaddd	ymm9,ymm9,ymm13
4991	vpxor	ymm5,ymm5,ymm9
4992	vpsrld	ymm3,ymm5,20
4993	vpslld	ymm5,ymm5,12
4994	vpxor	ymm5,ymm5,ymm3
4995	vpaddd	ymm1,ymm1,ymm5
4996	vpxor	ymm13,ymm13,ymm1
4997	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
4998	vpaddd	ymm9,ymm9,ymm13
4999	vpxor	ymm5,ymm5,ymm9
5000	vpslld	ymm3,ymm5,7
5001	vpsrld	ymm5,ymm5,25
5002	vpxor	ymm5,ymm5,ymm3
5003	vpalignr	ymm13,ymm13,ymm13,4
5004	vpalignr	ymm9,ymm9,ymm9,8
5005	vpalignr	ymm5,ymm5,ymm5,12
; NOTE(review): the quarter-round below works on ymm2/ymm6/ymm10/ymm14. The
; tail_256 setup does not load those registers and the tail_256_done code
; does not read them, so within the visible code this looks like redundant
; work with no effect on the output. Confirm against the generator script
; and against $L$open_avx2_tail_128_xor (outside this view) before removing.
5006	vpaddd	ymm2,ymm2,ymm6
5007	vpxor	ymm14,ymm14,ymm2
5008	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5009	vpaddd	ymm10,ymm10,ymm14
5010	vpxor	ymm6,ymm6,ymm10
5011	vpsrld	ymm3,ymm6,20
5012	vpslld	ymm6,ymm6,12
5013	vpxor	ymm6,ymm6,ymm3
5014	vpaddd	ymm2,ymm2,ymm6
5015	vpxor	ymm14,ymm14,ymm2
5016	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5017	vpaddd	ymm10,ymm10,ymm14
5018	vpxor	ymm6,ymm6,ymm10
5019	vpslld	ymm3,ymm6,7
5020	vpsrld	ymm6,ymm6,25
5021	vpxor	ymm6,ymm6,ymm3
5022	vpalignr	ymm14,ymm14,ymm14,4
5023	vpalignr	ymm10,ymm10,ymm10,8
5024	vpalignr	ymm6,ymm6,ymm6,12
5025
5026	cmp	r8,rcx
5027	jb	NEAR $L$open_avx2_tail_256_rounds_and_x1hash
5028	cmp	r8,10
5029	jne	NEAR $L$open_avx2_tail_256_rounds
; Hand-off to the remaining-hash loop: r8 = current hash read pointer,
; rcx = number of bytes hashed so far (pointer minus rsi), rbx = byte count
; restored from the scratch slot.
5030	mov	r8,rbx
5031	sub	rbx,rsi
5032	mov	rcx,rbx
5033	mov	rbx,QWORD[((160+128))+rbp]
; Hash the whole 16-byte blocks that the round loop did not cover: rcx is
; advanced by 16 first, and the loop exits once rcx exceeds rbx, so a trailing
; partial block is not hashed here.
; NOTE(review): the exit test is a signed compare (jg) on byte counts; the
; dispatch that leads to this path bounds rbx at 8*32, so signedness should
; not matter here - confirm.
5034$L$open_avx2_tail_256_hash:
5035	add	rcx,16
5036	cmp	rcx,rbx
5037	jg	NEAR $L$open_avx2_tail_256_done
5038	add	r10,QWORD[((0+0))+r8]
5039	adc	r11,QWORD[((8+0))+r8]
5040	adc	r12,1
5041	mov	rdx,QWORD[((0+160+0))+rbp]
5042	mov	r15,rdx
5043	mulx	r14,r13,r10
5044	mulx	rdx,rax,r11
5045	imul	r15,r12
5046	add	r14,rax
5047	adc	r15,rdx
5048	mov	rdx,QWORD[((8+160+0))+rbp]
5049	mulx	rax,r10,r10
5050	add	r14,r10
5051	mulx	r9,r11,r11
5052	adc	r15,r11
5053	adc	r9,0
5054	imul	rdx,r12
5055	add	r15,rax
5056	adc	r9,rdx
; Fold the product r13:r14:r15:r9 back into r10:r11:r12.
5057	mov	r10,r13
5058	mov	r11,r14
5059	mov	r12,r15
5060	and	r12,3
5061	mov	r13,r15
5062	and	r13,-4
5063	mov	r14,r9
5064	shrd	r15,r9,2
5065	shr	r9,2
5066	add	r15,r13
5067	adc	r9,r14
5068	add	r10,r15
5069	adc	r11,r9
5070	adc	r12,0
5071
5072	lea	r8,[16+r8]
5073	jmp	NEAR $L$open_avx2_tail_256_hash
; Feed-forward for both states, decrypt the first 128 bytes with state 1, and
; leave state 0's keystream in ymm0/ymm4/ymm8/ymm12 for the final XOR code.
5074$L$open_avx2_tail_256_done:
5075	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5076	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5077	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5078	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5079	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5080	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5081	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5082	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 1: regroup lanes into two contiguous 64-byte blocks, XOR with the
; input and store 128 bytes.
5083	vperm2i128	ymm3,ymm5,ymm1,0x02
5084	vperm2i128	ymm5,ymm5,ymm1,0x13
5085	vperm2i128	ymm1,ymm13,ymm9,0x02
5086	vperm2i128	ymm9,ymm13,ymm9,0x13
5087	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
5088	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
5089	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
5090	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
5091	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
5092	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
5093	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
5094	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
; State 0: regroup so the keystream order is ymm0, ymm4, ymm8, ymm12.
5095	vperm2i128	ymm3,ymm4,ymm0,0x13
5096	vperm2i128	ymm0,ymm4,ymm0,0x02
5097	vperm2i128	ymm4,ymm12,ymm8,0x02
5098	vperm2i128	ymm12,ymm12,ymm8,0x13
5099	vmovdqa	ymm8,ymm3
5100
; Advance past the 128 bytes just written; the rest is handled at a label
; outside this view.
5101	lea	rsi,[128+rsi]
5102	lea	rdi,[128+rdi]
5103	sub	rbx,4*32
5104	jmp	NEAR $L$open_avx2_tail_128_xor
5105
; Tail for 256 < rbx <= 384 (per the dispatch compares that lead here): three
; two-block states, state 0 in ymm0/4/8/12, state 1 in ymm1/5/9/13 and
; state 2 in ymm2/6/10/14.
5106$L$open_avx2_tail_384:
5107	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
5108	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
5109	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
5110	vmovdqa	ymm1,ymm0
5111	vmovdqa	ymm5,ymm4
5112	vmovdqa	ymm9,ymm8
5113	vmovdqa	ymm2,ymm0
5114	vmovdqa	ymm6,ymm4
5115	vmovdqa	ymm10,ymm8
; Counter rows: ymm14 = saved + inc, ymm13 = ymm14 + inc, ymm12 = ymm13 + inc;
; all three are stored for the feed-forward.
5116	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
5117	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
5118	vpaddd	ymm13,ymm12,ymm14
5119	vpaddd	ymm12,ymm12,ymm13
5120	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
5121	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
5122	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
5123
; Park the byte count in the scratch slot at 160+128 (rbx becomes the hash
; read pointer).
; rcx = (rbx - 256) / 16 + 6, capped at 10: the number of loop iterations
; that enter through the label that hashes an extra block.
; r8 = round counter, starting at 0.
5124	mov	QWORD[((160+128))+rbp],rbx
5125	mov	rcx,rbx
5126	sub	rcx,8*32
5127	shr	rcx,4
5128	add	rcx,6
5129	mov	r8,10
5130	cmp	rcx,10
5131	cmovg	rcx,r8
5132	mov	rbx,rsi
5133	xor	r8,r8
; Extra hash step: hash one 16-byte block at [rbx] (mulx form), advance rbx
; by 16 and fall through into the round body, which hashes a second block.
5134$L$open_avx2_tail_384_rounds_and_x2hash:
5135	add	r10,QWORD[((0+0))+rbx]
5136	adc	r11,QWORD[((8+0))+rbx]
5137	adc	r12,1
5138	mov	rdx,QWORD[((0+160+0))+rbp]
5139	mov	r15,rdx
5140	mulx	r14,r13,r10
5141	mulx	rdx,rax,r11
5142	imul	r15,r12
5143	add	r14,rax
5144	adc	r15,rdx
5145	mov	rdx,QWORD[((8+160+0))+rbp]
5146	mulx	rax,r10,r10
5147	add	r14,r10
5148	mulx	r9,r11,r11
5149	adc	r15,r11
5150	adc	r9,0
5151	imul	rdx,r12
5152	add	r15,rax
5153	adc	r9,rdx
; Fold the product r13:r14:r15:r9 back into r10:r11:r12.
5154	mov	r10,r13
5155	mov	r11,r14
5156	mov	r12,r15
5157	and	r12,3
5158	mov	r13,r15
5159	and	r13,-4
5160	mov	r14,r9
5161	shrd	r15,r9,2
5162	shr	r9,2
5163	add	r15,r13
5164	adc	r9,r14
5165	add	r10,r15
5166	adc	r11,r9
5167	adc	r12,0
5168
5169	lea	rbx,[16+rbx]
; One ChaCha double round on the three tail states (order: state 2, state 1,
; state 0; ymm3 scratch) with one hash block step between the two halves.
; r8 counts iterations: while r8 < rcx the next iteration enters through the
; x2hash label (one extra block hashed); the loop ends when r8 reaches 10.
5170$L$open_avx2_tail_384_rounds_and_x1hash:
; First half of the double round.
5171	vpaddd	ymm2,ymm2,ymm6
5172	vpxor	ymm14,ymm14,ymm2
5173	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5174	vpaddd	ymm10,ymm10,ymm14
5175	vpxor	ymm6,ymm6,ymm10
5176	vpsrld	ymm3,ymm6,20
5177	vpslld	ymm6,ymm6,12
5178	vpxor	ymm6,ymm6,ymm3
5179	vpaddd	ymm2,ymm2,ymm6
5180	vpxor	ymm14,ymm14,ymm2
5181	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5182	vpaddd	ymm10,ymm10,ymm14
5183	vpxor	ymm6,ymm6,ymm10
5184	vpslld	ymm3,ymm6,7
5185	vpsrld	ymm6,ymm6,25
5186	vpxor	ymm6,ymm6,ymm3
5187	vpalignr	ymm14,ymm14,ymm14,12
5188	vpalignr	ymm10,ymm10,ymm10,8
5189	vpalignr	ymm6,ymm6,ymm6,4
5190	vpaddd	ymm1,ymm1,ymm5
5191	vpxor	ymm13,ymm13,ymm1
5192	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5193	vpaddd	ymm9,ymm9,ymm13
5194	vpxor	ymm5,ymm5,ymm9
5195	vpsrld	ymm3,ymm5,20
5196	vpslld	ymm5,ymm5,12
5197	vpxor	ymm5,ymm5,ymm3
5198	vpaddd	ymm1,ymm1,ymm5
5199	vpxor	ymm13,ymm13,ymm1
5200	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5201	vpaddd	ymm9,ymm9,ymm13
5202	vpxor	ymm5,ymm5,ymm9
5203	vpslld	ymm3,ymm5,7
5204	vpsrld	ymm5,ymm5,25
5205	vpxor	ymm5,ymm5,ymm3
5206	vpalignr	ymm13,ymm13,ymm13,12
5207	vpalignr	ymm9,ymm9,ymm9,8
5208	vpalignr	ymm5,ymm5,ymm5,4
5209	vpaddd	ymm0,ymm0,ymm4
5210	vpxor	ymm12,ymm12,ymm0
5211	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5212	vpaddd	ymm8,ymm8,ymm12
5213	vpxor	ymm4,ymm4,ymm8
5214	vpsrld	ymm3,ymm4,20
5215	vpslld	ymm4,ymm4,12
5216	vpxor	ymm4,ymm4,ymm3
5217	vpaddd	ymm0,ymm0,ymm4
5218	vpxor	ymm12,ymm12,ymm0
5219	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5220	vpaddd	ymm8,ymm8,ymm12
5221	vpxor	ymm4,ymm4,ymm8
5222	vpslld	ymm3,ymm4,7
5223	vpsrld	ymm4,ymm4,25
5224	vpxor	ymm4,ymm4,ymm3
5225	vpalignr	ymm12,ymm12,ymm12,12
5226	vpalignr	ymm8,ymm8,ymm8,8
5227	vpalignr	ymm4,ymm4,ymm4,4
; Hash one 16-byte block at [rbx] (plain mul form), then advance rbx by 16.
5228	add	r10,QWORD[((0+0))+rbx]
5229	adc	r11,QWORD[((8+0))+rbx]
5230	adc	r12,1
5231	mov	rax,QWORD[((0+160+0))+rbp]
5232	mov	r15,rax
5233	mul	r10
5234	mov	r13,rax
5235	mov	r14,rdx
5236	mov	rax,QWORD[((0+160+0))+rbp]
5237	mul	r11
5238	imul	r15,r12
5239	add	r14,rax
5240	adc	r15,rdx
5241	mov	rax,QWORD[((8+160+0))+rbp]
5242	mov	r9,rax
5243	mul	r10
5244	add	r14,rax
5245	adc	rdx,0
5246	mov	r10,rdx
5247	mov	rax,QWORD[((8+160+0))+rbp]
5248	mul	r11
5249	add	r15,rax
5250	adc	rdx,0
5251	imul	r9,r12
5252	add	r15,r10
5253	adc	r9,rdx
5254	mov	r10,r13
5255	mov	r11,r14
5256	mov	r12,r15
5257	and	r12,3
5258	mov	r13,r15
5259	and	r13,-4
5260	mov	r14,r9
5261	shrd	r15,r9,2
5262	shr	r9,2
5263	add	r15,r13
5264	adc	r9,r14
5265	add	r10,r15
5266	adc	r11,r9
5267	adc	r12,0
5268
5269	lea	rbx,[16+rbx]
; Second half of the double round.
5270	inc	r8
5271	vpaddd	ymm2,ymm2,ymm6
5272	vpxor	ymm14,ymm14,ymm2
5273	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5274	vpaddd	ymm10,ymm10,ymm14
5275	vpxor	ymm6,ymm6,ymm10
5276	vpsrld	ymm3,ymm6,20
5277	vpslld	ymm6,ymm6,12
5278	vpxor	ymm6,ymm6,ymm3
5279	vpaddd	ymm2,ymm2,ymm6
5280	vpxor	ymm14,ymm14,ymm2
5281	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5282	vpaddd	ymm10,ymm10,ymm14
5283	vpxor	ymm6,ymm6,ymm10
5284	vpslld	ymm3,ymm6,7
5285	vpsrld	ymm6,ymm6,25
5286	vpxor	ymm6,ymm6,ymm3
5287	vpalignr	ymm14,ymm14,ymm14,4
5288	vpalignr	ymm10,ymm10,ymm10,8
5289	vpalignr	ymm6,ymm6,ymm6,12
5290	vpaddd	ymm1,ymm1,ymm5
5291	vpxor	ymm13,ymm13,ymm1
5292	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5293	vpaddd	ymm9,ymm9,ymm13
5294	vpxor	ymm5,ymm5,ymm9
5295	vpsrld	ymm3,ymm5,20
5296	vpslld	ymm5,ymm5,12
5297	vpxor	ymm5,ymm5,ymm3
5298	vpaddd	ymm1,ymm1,ymm5
5299	vpxor	ymm13,ymm13,ymm1
5300	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5301	vpaddd	ymm9,ymm9,ymm13
5302	vpxor	ymm5,ymm5,ymm9
5303	vpslld	ymm3,ymm5,7
5304	vpsrld	ymm5,ymm5,25
5305	vpxor	ymm5,ymm5,ymm3
5306	vpalignr	ymm13,ymm13,ymm13,4
5307	vpalignr	ymm9,ymm9,ymm9,8
5308	vpalignr	ymm5,ymm5,ymm5,12
5309	vpaddd	ymm0,ymm0,ymm4
5310	vpxor	ymm12,ymm12,ymm0
5311	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5312	vpaddd	ymm8,ymm8,ymm12
5313	vpxor	ymm4,ymm4,ymm8
5314	vpsrld	ymm3,ymm4,20
5315	vpslld	ymm4,ymm4,12
5316	vpxor	ymm4,ymm4,ymm3
5317	vpaddd	ymm0,ymm0,ymm4
5318	vpxor	ymm12,ymm12,ymm0
5319	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5320	vpaddd	ymm8,ymm8,ymm12
5321	vpxor	ymm4,ymm4,ymm8
5322	vpslld	ymm3,ymm4,7
5323	vpsrld	ymm4,ymm4,25
5324	vpxor	ymm4,ymm4,ymm3
5325	vpalignr	ymm12,ymm12,ymm12,4
5326	vpalignr	ymm8,ymm8,ymm8,8
5327	vpalignr	ymm4,ymm4,ymm4,12
5328
5329	cmp	r8,rcx
5330	jb	NEAR $L$open_avx2_tail_384_rounds_and_x2hash
5331	cmp	r8,10
5332	jne	NEAR $L$open_avx2_tail_384_rounds_and_x1hash
; Hand-off to the remaining-hash loop: r8 = current hash read pointer,
; rcx = number of bytes hashed so far (pointer minus rsi), rbx = byte count
; restored from the scratch slot.
5333	mov	r8,rbx
5334	sub	rbx,rsi
5335	mov	rcx,rbx
5336	mov	rbx,QWORD[((160+128))+rbp]
; Hash the whole 16-byte blocks that the round loop did not cover: rcx is
; advanced by 16 first, and the loop exits once rcx exceeds rbx, so a trailing
; partial block is not hashed here.
; NOTE(review): the exit test is a signed compare (jg) on byte counts; the
; dispatch that leads to this path bounds rbx at 12*32, so signedness should
; not matter here - confirm.
5337$L$open_avx2_384_tail_hash:
5338	add	rcx,16
5339	cmp	rcx,rbx
5340	jg	NEAR $L$open_avx2_384_tail_done
5341	add	r10,QWORD[((0+0))+r8]
5342	adc	r11,QWORD[((8+0))+r8]
5343	adc	r12,1
5344	mov	rdx,QWORD[((0+160+0))+rbp]
5345	mov	r15,rdx
5346	mulx	r14,r13,r10
5347	mulx	rdx,rax,r11
5348	imul	r15,r12
5349	add	r14,rax
5350	adc	r15,rdx
5351	mov	rdx,QWORD[((8+160+0))+rbp]
5352	mulx	rax,r10,r10
5353	add	r14,r10
5354	mulx	r9,r11,r11
5355	adc	r15,r11
5356	adc	r9,0
5357	imul	rdx,r12
5358	add	r15,rax
5359	adc	r9,rdx
; Fold the product r13:r14:r15:r9 back into r10:r11:r12.
5360	mov	r10,r13
5361	mov	r11,r14
5362	mov	r12,r15
5363	and	r12,3
5364	mov	r13,r15
5365	and	r13,-4
5366	mov	r14,r9
5367	shrd	r15,r9,2
5368	shr	r9,2
5369	add	r15,r13
5370	adc	r9,r14
5371	add	r10,r15
5372	adc	r11,r9
5373	adc	r12,0
5374
5375	lea	r8,[16+r8]
5376	jmp	NEAR $L$open_avx2_384_tail_hash
; Feed-forward for all three states, decrypt the first 256 bytes with states
; 2 and 1, and leave state 0's keystream in ymm0/ymm4/ymm8/ymm12 for the
; final XOR code.
5377$L$open_avx2_384_tail_done:
5378	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
5379	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
5380	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
5381	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
5382	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5383	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5384	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5385	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5386	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5387	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5388	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5389	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 2 -> output bytes 0..127.
5390	vperm2i128	ymm3,ymm6,ymm2,0x02
5391	vperm2i128	ymm6,ymm6,ymm2,0x13
5392	vperm2i128	ymm2,ymm14,ymm10,0x02
5393	vperm2i128	ymm10,ymm14,ymm10,0x13
5394	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
5395	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
5396	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
5397	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
5398	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
5399	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
5400	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
5401	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
; State 1 -> output bytes 128..255.
5402	vperm2i128	ymm3,ymm5,ymm1,0x02
5403	vperm2i128	ymm5,ymm5,ymm1,0x13
5404	vperm2i128	ymm1,ymm13,ymm9,0x02
5405	vperm2i128	ymm9,ymm13,ymm9,0x13
5406	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
5407	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
5408	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
5409	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
5410	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
5411	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
5412	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
5413	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
; State 0: regroup so the keystream order is ymm0, ymm4, ymm8, ymm12.
5414	vperm2i128	ymm3,ymm4,ymm0,0x13
5415	vperm2i128	ymm0,ymm4,ymm0,0x02
5416	vperm2i128	ymm4,ymm12,ymm8,0x02
5417	vperm2i128	ymm12,ymm12,ymm8,0x13
5418	vmovdqa	ymm8,ymm3
5419
; Advance past the 256 bytes just written; the rest is handled at a label
; outside this view.
5420	lea	rsi,[256+rsi]
5421	lea	rdi,[256+rdi]
5422	sub	rbx,8*32
5423	jmp	NEAR $L$open_avx2_tail_128_xor
5424
; Sets up four two-block states for the 512-byte tail path.  Rows 0-2 are the
; same for every state (constants, [rbp+160+64], [rbp+160+96]); row 3 differs
; per state by successive additions of $L$avx2_inc.
$L$open_avx2_tail_512:
	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm10,ymm8
	vmovdqa	ymm3,ymm0
	vmovdqa	ymm7,ymm4
	vmovdqa	ymm11,ymm8
	; Row 3: ymm15 = [rbp+160+160] + inc, then ymm14, ymm13, ymm12 each add
	; one more inc.  ymm12 (the largest) is written back to [rbp+160+160].
	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
	vpaddd	ymm14,ymm12,ymm15
	vpaddd	ymm13,ymm12,ymm14
	vpaddd	ymm12,ymm12,ymm13
	; Save the starting row 3 of each state; they are added back after the rounds.
	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12

	xor	rcx,rcx	; rcx = pass counter for the rounds loop
	mov	r8,rsi	; r8 = pointer to the next 16-byte block to hash
; Extra hash step taken on the early passes of the rounds loop: absorbs one
; 16-byte block at [r8] into r12:r11:r10 (using plain MUL), advances r8 by 16,
; then falls through into $L$open_avx2_tail_512_rounds_and_x1hash.
$L$open_avx2_tail_512_rounds_and_x2hash:
	; acc += 16 bytes at [r8]; the "adc r12,1" also adds 1 to the top limb (2^128).
	add	r10,QWORD[((0+0))+r8]
	adc	r11,QWORD[((8+0))+r8]
	adc	r12,1
	; acc * qword at [rbp+160]: r13 = low limb, r14/r15 collect the higher limbs.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	; acc * qword at [rbp+168], added in one limb higher; r10 is reused as a
	; temporary for a partial product.  Product = r9:r15:r14:r13.
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; New acc = low 130 bits of the product + 5 * (product >> 130)
	; (partial reduction modulo 2^130 - 5).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	r8,[16+r8]
; One pass of the rounds loop over four states held as rows
; (a,b,c,d) = (ymm0-3, ymm4-7, ymm8-11, ymm12-15).  Each pass runs two
; quarter-round sweeps with a row rotation in between and afterwards, and
; interleaves two hash steps on [r8] and [r8+16] (r8 then advances by 32).
; ymm8 doubles as the scratch register, so its row value is parked at
; [rbp+160+128] while the shuffle mask or shift temporaries occupy it.
$L$open_avx2_tail_512_rounds_and_x1hash:
	; a += b; d ^= a; d = rotl32(d,16) via byte shuffle with $L$rol16.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	; c += d; b ^= c; b = rotl32(b,12) built from a right shift by 20,
	; a left shift by 12 and an XOR.
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	; a += b; d ^= a; d = rotl32(d,8) via $L$rol8 -- started here and
	; completed after the interleaved hash step below.
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	; Hash step (MULX form): acc r12:r11:r10 += block at [r8] (+2^128),
	; multiply by the two qwords at [rbp+160]/[rbp+168], then fold
	; 5 * (product >> 130) back into the low 130 bits.
	add	r10,QWORD[((0+0))+r8]
	adc	r11,QWORD[((8+0))+r8]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	; c += d; b ^= c; b = rotl32(b,7) (right shift 25, left shift 7, XOR).
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,25
	vpslld	ymm7,ymm7,32-25
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,25
	vpslld	ymm6,ymm6,32-25
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,25
	vpslld	ymm5,ymm5,32-25
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,25
	vpslld	ymm4,ymm4,32-25
	vpxor	ymm4,ymm4,ymm8
	; Rotate rows b, c, d by 4, 8, 12 bytes within each 128-bit lane so the
	; second sweep works on different word combinations.
	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
	vpalignr	ymm7,ymm7,ymm7,4
	vpalignr	ymm11,ymm11,ymm11,8
	vpalignr	ymm15,ymm15,ymm15,12
	vpalignr	ymm6,ymm6,ymm6,4
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm14,ymm14,ymm14,12
	vpalignr	ymm5,ymm5,ymm5,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm4,ymm4,ymm4,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm12,ymm12,ymm12,12
	; Second sweep, same four steps (rotations 16, 12, 8, 7).
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	; Second interleaved hash step, on the block at [r8+16].
	add	r10,QWORD[((0+16))+r8]
	adc	r11,QWORD[((8+16))+r8]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	r8,[32+r8]	; both blocks of this pass are now hashed
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,25
	vpslld	ymm7,ymm7,32-25
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,25
	vpslld	ymm6,ymm6,32-25
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,25
	vpslld	ymm5,ymm5,32-25
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,25
	vpslld	ymm4,ymm4,32-25
	vpxor	ymm4,ymm4,ymm8
	; Undo the row rotation (b, c, d by 12, 8, 4 bytes per lane).
	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
	vpalignr	ymm7,ymm7,ymm7,12
	vpalignr	ymm11,ymm11,ymm11,8
	vpalignr	ymm15,ymm15,ymm15,4
	vpalignr	ymm6,ymm6,ymm6,12
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm14,ymm14,ymm14,4
	vpalignr	ymm5,ymm5,ymm5,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm4,ymm4,ymm4,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm12,ymm12,ymm12,4

	; Ten passes in total.  While the incremented counter is below 4 the next
	; pass starts at the x2hash label (one extra block), otherwise here.
	inc	rcx
	cmp	rcx,4
	jl	NEAR $L$open_avx2_tail_512_rounds_and_x2hash
	cmp	rcx,10
	jne	NEAR $L$open_avx2_tail_512_rounds_and_x1hash
	; rcx = (rbx - 384) rounded down to a multiple of 16: the byte count that
	; the following hash loop still absorbs.
	mov	rcx,rbx
	sub	rcx,12*32
	and	rcx,-16
; Absorbs rcx bytes (a multiple of 16) starting at [r8] into the hash
; accumulator r12:r11:r10, one 16-byte block per pass, until rcx reaches zero.
$L$open_avx2_tail_512_hash:
	test	rcx,rcx
	je	NEAR $L$open_avx2_tail_512_done
	; acc += 16 bytes at [r8]; the "adc r12,1" also adds 1 to the top limb (2^128).
	add	r10,QWORD[((0+0))+r8]
	adc	r11,QWORD[((8+0))+r8]
	adc	r12,1
	; acc * qword at [rbp+160].
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	; acc * qword at [rbp+168], one limb higher; product = r9:r15:r14:r13.
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	; New acc = low 130 bits + 5 * (product >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	r8,[16+r8]
	sub	rcx,2*8	; 16 bytes consumed
	jmp	NEAR $L$open_avx2_tail_512_hash
; Finishes the four states: adds the starting rows back in, XORs the first
; 384 bytes at rsi into rdi, and leaves the fourth state's 128 bytes in
; ymm0/ymm4/ymm8/ymm12 for $L$open_avx2_tail_128_xor, which follows directly.
$L$open_avx2_tail_512_done:
	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]

	; All sixteen ymm registers are live, so ymm0 is parked at [rbp+160+128]
	; to serve as the temporary for the first 128-byte group.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
	; vperm2i128 0x02 pairs the low 128-bit lanes of two rows, 0x13 the high lanes.
	vperm2i128	ymm0,ymm7,ymm3,0x02
	vperm2i128	ymm7,ymm7,ymm3,0x13
	vperm2i128	ymm3,ymm15,ymm11,0x02
	vperm2i128	ymm11,ymm15,ymm11,0x13
	; Bytes 0..127.
	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
	vmovdqu	YMMWORD[(96+0)+rdi],ymm11

	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]	; restore the parked ymm0
	; Bytes 128..255 (ymm3 is now free and is the temporary).
	vperm2i128	ymm3,ymm6,ymm2,0x02
	vperm2i128	ymm6,ymm6,ymm2,0x13
	vperm2i128	ymm2,ymm14,ymm10,0x02
	vperm2i128	ymm10,ymm14,ymm10,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
	; Bytes 256..383.
	vperm2i128	ymm3,ymm5,ymm1,0x02
	vperm2i128	ymm5,ymm5,ymm1,0x13
	vperm2i128	ymm1,ymm13,ymm9,0x02
	vperm2i128	ymm9,ymm13,ymm9,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
	; Last state: only rearranged into output order ymm0, ymm4, ymm8, ymm12.
	vperm2i128	ymm3,ymm4,ymm0,0x13
	vperm2i128	ymm0,ymm4,ymm0,0x02
	vperm2i128	ymm4,ymm12,ymm8,0x02
	vperm2i128	ymm12,ymm12,ymm8,0x13
	vmovdqa	ymm8,ymm3

	; 384 bytes consumed.
	lea	rsi,[384+rsi]
	lea	rdi,[384+rdi]
	sub	rbx,12*32
; Applies pending keystream 32 bytes at a time while rbx >= 32.
; ymm0 holds the next 32 keystream bytes; ymm4, ymm8, ymm12 queue up behind it
; and are shifted forward after every chunk.
$L$open_avx2_tail_128_xor:
	cmp	rbx,32
	jb	NEAR $L$open_avx2_tail_32_xor
	sub	rbx,32
	vpxor	ymm0,ymm0,YMMWORD[rsi]	; input XOR keystream
	vmovdqu	YMMWORD[rdi],ymm0
	lea	rsi,[32+rsi]
	lea	rdi,[32+rdi]
	; Shift the keystream queue: ymm4 -> ymm0, ymm8 -> ymm4, ymm12 -> ymm8.
	vmovdqa	ymm0,ymm4
	vmovdqa	ymm4,ymm8
	vmovdqa	ymm8,ymm12
	jmp	NEAR $L$open_avx2_tail_128_xor
; Fewer than 32 bytes remain.  If at least 16 are left, one 16-byte chunk is
; XORed with the low lane of ymm0.  Either way xmm1 ends up holding the next
; unused 16 keystream bytes before control moves to $L$open_sse_tail_16
; (defined elsewhere in this file; presumably it consumes xmm1 -- confirm there).
$L$open_avx2_tail_32_xor:
	cmp	rbx,16
	vmovdqa	xmm1,xmm0	; does not touch flags, so the jb still tests the cmp above
	jb	NEAR $L$open_avx2_exit
	sub	rbx,16

	vpxor	xmm1,xmm0,XMMWORD[rsi]
	vmovdqu	XMMWORD[rdi],xmm1
	lea	rsi,[16+rsi]
	lea	rdi,[16+rdi]
	; Copy the high 128-bit lane of ymm0 into both lanes, then into xmm1.
	vperm2i128	ymm0,ymm0,ymm0,0x11
	vmovdqa	xmm1,xmm0
$L$open_avx2_exit:
	vzeroupper	; clear the upper ymm halves before the non-VEX code that follows
	jmp	NEAR $L$open_sse_tail_16
5868
; Short-input path that builds two two-block states, (ymm0,ymm4,ymm8,ymm12)
; and (ymm1,ymm5,ymm9,ymm13).  The starting rows are kept in ymm2, ymm6,
; ymm10 (shared) and ymm11, ymm15 (row 3 of each state) so they can be added
; back after the rounds.  Expects ymm0/ymm4/ymm8/ymm12 to hold the first
; state's starting rows on entry.  Falls through into $L$open_avx2_short.
$L$open_avx2_192:
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm10,ymm8
	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]	; second state's row 3 = first + inc
	vmovdqa	ymm11,ymm12
	vmovdqa	ymm15,ymm13
	mov	r10,10	; ten passes, each two quarter-round sweeps per state
$L$open_avx2_192_rounds:
	; State 0, first sweep: a+=b, d^=a, rotl 16; c+=d, b^=c, rotl 12;
	; a+=b, d^=a, rotl 8; c+=d, b^=c, rotl 7.  ymm3 is the shift temporary.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	; Rotate rows d, c, b by 12, 8, 4 bytes within each 128-bit lane.
	vpalignr	ymm12,ymm12,ymm12,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,4
	; State 1, first sweep.
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,4
	; State 0, second sweep, followed by the inverse row rotation (4, 8, 12).
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,12
	; State 1, second sweep.
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,12

	dec	r10
	jne	NEAR $L$open_avx2_192_rounds
	; Add the saved starting rows back into both states.
	vpaddd	ymm0,ymm0,ymm2
	vpaddd	ymm1,ymm1,ymm2
	vpaddd	ymm4,ymm4,ymm6
	vpaddd	ymm5,ymm5,ymm6
	vpaddd	ymm8,ymm8,ymm10
	vpaddd	ymm9,ymm9,ymm10
	vpaddd	ymm12,ymm12,ymm11
	vpaddd	ymm13,ymm13,ymm15
	; ymm3 = low lanes of rows a and b of state 0 (its first 32 output bytes).
	vperm2i128	ymm3,ymm4,ymm0,0x02

	; Mask with $L$clamp and store at [rbp+160]; the hash steps later read
	; their two multiplier qwords from [rbp+160] and [rbp+168].
	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
	vmovdqa	YMMWORD[(160+0)+rbp],ymm3

	; Arrange the remaining output in the order the short loop consumes it:
	; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5.
	vperm2i128	ymm0,ymm4,ymm0,0x13
	vperm2i128	ymm4,ymm12,ymm8,0x13
	vperm2i128	ymm8,ymm5,ymm1,0x02
	vperm2i128	ymm12,ymm13,ymm9,0x02
	vperm2i128	ymm1,ymm5,ymm1,0x13
	vperm2i128	ymm5,ymm13,ymm9,0x13
; Common continuation of the short paths.  Calls poly_hash_ad_internal
; (defined elsewhere in this file), then, while at least 32 bytes remain in
; rbx, hashes 32 input bytes at rsi, XORs them with the keystream in ymm0 and
; writes the result to rdi.  Pending keystream is queued in the order
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6.
$L$open_avx2_short:
	mov	r8,r8	; no-op as emitted: source and destination are the same register
	call	poly_hash_ad_internal
$L$open_avx2_short_hash_and_xor_loop:
	cmp	rbx,32
	jb	NEAR $L$open_avx2_short_tail_32
	sub	rbx,32
	; Hash step 1: acc r12:r11:r10 += 16 bytes at [rsi] (+2^128 via adc r12,1).
	add	r10,QWORD[((0+0))+rsi]
	adc	r11,QWORD[((8+0))+rsi]
	adc	r12,1
	; acc * qword at [rbp+160].
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	; acc * qword at [rbp+168], one limb higher; product = r9:r15:r14:r13.
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; New acc = low 130 bits + 5 * (product >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	; Hash step 2: same sequence for the 16 bytes at [rsi+16].
	add	r10,QWORD[((0+16))+rsi]
	adc	r11,QWORD[((8+16))+rsi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0


	; The input was hashed above before being transformed; now XOR and store it.
	vpxor	ymm0,ymm0,YMMWORD[rsi]
	vmovdqu	YMMWORD[rdi],ymm0
	lea	rsi,[32+rsi]
	lea	rdi,[32+rdi]

	; Shift the keystream queue forward by one 32-byte register.
	vmovdqa	ymm0,ymm4
	vmovdqa	ymm4,ymm8
	vmovdqa	ymm8,ymm12
	vmovdqa	ymm12,ymm1
	vmovdqa	ymm1,ymm5
	vmovdqa	ymm5,ymm9
	vmovdqa	ymm9,ymm13
	vmovdqa	ymm13,ymm2
	vmovdqa	ymm2,ymm6
	jmp	NEAR $L$open_avx2_short_hash_and_xor_loop
; Fewer than 32 bytes remain on the short path.  If at least 16 are left, one
; more 16-byte block is hashed and XORed with the low lane of ymm0.  xmm1 is
; left holding the next unused 16 keystream bytes before the jump to
; $L$open_sse_tail_16 (defined elsewhere; presumably it consumes xmm1 -- confirm).
$L$open_avx2_short_tail_32:
	cmp	rbx,16
	vmovdqa	xmm1,xmm0	; does not touch flags, so the jb still tests the cmp above
	jb	NEAR $L$open_avx2_short_tail_32_exit
	sub	rbx,16
	; acc r12:r11:r10 += 16 bytes at [rsi] (+2^128 via adc r12,1).
	add	r10,QWORD[((0+0))+rsi]
	adc	r11,QWORD[((8+0))+rsi]
	adc	r12,1
	; acc * qword at [rbp+160].
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	; acc * qword at [rbp+168], one limb higher; product = r9:r15:r14:r13.
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; New acc = low 130 bits + 5 * (product >> 130).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	vpxor	xmm3,xmm0,XMMWORD[rsi]
	vmovdqu	XMMWORD[rdi],xmm3
	lea	rsi,[16+rsi]
	lea	rdi,[16+rdi]
	vextracti128	xmm1,ymm0,1	; xmm1 = high 128-bit lane of ymm0
$L$open_avx2_short_tail_32_exit:
	vzeroupper	; clear the upper ymm halves before the non-VEX code that follows
	jmp	NEAR $L$open_sse_tail_16
6137
; Short-input path that builds three two-block states: (ymm0,ymm4,ymm8,ymm12),
; (ymm1,ymm5,ymm9,ymm13) and (ymm2,ymm6,ymm10,ymm14).  Starting rows b and c
; are kept in ymm7 and ymm11; the three starting row-3 values are saved at
; [rbp+160+160], [rbp+160+192] and [rbp+160+224].  Expects ymm0/ymm4/ymm8/ymm12
; to hold the first state's starting rows on entry.  Ends by jumping to
; $L$open_avx2_short.
$L$open_avx2_320:
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm10,ymm8
	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]	; row 3 of state 1 = state 0 + inc
	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]	; row 3 of state 2 = state 1 + inc
	vmovdqa	ymm7,ymm4
	vmovdqa	ymm11,ymm8
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
	mov	r10,10	; ten passes, each two quarter-round sweeps per state
$L$open_avx2_320_rounds:
	; State 0, first sweep: a+=b, d^=a, rotl 16; c+=d, b^=c, rotl 12;
	; a+=b, d^=a, rotl 8; c+=d, b^=c, rotl 7.  ymm3 is the shift temporary.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	; Rotate rows d, c, b by 12, 8, 4 bytes within each 128-bit lane.
	vpalignr	ymm12,ymm12,ymm12,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,4
	; State 1, first sweep.
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,4
	; State 2, first sweep.
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpsrld	ymm3,ymm6,20
	vpslld	ymm6,ymm6,12
	vpxor	ymm6,ymm6,ymm3
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpslld	ymm3,ymm6,7
	vpsrld	ymm6,ymm6,25
	vpxor	ymm6,ymm6,ymm3
	vpalignr	ymm14,ymm14,ymm14,12
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm6,ymm6,ymm6,4
	; State 0, second sweep, followed by the inverse row rotation (4, 8, 12).
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,12
	; State 1, second sweep.
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,12
	; State 2, second sweep.
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpsrld	ymm3,ymm6,20
	vpslld	ymm6,ymm6,12
	vpxor	ymm6,ymm6,ymm3
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpslld	ymm3,ymm6,7
	vpsrld	ymm6,ymm6,25
	vpxor	ymm6,ymm6,ymm3
	vpalignr	ymm14,ymm14,ymm14,4
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm6,ymm6,ymm6,12

	dec	r10
	jne	NEAR $L$open_avx2_320_rounds
	; Add the starting rows back: constants from memory, rows b and c from
	; ymm7 and ymm11, row 3 from the three stack slots.
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,ymm7
	vpaddd	ymm5,ymm5,ymm7
	vpaddd	ymm6,ymm6,ymm7
	vpaddd	ymm8,ymm8,ymm11
	vpaddd	ymm9,ymm9,ymm11
	vpaddd	ymm10,ymm10,ymm11
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
	; ymm3 = low lanes of rows a and b of state 0 (its first 32 output bytes).
	vperm2i128	ymm3,ymm4,ymm0,0x02

	; Mask with $L$clamp and store at [rbp+160]; the hash steps later read
	; their two multiplier qwords from [rbp+160] and [rbp+168].
	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
	vmovdqa	YMMWORD[(160+0)+rbp],ymm3

	; Arrange the remaining output in the order the short loop consumes it:
	; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6.
	vperm2i128	ymm0,ymm4,ymm0,0x13
	vperm2i128	ymm4,ymm12,ymm8,0x13
	vperm2i128	ymm8,ymm5,ymm1,0x02
	vperm2i128	ymm12,ymm13,ymm9,0x02
	vperm2i128	ymm1,ymm5,ymm1,0x13
	vperm2i128	ymm5,ymm13,ymm9,0x13
	vperm2i128	ymm9,ymm6,ymm2,0x02
	vperm2i128	ymm13,ymm14,ymm10,0x02
	vperm2i128	ymm2,ymm6,ymm2,0x13
	vperm2i128	ymm6,ymm14,ymm10,0x13
	jmp	NEAR $L$open_avx2_short
6299
6300
6301
6302
6303
6304ALIGN	64
6305chacha20_poly1305_seal_avx2:
6306
6307
6308
6309
6310
6311
6312
6313
6314
6315
6316
6317
6318	vzeroupper
6319	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6320	vbroadcasti128	ymm4,XMMWORD[r9]
6321	vbroadcasti128	ymm8,XMMWORD[16+r9]
6322	vbroadcasti128	ymm12,XMMWORD[32+r9]
6323	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
6324	cmp	rbx,6*32
6325	jbe	NEAR $L$seal_avx2_192
6326	cmp	rbx,10*32
6327	jbe	NEAR $L$seal_avx2_320
6328	vmovdqa	ymm1,ymm0
6329	vmovdqa	ymm2,ymm0
6330	vmovdqa	ymm3,ymm0
6331	vmovdqa	ymm5,ymm4
6332	vmovdqa	ymm6,ymm4
6333	vmovdqa	ymm7,ymm4
6334	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
6335	vmovdqa	ymm9,ymm8
6336	vmovdqa	ymm10,ymm8
6337	vmovdqa	ymm11,ymm8
6338	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
6339	vmovdqa	ymm15,ymm12
6340	vpaddd	ymm14,ymm15,YMMWORD[$L$avx2_inc]
6341	vpaddd	ymm13,ymm14,YMMWORD[$L$avx2_inc]
6342	vpaddd	ymm12,ymm13,YMMWORD[$L$avx2_inc]
6343	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6344	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6345	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6346	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
6347	mov	r10,10
6348$L$seal_avx2_init_rounds:
6349	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6350	vmovdqa	ymm8,YMMWORD[$L$rol16]
6351	vpaddd	ymm3,ymm3,ymm7
6352	vpaddd	ymm2,ymm2,ymm6
6353	vpaddd	ymm1,ymm1,ymm5
6354	vpaddd	ymm0,ymm0,ymm4
6355	vpxor	ymm15,ymm15,ymm3
6356	vpxor	ymm14,ymm14,ymm2
6357	vpxor	ymm13,ymm13,ymm1
6358	vpxor	ymm12,ymm12,ymm0
6359	vpshufb	ymm15,ymm15,ymm8
6360	vpshufb	ymm14,ymm14,ymm8
6361	vpshufb	ymm13,ymm13,ymm8
6362	vpshufb	ymm12,ymm12,ymm8
6363	vpaddd	ymm11,ymm11,ymm15
6364	vpaddd	ymm10,ymm10,ymm14
6365	vpaddd	ymm9,ymm9,ymm13
6366	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6367	vpxor	ymm7,ymm7,ymm11
6368	vpxor	ymm6,ymm6,ymm10
6369	vpxor	ymm5,ymm5,ymm9
6370	vpxor	ymm4,ymm4,ymm8
6371	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6372	vpsrld	ymm8,ymm7,20
6373	vpslld	ymm7,ymm7,32-20
6374	vpxor	ymm7,ymm7,ymm8
6375	vpsrld	ymm8,ymm6,20
6376	vpslld	ymm6,ymm6,32-20
6377	vpxor	ymm6,ymm6,ymm8
6378	vpsrld	ymm8,ymm5,20
6379	vpslld	ymm5,ymm5,32-20
6380	vpxor	ymm5,ymm5,ymm8
6381	vpsrld	ymm8,ymm4,20
6382	vpslld	ymm4,ymm4,32-20
6383	vpxor	ymm4,ymm4,ymm8
6384	vmovdqa	ymm8,YMMWORD[$L$rol8]
6385	vpaddd	ymm3,ymm3,ymm7
6386	vpaddd	ymm2,ymm2,ymm6
6387	vpaddd	ymm1,ymm1,ymm5
6388	vpaddd	ymm0,ymm0,ymm4
6389	vpxor	ymm15,ymm15,ymm3
6390	vpxor	ymm14,ymm14,ymm2
6391	vpxor	ymm13,ymm13,ymm1
6392	vpxor	ymm12,ymm12,ymm0
6393	vpshufb	ymm15,ymm15,ymm8
6394	vpshufb	ymm14,ymm14,ymm8
6395	vpshufb	ymm13,ymm13,ymm8
6396	vpshufb	ymm12,ymm12,ymm8
6397	vpaddd	ymm11,ymm11,ymm15
6398	vpaddd	ymm10,ymm10,ymm14
6399	vpaddd	ymm9,ymm9,ymm13
6400	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6401	vpxor	ymm7,ymm7,ymm11
6402	vpxor	ymm6,ymm6,ymm10
6403	vpxor	ymm5,ymm5,ymm9
6404	vpxor	ymm4,ymm4,ymm8
6405	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6406	vpsrld	ymm8,ymm7,25
6407	vpslld	ymm7,ymm7,32-25
6408	vpxor	ymm7,ymm7,ymm8
6409	vpsrld	ymm8,ymm6,25
6410	vpslld	ymm6,ymm6,32-25
6411	vpxor	ymm6,ymm6,ymm8
6412	vpsrld	ymm8,ymm5,25
6413	vpslld	ymm5,ymm5,32-25
6414	vpxor	ymm5,ymm5,ymm8
6415	vpsrld	ymm8,ymm4,25
6416	vpslld	ymm4,ymm4,32-25
6417	vpxor	ymm4,ymm4,ymm8
6418	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6419	vpalignr	ymm7,ymm7,ymm7,4
6420	vpalignr	ymm11,ymm11,ymm11,8
6421	vpalignr	ymm15,ymm15,ymm15,12
6422	vpalignr	ymm6,ymm6,ymm6,4
6423	vpalignr	ymm10,ymm10,ymm10,8
6424	vpalignr	ymm14,ymm14,ymm14,12
6425	vpalignr	ymm5,ymm5,ymm5,4
6426	vpalignr	ymm9,ymm9,ymm9,8
6427	vpalignr	ymm13,ymm13,ymm13,12
6428	vpalignr	ymm4,ymm4,ymm4,4
6429	vpalignr	ymm8,ymm8,ymm8,8
6430	vpalignr	ymm12,ymm12,ymm12,12
6431	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6432	vmovdqa	ymm8,YMMWORD[$L$rol16]
6433	vpaddd	ymm3,ymm3,ymm7
6434	vpaddd	ymm2,ymm2,ymm6
6435	vpaddd	ymm1,ymm1,ymm5
6436	vpaddd	ymm0,ymm0,ymm4
6437	vpxor	ymm15,ymm15,ymm3
6438	vpxor	ymm14,ymm14,ymm2
6439	vpxor	ymm13,ymm13,ymm1
6440	vpxor	ymm12,ymm12,ymm0
6441	vpshufb	ymm15,ymm15,ymm8
6442	vpshufb	ymm14,ymm14,ymm8
6443	vpshufb	ymm13,ymm13,ymm8
6444	vpshufb	ymm12,ymm12,ymm8
6445	vpaddd	ymm11,ymm11,ymm15
6446	vpaddd	ymm10,ymm10,ymm14
6447	vpaddd	ymm9,ymm9,ymm13
6448	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6449	vpxor	ymm7,ymm7,ymm11
6450	vpxor	ymm6,ymm6,ymm10
6451	vpxor	ymm5,ymm5,ymm9
6452	vpxor	ymm4,ymm4,ymm8
6453	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6454	vpsrld	ymm8,ymm7,20
6455	vpslld	ymm7,ymm7,32-20
6456	vpxor	ymm7,ymm7,ymm8
6457	vpsrld	ymm8,ymm6,20
6458	vpslld	ymm6,ymm6,32-20
6459	vpxor	ymm6,ymm6,ymm8
6460	vpsrld	ymm8,ymm5,20
6461	vpslld	ymm5,ymm5,32-20
6462	vpxor	ymm5,ymm5,ymm8
6463	vpsrld	ymm8,ymm4,20
6464	vpslld	ymm4,ymm4,32-20
6465	vpxor	ymm4,ymm4,ymm8
6466	vmovdqa	ymm8,YMMWORD[$L$rol8]
6467	vpaddd	ymm3,ymm3,ymm7
6468	vpaddd	ymm2,ymm2,ymm6
6469	vpaddd	ymm1,ymm1,ymm5
6470	vpaddd	ymm0,ymm0,ymm4
6471	vpxor	ymm15,ymm15,ymm3
6472	vpxor	ymm14,ymm14,ymm2
6473	vpxor	ymm13,ymm13,ymm1
6474	vpxor	ymm12,ymm12,ymm0
6475	vpshufb	ymm15,ymm15,ymm8
6476	vpshufb	ymm14,ymm14,ymm8
6477	vpshufb	ymm13,ymm13,ymm8
6478	vpshufb	ymm12,ymm12,ymm8
6479	vpaddd	ymm11,ymm11,ymm15
6480	vpaddd	ymm10,ymm10,ymm14
6481	vpaddd	ymm9,ymm9,ymm13
6482	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6483	vpxor	ymm7,ymm7,ymm11
6484	vpxor	ymm6,ymm6,ymm10
6485	vpxor	ymm5,ymm5,ymm9
6486	vpxor	ymm4,ymm4,ymm8
6487	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6488	vpsrld	ymm8,ymm7,25
6489	vpslld	ymm7,ymm7,32-25
6490	vpxor	ymm7,ymm7,ymm8
6491	vpsrld	ymm8,ymm6,25
6492	vpslld	ymm6,ymm6,32-25
6493	vpxor	ymm6,ymm6,ymm8
6494	vpsrld	ymm8,ymm5,25
6495	vpslld	ymm5,ymm5,32-25
6496	vpxor	ymm5,ymm5,ymm8
6497	vpsrld	ymm8,ymm4,25
6498	vpslld	ymm4,ymm4,32-25
6499	vpxor	ymm4,ymm4,ymm8
6500	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6501	vpalignr	ymm7,ymm7,ymm7,12
6502	vpalignr	ymm11,ymm11,ymm11,8
6503	vpalignr	ymm15,ymm15,ymm15,4
6504	vpalignr	ymm6,ymm6,ymm6,12
6505	vpalignr	ymm10,ymm10,ymm10,8
6506	vpalignr	ymm14,ymm14,ymm14,4
6507	vpalignr	ymm5,ymm5,ymm5,12
6508	vpalignr	ymm9,ymm9,ymm9,8
6509	vpalignr	ymm13,ymm13,ymm13,4
6510	vpalignr	ymm4,ymm4,ymm4,12
6511	vpalignr	ymm8,ymm8,ymm8,8
6512	vpalignr	ymm12,ymm12,ymm12,4
6513
6514	dec	r10
6515	jnz	NEAR $L$seal_avx2_init_rounds
	; Feed-forward after the init rounds loop: each of the four working states
	; (ymm3/7/11/15, ymm2/6/10/14, ymm1/5/9/13, ymm0/4/8/12) gets the constants
	; row from $L$chacha20_consts, the rows saved at rbp+160+64 and rbp+160+96,
	; and its own saved fourth row (rbp+160+256/224/192/160) added back in.
	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]

	; Split the ymm3/7/11/15 state by 128-bit lane (selector 0x02 gathers the
	; low lanes of both sources, 0x13 the high lanes). The low-lane pair of
	; ymm7/ymm3 is masked with $L$clamp and stored at rbp+160, the slot the
	; hashing code below reads its two multiplier words from (presumably the
	; Poly1305 key - confirm against the part of the routine outside this view).
	; ymm3 and ymm11 keep the high-lane halves, used as keystream below.
	vperm2i128	ymm11,ymm15,ymm11,0x13
	vperm2i128	ymm15,ymm7,ymm3,0x02
	vperm2i128	ymm3,ymm7,ymm3,0x13
	vpand	ymm15,ymm15,YMMWORD[$L$clamp]
	vmovdqa	YMMWORD[(160+0)+rbp],ymm15
	; Register-to-itself move: a no-op as written.
	mov	r8,r8
	; poly_hash_ad_internal is defined outside this view; by its name it hashes
	; the additional data - TODO confirm its register contract at the definition.
	call	poly_hash_ad_internal

	; XOR the first 64 input bytes at rsi with ymm3/ymm11 and store to rdi.
	vpxor	ymm3,ymm3,YMMWORD[rsi]
	vpxor	ymm11,ymm11,YMMWORD[32+rsi]
	vmovdqu	YMMWORD[rdi],ymm3
	vmovdqu	YMMWORD[32+rdi],ymm11
	; Lane-rearrange the ymm2/6/10/14 state (ymm15 is the temporary) and
	; XOR/store 128 bytes at offset 64.
	vperm2i128	ymm15,ymm6,ymm2,0x02
	vperm2i128	ymm6,ymm6,ymm2,0x13
	vperm2i128	ymm2,ymm14,ymm10,0x02
	vperm2i128	ymm10,ymm14,ymm10,0x13
	vpxor	ymm15,ymm15,YMMWORD[((0+64))+rsi]
	vpxor	ymm2,ymm2,YMMWORD[((32+64))+rsi]
	vpxor	ymm6,ymm6,YMMWORD[((64+64))+rsi]
	vpxor	ymm10,ymm10,YMMWORD[((96+64))+rsi]
	vmovdqu	YMMWORD[(0+64)+rdi],ymm15
	vmovdqu	YMMWORD[(32+64)+rdi],ymm2
	vmovdqu	YMMWORD[(64+64)+rdi],ymm6
	vmovdqu	YMMWORD[(96+64)+rdi],ymm10
	; Same for the ymm1/5/9/13 state at offset 192.
	vperm2i128	ymm15,ymm5,ymm1,0x02
	vperm2i128	ymm5,ymm5,ymm1,0x13
	vperm2i128	ymm1,ymm13,ymm9,0x02
	vperm2i128	ymm9,ymm13,ymm9,0x13
	vpxor	ymm15,ymm15,YMMWORD[((0+192))+rsi]
	vpxor	ymm1,ymm1,YMMWORD[((32+192))+rsi]
	vpxor	ymm5,ymm5,YMMWORD[((64+192))+rsi]
	vpxor	ymm9,ymm9,YMMWORD[((96+192))+rsi]
	vmovdqu	YMMWORD[(0+192)+rdi],ymm15
	vmovdqu	YMMWORD[(32+192)+rdi],ymm1
	vmovdqu	YMMWORD[(64+192)+rdi],ymm5
	vmovdqu	YMMWORD[(96+192)+rdi],ymm9
	; The ymm0/4/8/12 state is only rearranged here: its 128 bytes stay in
	; ymm0, ymm4, ymm8, ymm12 (in that output order) for whichever path runs next.
	vperm2i128	ymm15,ymm4,ymm0,0x13
	vperm2i128	ymm0,ymm4,ymm0,0x02
	vperm2i128	ymm4,ymm12,ymm8,0x02
	vperm2i128	ymm12,ymm12,ymm8,0x13
	vmovdqa	ymm8,ymm15

	; 320 bytes have been consumed: advance rsi, reduce rbx by the same amount
	; and set rcx = 320 for $L$seal_avx2_short_hash_remainder. rdi is NOT
	; advanced here. If at most 128 bytes remain (unsigned compare), leave.
	lea	rsi,[320+rsi]
	sub	rbx,10*32
	mov	rcx,10*32
	cmp	rbx,4*32
	jbe	NEAR $L$seal_avx2_short_hash_remainder
	; Otherwise use the held state for another 128 bytes, stored at rdi+320.
	vpxor	ymm0,ymm0,YMMWORD[rsi]
	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
	vpxor	ymm8,ymm8,YMMWORD[64+rsi]
	vpxor	ymm12,ymm12,YMMWORD[96+rsi]
	vmovdqu	YMMWORD[320+rdi],ymm0
	vmovdqu	YMMWORD[352+rdi],ymm4
	vmovdqu	YMMWORD[384+rdi],ymm8
	vmovdqu	YMMWORD[416+rdi],ymm12
	lea	rsi,[128+rsi]
	sub	rbx,4*32
	; rcx / r8 are the two loop counters consumed by the tail code
	; (rcx passes through the "3xhash" entry, then r8 + 1 through "2xhash").
	mov	rcx,8
	mov	r8,2
	; Dispatch on the remaining length in rbx (unsigned): <=128, <=256, <=384,
	; <=512 bytes go to the matching tail; anything larger falls through.
	cmp	rbx,4*32
	jbe	NEAR $L$seal_avx2_tail_128
	cmp	rbx,8*32
	jbe	NEAR $L$seal_avx2_tail_256
	cmp	rbx,12*32
	jbe	NEAR $L$seal_avx2_tail_384
	cmp	rbx,16*32
	jbe	NEAR $L$seal_avx2_tail_512
	; Prologue for the first main-loop pass. It sets up four fresh states exactly
	; as $L$seal_avx2_main_loop does, then runs the same vector instruction
	; sequence as the loop body with no scalar hashing interleaved, and finally
	; jumps into the loop at $L$seal_avx2_main_loop_rounds_entry.
	;
	; Rows 0-2 of every state come from $L$chacha20_consts, rbp+160+64 and
	; rbp+160+96.
	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm10,ymm8
	vmovdqa	ymm3,ymm0
	vmovdqa	ymm7,ymm4
	vmovdqa	ymm11,ymm8
	; Fourth rows: ymm15 = saved row at rbp+160+160 plus $L$avx2_inc, and each
	; of ymm14, ymm13, ymm12 is the previous one plus $L$avx2_inc again. All four
	; are saved for the later feed-forward; rbp+160+160 receives the newest.
	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
	vpaddd	ymm14,ymm12,ymm15
	vpaddd	ymm13,ymm12,ymm14
	vpaddd	ymm12,ymm12,ymm13
	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
	; All 16 ymm registers are in use, so ymm8's row is spilled to rbp+160+128
	; whenever ymm8 is needed for a shuffle mask or a shift temporary.
	; Quarter-round on all four states: add, xor, rotate by 16 via $L$rol16.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	; Rotate left by 12, built from a right shift by 20 and a left shift by 12.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	; Add, xor, rotate by 8 via $L$rol8.
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	; Rotate left by 7 (right shift by 25, left shift by 7).
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,25
	vpslld	ymm7,ymm7,32-25
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,25
	vpslld	ymm6,ymm6,32-25
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,25
	vpslld	ymm5,ymm5,32-25
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,25
	vpslld	ymm4,ymm4,32-25
	vpxor	ymm4,ymm4,ymm8
	; Rotate rows 1/2/3 of every state by 4/8/12 bytes within each 128-bit lane.
	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
	vpalignr	ymm7,ymm7,ymm7,4
	vpalignr	ymm11,ymm11,ymm11,8
	vpalignr	ymm15,ymm15,ymm15,12
	vpalignr	ymm6,ymm6,ymm6,4
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm14,ymm14,ymm14,12
	vpalignr	ymm5,ymm5,ymm5,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm4,ymm4,ymm4,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm12,ymm12,ymm12,12
	; Second quarter-round, on the rotated rows.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,25
	vpslld	ymm7,ymm7,32-25
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,25
	vpslld	ymm6,ymm6,32-25
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,25
	vpslld	ymm5,ymm5,32-25
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,25
	vpslld	ymm4,ymm4,32-25
	vpxor	ymm4,ymm4,ymm8
	; Undo the row rotation (12/8/4 bytes).
	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
	vpalignr	ymm7,ymm7,ymm7,12
	vpalignr	ymm11,ymm11,ymm11,8
	vpalignr	ymm15,ymm15,ymm15,4
	vpalignr	ymm6,ymm6,ymm6,12
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm14,ymm14,ymm14,4
	vpalignr	ymm5,ymm5,ymm5,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm4,ymm4,ymm4,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm12,ymm12,ymm12,4
	; Start of the next pass: the same instructions as the top of
	; $L$seal_avx2_main_loop_rounds, stopping at the vpxor into ymm15, which is
	; the last vector instruction before $L$seal_avx2_main_loop_rounds_entry.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3

	; The entry point reads [rdi+16] and [rdi+32] and then adds 48 to rdi, so
	; biasing rdi by -16 makes those reads land on the current rdi+0 and rdi+16.
	; rcx = 9 because this prologue plus the entry half stand in for the first
	; and most of the second of the ten passes.
	sub	rdi,16
	mov	rcx,9
	jmp	NEAR $L$seal_avx2_main_loop_rounds_entry
; Top of the 512-byte main loop: build four fresh two-block states.
; Rows 0-2 are copied from $L$chacha20_consts, rbp+160+64 and rbp+160+96.
ALIGN	32
$L$seal_avx2_main_loop:
	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm10,ymm8
	vmovdqa	ymm3,ymm0
	vmovdqa	ymm7,ymm4
	vmovdqa	ymm11,ymm8
	; Fourth rows: start from the row saved at rbp+160+160 and add $L$avx2_inc
	; once per state (ymm15, then ymm14, ymm13, ymm12). Each is saved for the
	; feed-forward after the rounds; rbp+160+160 ends up holding the newest.
	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
	vpaddd	ymm14,ymm12,ymm15
	vpaddd	ymm13,ymm12,ymm14
	vpaddd	ymm12,ymm12,ymm13
	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12

	; Ten passes of the rounds loop below.
	mov	rcx,10
; One pass = two quarter-round sweeps over four ymm states (with a row rotation
; between and after them), interleaved with three scalar hash steps over the
; 16-byte blocks at rdi+0, rdi+16 and rdi+32; rdi advances by 48 per pass.
;
; Scalar hash step, as coded: the accumulator is r12:r11:r10. A block is added
; with add/adc, plus the constant 1 into r12. The sum is multiplied by the two
; 64-bit words at rbp+160 and rbp+168 using mulx, giving r9:r15:r14:r13. The
; result is folded back to r12:r11:r10 by keeping the low two bits of the third
; limb and adding (t & -4) + (t >> 2) of the part above them, which is the
; reduction one would expect modulo 2^130 - 5.
;
; ymm8's row is spilled to rbp+160+128 whenever ymm8 is needed as a shuffle mask
; or shift temporary. $L$seal_avx2_main_loop_rounds_entry is a second entry
; point used by the prologue that precedes the main loop.
ALIGN	32
$L$seal_avx2_main_loop_rounds:
	; hash step 1: absorb the block at rdi+0
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	; hash step 1: multiply by the word at rbp+160
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	; hash step 1: multiply by the word at rbp+168
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	add	r15,rax
	adc	r9,rdx
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	; hash step 1: fold r9:r15:r14:r13 back into r12:r11:r10
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

; Entry used by the prologue: resumes right after the vpxor into ymm15 above.
$L$seal_avx2_main_loop_rounds_entry:
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	; hash step 2: absorb the block at rdi+16
	add	r10,QWORD[((0+16))+rdi]
	adc	r11,QWORD[((8+16))+rdi]
	adc	r12,1
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,25
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	vpslld	ymm7,ymm7,32-25
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,25
	vpslld	ymm6,ymm6,32-25
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,25
	vpslld	ymm5,ymm5,32-25
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,25
	vpslld	ymm4,ymm4,32-25
	vpxor	ymm4,ymm4,ymm8
	; rotate rows 1/2/3 by 4/8/12 bytes within each 128-bit lane
	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
	vpalignr	ymm7,ymm7,ymm7,4
	vpalignr	ymm11,ymm11,ymm11,8
	vpalignr	ymm15,ymm15,ymm15,12
	vpalignr	ymm6,ymm6,ymm6,4
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm14,ymm14,ymm14,12
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	vpalignr	ymm5,ymm5,ymm5,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm4,ymm4,ymm4,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm12,ymm12,ymm12,12
	; second quarter-round sweep, on the rotated rows
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	add	r15,rax
	adc	r9,rdx
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	; hash step 2: fold
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	; hash step 3: absorb the block at rdi+32, then advance rdi by 48
	add	r10,QWORD[((0+32))+rdi]
	adc	r11,QWORD[((8+32))+rdi]
	adc	r12,1

	lea	rdi,[48+rdi]
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,25
	vpslld	ymm7,ymm7,32-25
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,25
	vpslld	ymm6,ymm6,32-25
	vpxor	ymm6,ymm6,ymm8
	add	r15,rax
	adc	r9,rdx
	vpsrld	ymm8,ymm5,25
	vpslld	ymm5,ymm5,32-25
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,25
	vpslld	ymm4,ymm4,32-25
	vpxor	ymm4,ymm4,ymm8
	; undo the row rotation (12/8/4 bytes)
	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
	vpalignr	ymm7,ymm7,ymm7,12
	vpalignr	ymm11,ymm11,ymm11,8
	vpalignr	ymm15,ymm15,ymm15,4
	vpalignr	ymm6,ymm6,ymm6,12
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm14,ymm14,ymm14,4
	vpalignr	ymm5,ymm5,ymm5,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm4,ymm4,ymm4,12
	vpalignr	ymm8,ymm8,ymm8,8
	; hash step 3: fold
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	vpalignr	ymm12,ymm12,ymm12,4

	; vpalignr does not modify flags, but dec directly precedes the branch anyway.
	dec	rcx
	jne	NEAR $L$seal_avx2_main_loop_rounds
	; End of one main-loop iteration. Feed-forward: add the constants, the rows
	; saved at rbp+160+64 / rbp+160+96, and each state's saved fourth row
	; (rbp+160+256/224/192/160) back into the four working states.
	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]

	; Park ymm0 at rbp+160+128 so it can serve as the lane-shuffle temporary for
	; the first state below; it is reloaded before the second state is handled.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
	; Two more scalar hash steps, over the 16-byte blocks at rdi+0 and rdi+16.
	; Each step: acc (r12:r11:r10) += block, with 1 added into r12; multiply by
	; the two words at rbp+160 / rbp+168 with mulx; fold the result back by
	; keeping the low two bits of the third limb and adding (t & -4) + (t >> 2).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	add	r10,QWORD[((0+16))+rdi]
	adc	r11,QWORD[((8+16))+rdi]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	; rdi moves past the two blocks just hashed; the stores below are relative
	; to this new rdi.
	lea	rdi,[32+rdi]
	; State ymm3/7/11/15 -> bytes 0..127. Selector 0x02 gathers the low 128-bit
	; lanes of the two sources, 0x13 the high lanes.
	vperm2i128	ymm0,ymm7,ymm3,0x02
	vperm2i128	ymm7,ymm7,ymm3,0x13
	vperm2i128	ymm3,ymm15,ymm11,0x02
	vperm2i128	ymm11,ymm15,ymm11,0x13
	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
	vmovdqu	YMMWORD[(96+0)+rdi],ymm11

	; Restore ymm0; from here ymm3 is the free temporary.
	; State ymm2/6/10/14 -> bytes 128..255.
	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
	vperm2i128	ymm3,ymm6,ymm2,0x02
	vperm2i128	ymm6,ymm6,ymm2,0x13
	vperm2i128	ymm2,ymm14,ymm10,0x02
	vperm2i128	ymm10,ymm14,ymm10,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
	; State ymm1/5/9/13 -> bytes 256..383.
	vperm2i128	ymm3,ymm5,ymm1,0x02
	vperm2i128	ymm5,ymm5,ymm1,0x13
	vperm2i128	ymm1,ymm13,ymm9,0x02
	vperm2i128	ymm9,ymm13,ymm9,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
	; State ymm0/4/8/12 -> bytes 384..511.
	vperm2i128	ymm3,ymm4,ymm0,0x02
	vperm2i128	ymm4,ymm4,ymm0,0x13
	vperm2i128	ymm0,ymm12,ymm8,0x02
	vperm2i128	ymm8,ymm12,ymm8,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
	vmovdqu	YMMWORD[(96+384)+rdi],ymm8

	; 512 input bytes consumed. Loop again while more than 512 remain.
	; NOTE(review): jg is a signed comparison on a length; it gives the same
	; result as ja for any rbx below 2^63.
	lea	rsi,[512+rsi]
	sub	rbx,16*32
	cmp	rbx,16*32
	jg	NEAR $L$seal_avx2_main_loop
7268
	; Main loop exit. Run two scalar hash steps over the 16-byte blocks at rdi+0
	; and rdi+16, advance rdi by 32, then pick a tail routine by remaining length.
	; Hash step: acc (r12:r11:r10) += block, with 1 added into r12; multiply by
	; the words at rbp+160 / rbp+168 using mulx; fold r9:r15:r14:r13 back into
	; three limbs (low two bits of the third limb kept, (t & -4) + (t >> 2) added).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	add	r10,QWORD[((0+16))+rdi]
	adc	r11,QWORD[((8+16))+rdi]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	; Tail loop counters for this path: rcx = 10 passes through the "3xhash"
	; entry, r8 = 0 so the "2xhash"-only phase does not repeat.
	lea	rdi,[32+rdi]
	mov	rcx,10
	xor	r8,r8

	; Unsigned dispatch on the remaining length: >384 -> tail_512, >256 ->
	; tail_384, >128 -> tail_256, otherwise fall through to the code that follows.
	cmp	rbx,12*32
	ja	NEAR $L$seal_avx2_tail_512
	cmp	rbx,8*32
	ja	NEAR $L$seal_avx2_tail_384
	cmp	rbx,4*32
	ja	NEAR $L$seal_avx2_tail_256
7346
; Tail for at most 128 remaining bytes: one ymm state (ymm0/4/8/12) is run
; through the rounds while the scalar hash works through blocks at rdi.
;
; On entry rcx and r8 are loop counters set by the caller of this label:
; rcx counts passes that enter at "_3xhash" (three hash steps per pass) and r8
; counts additional passes that enter at "_2xhash" (two hash steps per pass).
; Both visible entry paths choose values that sum to ten passes.
$L$seal_avx2_tail_128:
	; Rows 0-2 from $L$chacha20_consts, rbp+160+64, rbp+160+96; the fourth row is
	; the saved row at rbp+160+160 plus $L$avx2_inc, written back to the same slot.
	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12

$L$seal_avx2_tail_128_rounds_and_3xhash:
	; Extra hash step over the block at rdi+0 (mulx variant): acc r12:r11:r10 +=
	; block with 1 added into r12, multiply by the words at rbp+160 / rbp+168,
	; fold the four-limb product back to three limbs. Then rdi += 16.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
$L$seal_avx2_tail_128_rounds_and_2xhash:
	; Quarter-round (rotations 16, 12, 8, 7; ymm3 is the shift temporary), then
	; rotate rows 3/2/1 by 12/8/4 bytes within each 128-bit lane.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,4
	; Hash step over the block at rdi+0.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	; Second quarter-round on the rotated rows, then rotate the rows back (4/8/12).
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,12
	; Hash step over the block at rdi+16.
	add	r10,QWORD[((0+16))+rdi]
	adc	r11,QWORD[((8+16))+rdi]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	; Both branches are signed on purpose: rcx keeps being decremented below
	; zero during the r8 phase, and r8 runs down to -1 to end the loop.
	lea	rdi,[32+rdi]
	dec	rcx
	jg	NEAR $L$seal_avx2_tail_128_rounds_and_3xhash
	dec	r8
	jge	NEAR $L$seal_avx2_tail_128_rounds_and_2xhash
	; Feed-forward, then lane-rearrange so the 128 result bytes sit in ymm0,
	; ymm4, ymm8, ymm12 (ymm3 is the temporary). No input is consumed here;
	; control continues at $L$seal_avx2_short_loop (outside this view).
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
	vperm2i128	ymm3,ymm4,ymm0,0x13
	vperm2i128	ymm0,ymm4,ymm0,0x02
	vperm2i128	ymm4,ymm12,ymm8,0x02
	vperm2i128	ymm12,ymm12,ymm8,0x13
	vmovdqa	ymm8,ymm3

	jmp	NEAR $L$seal_avx2_short_loop
7513
; Tail for 129..256 remaining bytes: two ymm states (ymm0/4/8/12 and
; ymm1/5/9/13) are run through the rounds while the scalar hash works through
; blocks at rdi. The hash steps here use the legacy mul instruction (rax/rdx
; implicit) instead of mulx.
;
; On entry rcx and r8 are loop counters set by the caller of this label:
; rcx counts passes entering at "_3xhash" (three hash steps per pass), r8 counts
; additional passes entering at "_2xhash" (two hash steps per pass).
$L$seal_avx2_tail_256:
	; Rows 0-2 from $L$chacha20_consts, rbp+160+64, rbp+160+96. Fourth rows:
	; ymm13 = saved row at rbp+160+160 + $L$avx2_inc, ymm12 = ymm13 + $L$avx2_inc;
	; both are saved for the feed-forward (rbp+160+160 gets the newest).
	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]
	vpaddd	ymm12,ymm12,ymm13
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
	vmovdqa	YMMWORD[(160+192)+rbp],ymm13

$L$seal_avx2_tail_256_rounds_and_3xhash:
	; Extra hash step over the block at rdi+0: acc r12:r11:r10 += block with 1
	; added into r12; multiply by the words at rbp+160 / rbp+168 (partial
	; products collected in r9:r15:r14:r13); fold back to three limbs by keeping
	; the low two bits of the third limb and adding (t & -4) + (t >> 2).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
$L$seal_avx2_tail_256_rounds_and_2xhash:
	; Quarter-round on state 0 (rotations 16, 12, 8, 7; ymm3 is the shift
	; temporary), then rotate its rows 3/2/1 by 12/8/4 bytes per 128-bit lane.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,4
	; Same for state 1.
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,4
	; Hash step over the block at rdi+0.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	; Second quarter-round on the rotated rows of state 0, then rotate the
	; rows back (4/8/12).
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,12
	; Same for state 1.
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,12
	; Hash step over the block at rdi+16.
	add	r10,QWORD[((0+16))+rdi]
	adc	r11,QWORD[((8+16))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	; Both branches are signed on purpose: rcx keeps being decremented below
	; zero during the r8 phase, and r8 runs down to -1 to end the loop.
	lea	rdi,[32+rdi]
	dec	rcx
	jg	NEAR $L$seal_avx2_tail_256_rounds_and_3xhash
	dec	r8
	jge	NEAR $L$seal_avx2_tail_256_rounds_and_2xhash
	; Feed-forward for both states (state 1 uses the fourth row saved at
	; rbp+160+192, state 0 the one at rbp+160+160).
	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
	; State 1: lane-rearrange, XOR with 128 input bytes at rsi, store at rdi.
	vperm2i128	ymm3,ymm5,ymm1,0x02
	vperm2i128	ymm5,ymm5,ymm1,0x13
	vperm2i128	ymm1,ymm13,ymm9,0x02
	vperm2i128	ymm9,ymm13,ymm9,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
	; State 0 is only rearranged; its 128 bytes stay in ymm0, ymm4, ymm8, ymm12.
	vperm2i128	ymm3,ymm4,ymm0,0x13
	vperm2i128	ymm0,ymm4,ymm0,0x02
	vperm2i128	ymm4,ymm12,ymm8,0x02
	vperm2i128	ymm12,ymm12,ymm8,0x13
	vmovdqa	ymm8,ymm3

	; 128 bytes were just written at rdi without advancing rdi; rcx = 128 is
	; handed to $L$seal_avx2_short_hash_remainder (outside this view), presumably
	; as the count of written-but-unhashed bytes - TODO confirm at that label.
	mov	rcx,4*32
	lea	rsi,[128+rsi]
	sub	rbx,4*32
	jmp	NEAR $L$seal_avx2_short_hash_remainder
7763
; ----------------------------------------------------------------------------
; $L$seal_avx2_tail_384
;
; Builds three ChaCha20 states (ymm0/4/8/12, ymm1/5/9/13, ymm2/6/10/14; each
; ymm register carries one row for two 128-bit lanes, i.e. 3 x 128 = 384 bytes
; of key stream) and runs the rounds interleaved with Poly1305 block updates
; over bytes read from [rdi].
;
; Registers as used by the visible code:
;   rdi          pointer that is hashed from and later written to
;   rsi          pointer whose contents are XORed with the key stream
;   rbx          byte counter, reduced by the amount consumed here
;   r10:r11:r12  Poly1305 accumulator limbs
;   r9,r13-r15,rax,rdx  scratch for the multiply/reduce
;   rcx, r8      loop counters; they are set before this label is reached
;                (not visible in this section -- TODO confirm against caller)
;   [160+0+rbp] / [160+8+rbp]   the two 64-bit multiplier words of the hash
;   [160+64+rbp], [160+96+rbp]  initial values of state rows B and C
;   [160+160..224+rbp]          per-state initial counter rows (row D)
; ----------------------------------------------------------------------------
$L$seal_avx2_tail_384:
; Row A = constants, rows B/C = saved rows; replicate for the three states.
	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm10,ymm8
; Row D: advance the stored counter row by $L$avx2_inc once per state and
; save each value so it can be added back after the rounds.
	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
	vpaddd	ymm13,ymm12,ymm14
	vpaddd	ymm12,ymm12,ymm13
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
	vmovdqa	YMMWORD[(160+224)+rbp],ymm14

; Loop entry that hashes one extra 16-byte block before the double round.
$L$seal_avx2_tail_384_rounds_and_3xhash:
; Poly1305 step: add the 16 bytes at [rdi] plus the pad bit to the
; accumulator, multiply by the two words at [160+rbp]/[168+rbp], then fold
; everything at bit 130 and above back in times 5 (x&-4 plus x>>2).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
; Loop entry for one ChaCha double round with two hash blocks interleaved.
$L$seal_avx2_tail_384_rounds_and_2xhash:
; First half of the double round, state 0 (ymm3 is the shift temporary).
; The trailing vpalignr's rotate rows D/C/B within each lane by 12/8/4 bytes
; so the second half operates on the diagonals.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,4
; First half, state 1.
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,4
; Poly1305 step over the 16 bytes at [rdi+0].
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
; First half, state 2.
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpsrld	ymm3,ymm6,20
	vpslld	ymm6,ymm6,12
	vpxor	ymm6,ymm6,ymm3
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpslld	ymm3,ymm6,7
	vpsrld	ymm6,ymm6,25
	vpxor	ymm6,ymm6,ymm3
	vpalignr	ymm14,ymm14,ymm14,12
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm6,ymm6,ymm6,4
; Second half of the double round, state 0; the vpalignr amounts 4/8/12 undo
; the lane rotation applied in the first half.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,12
; Poly1305 step over the 16 bytes at [rdi+16].
	add	r10,QWORD[((0+16))+rdi]
	adc	r11,QWORD[((8+16))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
; Second half, state 1.
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,12
; Second half, state 2.
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpsrld	ymm3,ymm6,20
	vpslld	ymm6,ymm6,12
	vpxor	ymm6,ymm6,ymm3
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpslld	ymm3,ymm6,7
	vpsrld	ymm6,ymm6,25
	vpxor	ymm6,ymm6,ymm3
	vpalignr	ymm14,ymm14,ymm14,4
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm6,ymm6,ymm6,12

; 32 bytes were hashed in this pass. While rcx stays positive after the
; decrement, repeat with the extra hash block; afterwards r8 counts the
; remaining passes that enter at the 2xhash label (signed compares).
	lea	rdi,[32+rdi]
	dec	rcx
	jg	NEAR $L$seal_avx2_tail_384_rounds_and_3xhash
	dec	r8
	jge	NEAR $L$seal_avx2_tail_384_rounds_and_2xhash
; Feed-forward: add the initial rows back into each state.
	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 2: vperm2i128 0x02 pairs the low 128-bit lanes of two rows, 0x13 the
; high lanes, giving four consecutive 32-byte key stream chunks. XOR them
; with [rsi+0..127] and store to [rdi+0..127].
	vperm2i128	ymm3,ymm6,ymm2,0x02
	vperm2i128	ymm6,ymm6,ymm2,0x13
	vperm2i128	ymm2,ymm14,ymm10,0x02
	vperm2i128	ymm10,ymm14,ymm10,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
; State 1: same treatment for [rsi+128..255] -> [rdi+128..255].
	vperm2i128	ymm3,ymm5,ymm1,0x02
	vperm2i128	ymm5,ymm5,ymm1,0x13
	vperm2i128	ymm1,ymm13,ymm9,0x02
	vperm2i128	ymm9,ymm13,ymm9,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
; State 0 is not written here: its 128 bytes of key stream are left in
; ymm0, ymm4, ymm8, ymm12 (in that order) for the short loop.
	vperm2i128	ymm3,ymm4,ymm0,0x13
	vperm2i128	ymm0,ymm4,ymm0,0x02
	vperm2i128	ymm4,ymm12,ymm8,0x02
	vperm2i128	ymm12,ymm12,ymm8,0x13
	vmovdqa	ymm8,ymm3

; rcx = 256: number of bytes starting at rdi that
; $L$seal_avx2_short_hash_remainder will hash (rdi was not advanced past the
; 256 bytes stored above). rsi and rbx account for the 256 bytes consumed.
	mov	rcx,8*32
	lea	rsi,[256+rsi]
	sub	rbx,8*32
	jmp	NEAR $L$seal_avx2_short_hash_remainder
8072
; ----------------------------------------------------------------------------
; $L$seal_avx2_tail_512
;
; Same scheme as the smaller tails but with four ChaCha20 states
; (ymm0/4/8/12, ymm1/5/9/13, ymm2/6/10/14, ymm3/7/11/15 = 4 x 128 = 512 bytes
; of key stream). All sixteen ymm registers are occupied by state, so ymm8
; doubles as the scratch register and its row is parked in the stack slot
; [160+128+rbp] while it is borrowed. The Poly1305 multiply uses mulx here and
; is interleaved instruction-by-instruction with the vector code.
;
; rdi/rsi/rbx, r10:r11:r12 (accumulator) and rcx/r8 (loop counters, set before
; this label is reached -- not visible in this section) are used as in the
; other tail paths; [160+160..256+rbp] hold the four initial counter rows.
; ----------------------------------------------------------------------------
$L$seal_avx2_tail_512:
	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm10,ymm8
	vmovdqa	ymm3,ymm0
	vmovdqa	ymm7,ymm4
	vmovdqa	ymm11,ymm8
; Counter rows: each state gets the previous row plus $L$avx2_inc; all four
; are saved for the feed-forward after the rounds.
	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
	vpaddd	ymm14,ymm12,ymm15
	vpaddd	ymm13,ymm12,ymm14
	vpaddd	ymm12,ymm12,ymm13
	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12

; Loop entry that hashes one extra 16-byte block before the double round.
$L$seal_avx2_tail_512_rounds_and_3xhash:
; Poly1305 step over [rdi]: accumulate block + pad bit, multiply by the words
; at [160+rbp] and [168+rbp] (mulx takes its implicit operand from rdx and
; leaves the flags alone), then fold bits >= 130 back in times 5.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	add	r15,rax
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
; Loop entry for one double round over all four states with two Poly1305
; blocks ([rdi+0] and [rdi+16]) woven in.
$L$seal_avx2_tail_512_rounds_and_2xhash:
; Park row C of state 0 so ymm8 can hold the rol16 shuffle mask.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
; Hash block 1 of 2: accumulate the 16 bytes at [rdi+0].
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
; Row C of state 0 goes back to the spill slot; ymm8 becomes the temporary
; for the rotate-left-by-12 (shift right 20 / left 12 / xor).
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
; Hash block 1: products with the multiplier word at [160+rbp].
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,25
	vpslld	ymm7,ymm7,32-25
	vpxor	ymm7,ymm7,ymm8
; Hash block 1: products with the multiplier word at [168+rbp].
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	vpsrld	ymm8,ymm6,25
	vpslld	ymm6,ymm6,32-25
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,25
	vpslld	ymm5,ymm5,32-25
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,25
	vpslld	ymm4,ymm4,32-25
	vpxor	ymm4,ymm4,ymm8
; Reload row C of state 0, then rotate rows B/C/D within each lane by 4/8/12
; bytes so the second half of the double round works on the diagonals.
	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
	vpalignr	ymm7,ymm7,ymm7,4
	vpalignr	ymm11,ymm11,ymm11,8
	vpalignr	ymm15,ymm15,ymm15,12
	vpalignr	ymm6,ymm6,ymm6,4
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm14,ymm14,ymm14,12
	vpalignr	ymm5,ymm5,ymm5,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm4,ymm4,ymm4,4
	add	r15,rax
	adc	r9,rdx
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm12,ymm12,ymm12,12
; Second half of the double round.
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol16]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
; Hash block 1: fold bits >= 130 of the product back in times 5.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,20
	vpslld	ymm7,ymm7,32-20
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,20
	vpslld	ymm6,ymm6,32-20
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,20
	vpslld	ymm5,ymm5,32-20
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,20
	vpslld	ymm4,ymm4,32-20
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	ymm8,YMMWORD[$L$rol8]
	vpaddd	ymm3,ymm3,ymm7
	vpaddd	ymm2,ymm2,ymm6
; Hash block 2 of 2: accumulate the 16 bytes at [rdi+16].
	add	r10,QWORD[((0+16))+rdi]
	adc	r11,QWORD[((8+16))+rdi]
	adc	r12,1
	vpaddd	ymm1,ymm1,ymm5
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm15,ymm15,ymm3
	vpxor	ymm14,ymm14,ymm2
	vpxor	ymm13,ymm13,ymm1
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm15,ymm15,ymm8
	vpshufb	ymm14,ymm14,ymm8
	vpshufb	ymm13,ymm13,ymm8
	vpshufb	ymm12,ymm12,ymm8
	vpaddd	ymm11,ymm11,ymm15
	vpaddd	ymm10,ymm10,ymm14
	vpaddd	ymm9,ymm9,ymm13
	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
	vpxor	ymm7,ymm7,ymm11
	vpxor	ymm6,ymm6,ymm10
	vpxor	ymm5,ymm5,ymm9
	vpxor	ymm4,ymm4,ymm8
	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
	vpsrld	ymm8,ymm7,25
; Hash block 2: products with the multiplier word at [160+rbp].
	mov	rdx,QWORD[((0+160+0))+rbp]
	mov	r15,rdx
	mulx	r14,r13,r10
	mulx	rdx,rax,r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	vpslld	ymm7,ymm7,32-25
	vpxor	ymm7,ymm7,ymm8
	vpsrld	ymm8,ymm6,25
	vpslld	ymm6,ymm6,32-25
	vpxor	ymm6,ymm6,ymm8
	vpsrld	ymm8,ymm5,25
	vpslld	ymm5,ymm5,32-25
	vpxor	ymm5,ymm5,ymm8
	vpsrld	ymm8,ymm4,25
	vpslld	ymm4,ymm4,32-25
	vpxor	ymm4,ymm4,ymm8
; Reload row C of state 0 and undo the lane rotation (12/8/4 for rows B/C/D).
	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
	vpalignr	ymm7,ymm7,ymm7,12
	vpalignr	ymm11,ymm11,ymm11,8
	vpalignr	ymm15,ymm15,ymm15,4
	vpalignr	ymm6,ymm6,ymm6,12
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm14,ymm14,ymm14,4
	vpalignr	ymm5,ymm5,ymm5,12
	vpalignr	ymm9,ymm9,ymm9,8
; Hash block 2: products with the multiplier word at [168+rbp].
	mov	rdx,QWORD[((8+160+0))+rbp]
	mulx	rax,r10,r10
	add	r14,r10
	mulx	r9,r11,r11
	adc	r15,r11
	adc	r9,0
	imul	rdx,r12
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm4,ymm4,ymm4,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm12,ymm12,ymm12,4
















	add	r15,rax
	adc	r9,rdx




















; Hash block 2: fold bits >= 130 of the product back in times 5.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

; 32 bytes were hashed in this pass. While rcx stays positive after the
; decrement, repeat with the extra hash block; afterwards r8 counts the
; remaining passes that enter at the 2xhash label (signed compares).
	lea	rdi,[32+rdi]
	dec	rcx
	jg	NEAR $L$seal_avx2_tail_512_rounds_and_3xhash
	dec	r8
	jge	NEAR $L$seal_avx2_tail_512_rounds_and_2xhash
; Feed-forward: add the initial rows back into each of the four states.
	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]

; State 3 -> [rdi+0..127]. ymm0 is parked in the spill slot so it can serve
; as the temporary for the lane shuffle (0x02 = low lanes, 0x13 = high lanes).
	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
	vperm2i128	ymm0,ymm7,ymm3,0x02
	vperm2i128	ymm7,ymm7,ymm3,0x13
	vperm2i128	ymm3,ymm15,ymm11,0x02
	vperm2i128	ymm11,ymm15,ymm11,0x13
	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
	vmovdqu	YMMWORD[(96+0)+rdi],ymm11

; Restore ymm0; state 2 -> [rdi+128..255].
	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
	vperm2i128	ymm3,ymm6,ymm2,0x02
	vperm2i128	ymm6,ymm6,ymm2,0x13
	vperm2i128	ymm2,ymm14,ymm10,0x02
	vperm2i128	ymm10,ymm14,ymm10,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; State 1 -> [rdi+256..383].
	vperm2i128	ymm3,ymm5,ymm1,0x02
	vperm2i128	ymm5,ymm5,ymm1,0x13
	vperm2i128	ymm1,ymm13,ymm9,0x02
	vperm2i128	ymm9,ymm13,ymm9,0x13
	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; State 0 is not written here: its 128 bytes of key stream are left in
; ymm0, ymm4, ymm8, ymm12 (in that order) for the short loop.
	vperm2i128	ymm3,ymm4,ymm0,0x13
	vperm2i128	ymm0,ymm4,ymm0,0x02
	vperm2i128	ymm4,ymm12,ymm8,0x02
	vperm2i128	ymm12,ymm12,ymm8,0x13
	vmovdqa	ymm8,ymm3

; rcx = 384: number of bytes starting at rdi that
; $L$seal_avx2_short_hash_remainder will hash (rdi was not advanced past the
; 384 bytes stored above). rsi and rbx account for the 384 bytes consumed.
	mov	rcx,12*32
	lea	rsi,[384+rsi]
	sub	rbx,12*32
	jmp	NEAR $L$seal_avx2_short_hash_remainder
8471
; ----------------------------------------------------------------------------
; $L$seal_avx2_320
;
; Short-input path using three ChaCha20 states and no interleaved hashing.
; On entry ymm0/ymm4/ymm8/ymm12 hold state rows A/B/C/D (prepared before this
; label -- not visible in this section). The first 32 bytes of key stream are
; masked with $L$clamp and stored at [160+0+rbp], the slot the Poly1305
; multiply reads; the following 320 bytes of key stream are handed to
; $L$seal_avx2_short in registers.
; ----------------------------------------------------------------------------
$L$seal_avx2_320:
; Replicate rows A/B/C; rows D of states 1 and 2 are the previous row plus
; $L$avx2_inc.
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm10,ymm8
	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]
; Keep the initial rows B and C in ymm7/ymm11 and the three rows D on the
; stack for the feed-forward after the rounds.
	vmovdqa	ymm7,ymm4
	vmovdqa	ymm11,ymm8
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
; r10 counts 10 double rounds.
	mov	r10,10
$L$seal_avx2_320_rounds:
; First half of the double round for states 0, 1, 2 in turn; ymm3 is the
; shift temporary. The vpalignr's (12/8/4 for rows D/C/B) set up the diagonals.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,4
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,4
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpsrld	ymm3,ymm6,20
	vpslld	ymm6,ymm6,12
	vpxor	ymm6,ymm6,ymm3
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpslld	ymm3,ymm6,7
	vpsrld	ymm6,ymm6,25
	vpxor	ymm6,ymm6,ymm3
	vpalignr	ymm14,ymm14,ymm14,12
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm6,ymm6,ymm6,4
; Second half of the double round for states 0, 1, 2; the vpalignr amounts
; 4/8/12 undo the lane rotation.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,12
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,12
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpsrld	ymm3,ymm6,20
	vpslld	ymm6,ymm6,12
	vpxor	ymm6,ymm6,ymm3
	vpaddd	ymm2,ymm2,ymm6
	vpxor	ymm14,ymm14,ymm2
	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
	vpaddd	ymm10,ymm10,ymm14
	vpxor	ymm6,ymm6,ymm10
	vpslld	ymm3,ymm6,7
	vpsrld	ymm6,ymm6,25
	vpxor	ymm6,ymm6,ymm3
	vpalignr	ymm14,ymm14,ymm14,4
	vpalignr	ymm10,ymm10,ymm10,8
	vpalignr	ymm6,ymm6,ymm6,12

	dec	r10
	jne	NEAR $L$seal_avx2_320_rounds
; Feed-forward: constants for row A, saved ymm7/ymm11 for rows B/C, stack
; copies for rows D.
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,ymm7
	vpaddd	ymm5,ymm5,ymm7
	vpaddd	ymm6,ymm6,ymm7
	vpaddd	ymm8,ymm8,ymm11
	vpaddd	ymm9,ymm9,ymm11
	vpaddd	ymm10,ymm10,ymm11
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
; ymm3 = low lanes of rows A and B of state 0, i.e. the first 32 key stream
; bytes.
	vperm2i128	ymm3,ymm4,ymm0,0x02

; Mask with $L$clamp (its upper 16 bytes are all ones, so only the lower
; 16 bytes are altered) and store as the Poly1305 key material.
	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
	vmovdqa	YMMWORD[(160+0)+rbp],ymm3

; Lay out the remaining key stream in consumption order for the short loop:
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6
; (10 x 32 = 320 bytes). The low lanes of rows C/D of state 0 are not used.
	vperm2i128	ymm0,ymm4,ymm0,0x13
	vperm2i128	ymm4,ymm12,ymm8,0x13
	vperm2i128	ymm8,ymm5,ymm1,0x02
	vperm2i128	ymm12,ymm13,ymm9,0x02
	vperm2i128	ymm1,ymm5,ymm1,0x13
	vperm2i128	ymm5,ymm13,ymm9,0x13
	vperm2i128	ymm9,ymm6,ymm2,0x02
	vperm2i128	ymm13,ymm14,ymm10,0x02
	vperm2i128	ymm2,ymm6,ymm2,0x13
	vperm2i128	ymm6,ymm14,ymm10,0x13
	jmp	NEAR $L$seal_avx2_short
8633
; ----------------------------------------------------------------------------
; $L$seal_avx2_192
;
; Short-input path using two ChaCha20 states (ymm0/4/8/12 and ymm1/5/9/13).
; On entry ymm0/ymm4/ymm8/ymm12 hold state rows A/B/C/D (prepared before this
; label -- not visible in this section). The initial rows are kept in
; ymm2/ymm6/ymm10 (A/B/C) and ymm11/ymm15 (the two rows D); the round loop
; never writes those registers. The first 32 key stream bytes become the
; masked Poly1305 key material at [160+0+rbp]; the following 192 bytes stay in
; registers and execution falls through to $L$seal_avx2_short.
; ----------------------------------------------------------------------------
$L$seal_avx2_192:
	vmovdqa	ymm1,ymm0
	vmovdqa	ymm2,ymm0
	vmovdqa	ymm5,ymm4
	vmovdqa	ymm6,ymm4
	vmovdqa	ymm9,ymm8
	vmovdqa	ymm10,ymm8
	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
	vmovdqa	ymm11,ymm12
	vmovdqa	ymm15,ymm13
; r10 counts 10 double rounds.
	mov	r10,10
$L$seal_avx2_192_rounds:
; First half of the double round for states 0 and 1; ymm3 is the shift
; temporary. The vpalignr's (12/8/4 for rows D/C/B) set up the diagonals.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,4
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,12
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,4
; Second half of the double round for states 0 and 1; the vpalignr amounts
; 4/8/12 undo the lane rotation.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
	vpalignr	ymm12,ymm12,ymm12,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,12
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpsrld	ymm3,ymm5,20
	vpslld	ymm5,ymm5,12
	vpxor	ymm5,ymm5,ymm3
	vpaddd	ymm1,ymm1,ymm5
	vpxor	ymm13,ymm13,ymm1
	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
	vpaddd	ymm9,ymm9,ymm13
	vpxor	ymm5,ymm5,ymm9
	vpslld	ymm3,ymm5,7
	vpsrld	ymm5,ymm5,25
	vpxor	ymm5,ymm5,ymm3
	vpalignr	ymm13,ymm13,ymm13,4
	vpalignr	ymm9,ymm9,ymm9,8
	vpalignr	ymm5,ymm5,ymm5,12

	dec	r10
	jne	NEAR $L$seal_avx2_192_rounds
; Feed-forward from the saved initial rows.
	vpaddd	ymm0,ymm0,ymm2
	vpaddd	ymm1,ymm1,ymm2
	vpaddd	ymm4,ymm4,ymm6
	vpaddd	ymm5,ymm5,ymm6
	vpaddd	ymm8,ymm8,ymm10
	vpaddd	ymm9,ymm9,ymm10
	vpaddd	ymm12,ymm12,ymm11
	vpaddd	ymm13,ymm13,ymm15
; ymm3 = low lanes of rows A and B of state 0, i.e. the first 32 key stream
; bytes.
	vperm2i128	ymm3,ymm4,ymm0,0x02

; Mask with $L$clamp (its upper 16 bytes are all ones, so only the lower
; 16 bytes are altered) and store as the Poly1305 key material.
	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
	vmovdqa	YMMWORD[(160+0)+rbp],ymm3

; Lay out the remaining key stream in consumption order for the short loop:
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5 (6 x 32 = 192 bytes). The low lanes of
; rows C/D of state 0 are not used. Falls through to the next label.
	vperm2i128	ymm0,ymm4,ymm0,0x13
	vperm2i128	ymm4,ymm12,ymm8,0x13
	vperm2i128	ymm8,ymm5,ymm1,0x02
	vperm2i128	ymm12,ymm13,ymm9,0x02
	vperm2i128	ymm1,ymm5,ymm1,0x13
	vperm2i128	ymm5,ymm13,ymm9,0x13
; ----------------------------------------------------------------------------
; $L$seal_avx2_short
;
; Common finishing path. Key stream is consumed 32 bytes at a time from ymm0,
; with further chunks queued in ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13,
; ymm2, ymm6 (shifted down one position after every chunk).
;   rsi = bytes to XOR with the key stream, rdi = where results are stored and
;   hashed from, rbx = bytes still to process, rcx = already-stored bytes at
;   rdi that still need hashing, r10:r11:r12 = Poly1305 accumulator.
; ----------------------------------------------------------------------------
$L$seal_avx2_short:
; NOTE(review): moving r8 onto itself has no architectural effect; it looks
; like a leftover of the generator's argument setup for the call below.
	mov	r8,r8
; Helper defined outside this section; presumably absorbs the additional data
; into the accumulator with r8 as its length -- TODO confirm.
	call	poly_hash_ad_internal
; Coming from the 192/320 paths there are no stored-but-unhashed bytes.
	xor	rcx,rcx
; Entry used by the tail paths: hash rcx bytes at rdi, 16 at a time.
$L$seal_avx2_short_hash_remainder:
	cmp	rcx,16
	jb	NEAR $L$seal_avx2_short_loop
; Poly1305 step: add the 16 bytes at [rdi] plus the pad bit, multiply by the
; words at [160+rbp]/[168+rbp], fold bits >= 130 back in times 5.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	sub	rcx,16
	add	rdi,16
	jmp	NEAR $L$seal_avx2_short_hash_remainder
; Main loop: while at least 32 bytes remain, XOR 32 bytes with ymm0, store
; them, then hash the two 16-byte halves that were just written.
$L$seal_avx2_short_loop:
	cmp	rbx,32
	jb	NEAR $L$seal_avx2_short_tail
	sub	rbx,32

	vpxor	ymm0,ymm0,YMMWORD[rsi]
	vmovdqu	YMMWORD[rdi],ymm0
	lea	rsi,[32+rsi]

; Poly1305 step over [rdi+0..15].
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
; Poly1305 step over [rdi+16..31].
	add	r10,QWORD[((0+16))+rdi]
	adc	r11,QWORD[((8+16))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[32+rdi]

; Shift the key stream queue down by one register so the next 32-byte chunk
; lands in ymm0.
	vmovdqa	ymm0,ymm4
	vmovdqa	ymm4,ymm8
	vmovdqa	ymm8,ymm12
	vmovdqa	ymm12,ymm1
	vmovdqa	ymm1,ymm5
	vmovdqa	ymm5,ymm9
	vmovdqa	ymm9,ymm13
	vmovdqa	ymm13,ymm2
	vmovdqa	ymm2,ymm6
	jmp	NEAR $L$seal_avx2_short_loop
; Fewer than 32 bytes remain: handle one more 16-byte block if present,
; using the low half of ymm0.
$L$seal_avx2_short_tail:
	cmp	rbx,16
	jb	NEAR $L$seal_avx2_exit
	sub	rbx,16
	vpxor	xmm3,xmm0,XMMWORD[rsi]
	vmovdqu	XMMWORD[rdi],xmm3
	lea	rsi,[16+rsi]
; Poly1305 step over the 16 bytes just stored at [rdi].
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
; Bring the unused upper 16 key stream bytes down into xmm0.
	vextracti128	xmm0,ymm0,1
; Clear the upper ymm halves before continuing at a label outside this
; section (by its name an SSE routine for a final sub-16-byte piece -- not
; visible here), with the remaining key stream in xmm0.
$L$seal_avx2_exit:
	vzeroupper
	jmp	NEAR $L$seal_sse_tail_16
8950
8951
%else
; Any output format other than win64 assembles only this stub: a lone `ret`
; so the object file is not empty.
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif
8956