xref: /aosp_15_r20/external/boringssl/src/gen/crypto/chacha-x86-linux.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
7.text
// ChaCha20_ctr32_nohw: ChaCha20 keystream XOR using only general-purpose
// registers (no SIMD).
//
// Stack arguments, as they are used by the code below (after the four
// register pushes the first argument sits at 20(%esp)):
//   arg 1  destination pointer; output bytes are stored through it
//   arg 2  source pointer; its bytes are XORed with the keystream
//   arg 3  byte count
//   arg 4  pointer to 8 dwords that become state words 4..11
//          (presumably the key -- confirm against the C prototype)
//   arg 5  pointer to 4 dwords that become state words 12..15; word 12 is
//          incremented once per 64-byte block
//
// Frame layout after "subl $132,%esp":
//   0..60(%esp)      16-dword working state (words that live in registers
//                    are only spilled to their slot when needed)
//   80..108(%esp)    copy of the 8 dwords from arg 4
//   112..124(%esp)   copy of the 4 dwords from arg 5 (112 = block counter)
//   128(%esp)        spill slot for the round-loop iteration counter
//   152/156/160(%esp) arg 1 / arg 2 / arg 3, rewritten in place per block
//
// NOTE: there is no zero-length check in this routine. With a byte count of
// zero control reaches .L005tail_loop with %ebx == 0 and the decl/jnz pair
// wraps around, so callers must pass a non-zero count.
8.globl	ChaCha20_ctr32_nohw
9.hidden	ChaCha20_ctr32_nohw
10.type	ChaCha20_ctr32_nohw,@function
11.align	16
12ChaCha20_ctr32_nohw:
13.L_ChaCha20_ctr32_nohw_begin:
14	pushl	%ebp
15	pushl	%ebx
16	pushl	%esi
17	pushl	%edi
// 16 bytes of saved registers + 4 bytes of return address: 32(%esp) and
// 36(%esp) are therefore arg 4 and arg 5.
18	movl	32(%esp),%esi
19	movl	36(%esp),%edi
20	subl	$132,%esp
// Copy the 8 dwords at arg 4 into 80..108(%esp).
21	movl	(%esi),%eax
22	movl	4(%esi),%ebx
23	movl	8(%esi),%ecx
24	movl	12(%esi),%edx
25	movl	%eax,80(%esp)
26	movl	%ebx,84(%esp)
27	movl	%ecx,88(%esp)
28	movl	%edx,92(%esp)
29	movl	16(%esi),%eax
30	movl	20(%esi),%ebx
31	movl	24(%esi),%ecx
32	movl	28(%esi),%edx
33	movl	%eax,96(%esp)
34	movl	%ebx,100(%esp)
35	movl	%ecx,104(%esp)
36	movl	%edx,108(%esp)
// Copy the 4 dwords at arg 5 into 112..124(%esp). Word 0 is stored minus
// one because .L000entry adds one before every block, including the first.
37	movl	(%edi),%eax
38	movl	4(%edi),%ebx
39	movl	8(%edi),%ecx
40	movl	12(%edi),%edx
41	subl	$1,%eax
42	movl	%eax,112(%esp)
43	movl	%ebx,116(%esp)
44	movl	%ecx,120(%esp)
45	movl	%edx,124(%esp)
// First block: the argument slots already hold the caller's values, so skip
// the write-back at .L001outer_loop.
46	jmp	.L000entry
47.align	16
// Reached after each full 64-byte block: write back the advanced source
// pointer (%ebx), destination pointer (%eax) and remaining count (%ecx).
48.L001outer_loop:
49	movl	%ebx,156(%esp)
50	movl	%eax,152(%esp)
51	movl	%ecx,160(%esp)
// Build the initial state for one block. Words 0..3 are the four constants
// (word 0 stays in %eax, words 1..3 go to 4/8/12(%esp)); words 4..15 come
// from the saved copies at 80..124(%esp).
52.L000entry:
53	movl	$1634760805,%eax
54	movl	$857760878,4(%esp)
55	movl	$2036477234,8(%esp)
56	movl	$1797285236,12(%esp)
57	movl	84(%esp),%ebx
58	movl	88(%esp),%ebp
59	movl	104(%esp),%ecx
60	movl	108(%esp),%esi
61	movl	116(%esp),%edx
62	movl	120(%esp),%edi
63	movl	%ebx,20(%esp)
64	movl	%ebp,24(%esp)
65	movl	%ecx,40(%esp)
66	movl	%esi,44(%esp)
67	movl	%edx,52(%esp)
68	movl	%edi,56(%esp)
// Words 4, 8, 9 and 12 are kept in %ebp, %ecx, %esi and %edx instead of
// being written to their working-state slots.
69	movl	92(%esp),%ebx
70	movl	124(%esp),%edi
71	movl	112(%esp),%edx
72	movl	80(%esp),%ebp
73	movl	96(%esp),%ecx
74	movl	100(%esp),%esi
// Advance the block counter (state word 12) and persist it for the next
// block and for the feed-forward addition after the rounds.
75	addl	$1,%edx
76	movl	%ebx,28(%esp)
77	movl	%edi,60(%esp)
78	movl	%edx,112(%esp)
// 10 loop iterations; each iteration below contains eight
// add/xor/rotate-16/12/8/7 quarter-rounds.
79	movl	$10,%ebx
80	jmp	.L002loop
81.align	16
// Round loop. On entry to every iteration:
//   %eax = word 0, %ebp = word 4, %ecx = word 8, %esi = word 9,
//   %edx = word 12, %ebx = remaining iteration count (spilled to 128(%esp)
//   immediately so %ebx can be reused as a temporary).
// The first four quarter-rounds work on words (0,4,8,12), (1,5,9,13),
// (2,6,10,14), (3,7,11,15); the last four on (0,5,10,15), (1,6,11,12),
// (2,7,8,13), (3,4,9,14). Loads and stores of the next/previous quarter-round
// are interleaved with the arithmetic, so instruction order matters.
82.L002loop:
83	addl	%ebp,%eax
84	movl	%ebx,128(%esp)
85	movl	%ebp,%ebx
86	xorl	%eax,%edx
87	roll	$16,%edx
88	addl	%edx,%ecx
89	xorl	%ecx,%ebx
90	movl	52(%esp),%edi
91	roll	$12,%ebx
92	movl	20(%esp),%ebp
93	addl	%ebx,%eax
94	xorl	%eax,%edx
95	movl	%eax,(%esp)
96	roll	$8,%edx
97	movl	4(%esp),%eax
98	addl	%edx,%ecx
99	movl	%edx,48(%esp)
100	xorl	%ecx,%ebx
101	addl	%ebp,%eax
102	roll	$7,%ebx
103	xorl	%eax,%edi
104	movl	%ecx,32(%esp)
105	roll	$16,%edi
106	movl	%ebx,16(%esp)
107	addl	%edi,%esi
108	movl	40(%esp),%ecx
109	xorl	%esi,%ebp
110	movl	56(%esp),%edx
111	roll	$12,%ebp
112	movl	24(%esp),%ebx
113	addl	%ebp,%eax
114	xorl	%eax,%edi
115	movl	%eax,4(%esp)
116	roll	$8,%edi
117	movl	8(%esp),%eax
118	addl	%edi,%esi
119	movl	%edi,52(%esp)
120	xorl	%esi,%ebp
121	addl	%ebx,%eax
122	roll	$7,%ebp
123	xorl	%eax,%edx
124	movl	%esi,36(%esp)
125	roll	$16,%edx
126	movl	%ebp,20(%esp)
127	addl	%edx,%ecx
128	movl	44(%esp),%esi
129	xorl	%ecx,%ebx
130	movl	60(%esp),%edi
131	roll	$12,%ebx
132	movl	28(%esp),%ebp
133	addl	%ebx,%eax
134	xorl	%eax,%edx
135	movl	%eax,8(%esp)
136	roll	$8,%edx
137	movl	12(%esp),%eax
138	addl	%edx,%ecx
139	movl	%edx,56(%esp)
140	xorl	%ecx,%ebx
141	addl	%ebp,%eax
142	roll	$7,%ebx
143	xorl	%eax,%edi
144	roll	$16,%edi
145	movl	%ebx,24(%esp)
146	addl	%edi,%esi
147	xorl	%esi,%ebp
148	roll	$12,%ebp
149	movl	20(%esp),%ebx
150	addl	%ebp,%eax
151	xorl	%eax,%edi
152	movl	%eax,12(%esp)
153	roll	$8,%edi
154	movl	(%esp),%eax
155	addl	%edi,%esi
156	movl	%edi,%edx
157	xorl	%esi,%ebp
158	addl	%ebx,%eax
159	roll	$7,%ebp
160	xorl	%eax,%edx
161	roll	$16,%edx
162	movl	%ebp,28(%esp)
163	addl	%edx,%ecx
164	xorl	%ecx,%ebx
165	movl	48(%esp),%edi
166	roll	$12,%ebx
167	movl	24(%esp),%ebp
168	addl	%ebx,%eax
169	xorl	%eax,%edx
170	movl	%eax,(%esp)
171	roll	$8,%edx
172	movl	4(%esp),%eax
173	addl	%edx,%ecx
174	movl	%edx,60(%esp)
175	xorl	%ecx,%ebx
176	addl	%ebp,%eax
177	roll	$7,%ebx
178	xorl	%eax,%edi
179	movl	%ecx,40(%esp)
180	roll	$16,%edi
181	movl	%ebx,20(%esp)
182	addl	%edi,%esi
183	movl	32(%esp),%ecx
184	xorl	%esi,%ebp
185	movl	52(%esp),%edx
186	roll	$12,%ebp
187	movl	28(%esp),%ebx
188	addl	%ebp,%eax
189	xorl	%eax,%edi
190	movl	%eax,4(%esp)
191	roll	$8,%edi
192	movl	8(%esp),%eax
193	addl	%edi,%esi
194	movl	%edi,48(%esp)
195	xorl	%esi,%ebp
196	addl	%ebx,%eax
197	roll	$7,%ebp
198	xorl	%eax,%edx
199	movl	%esi,44(%esp)
200	roll	$16,%edx
201	movl	%ebp,24(%esp)
202	addl	%edx,%ecx
203	movl	36(%esp),%esi
204	xorl	%ecx,%ebx
205	movl	56(%esp),%edi
206	roll	$12,%ebx
207	movl	16(%esp),%ebp
208	addl	%ebx,%eax
209	xorl	%eax,%edx
210	movl	%eax,8(%esp)
211	roll	$8,%edx
212	movl	12(%esp),%eax
213	addl	%edx,%ecx
214	movl	%edx,52(%esp)
215	xorl	%ecx,%ebx
216	addl	%ebp,%eax
217	roll	$7,%ebx
218	xorl	%eax,%edi
219	roll	$16,%edi
220	movl	%ebx,28(%esp)
221	addl	%edi,%esi
222	xorl	%esi,%ebp
223	movl	48(%esp),%edx
224	roll	$12,%ebp
225	movl	128(%esp),%ebx
226	addl	%ebp,%eax
227	xorl	%eax,%edi
228	movl	%eax,12(%esp)
229	roll	$8,%edi
230	movl	(%esp),%eax
231	addl	%edi,%esi
232	movl	%edi,56(%esp)
233	xorl	%esi,%ebp
234	roll	$7,%ebp
235	decl	%ebx
236	jnz	.L002loop
// Rounds finished. Registers now hold words 0 (%eax), 4 (%ebp), 8 (%ecx),
// 9 (%esi), 12 (%edx) and 14 (%edi); the rest are in 0..60(%esp).
// Start the feed-forward (add the block's initial state) and choose between
// the full-block path and the byte-wise tail based on the remaining count.
237	movl	160(%esp),%ebx
238	addl	$1634760805,%eax
239	addl	80(%esp),%ebp
240	addl	96(%esp),%ecx
241	addl	100(%esp),%esi
242	cmpl	$64,%ebx
243	jb	.L003tail
// Full block: %ebx = source pointer, %eax = destination pointer (word 0 is
// parked in (%esp) while %eax is used as the pointer). Keystream words are
// XORed with the source and stored five at a time.
244	movl	156(%esp),%ebx
245	addl	112(%esp),%edx
246	addl	120(%esp),%edi
247	xorl	(%ebx),%eax
248	xorl	16(%ebx),%ebp
249	movl	%eax,(%esp)
250	movl	152(%esp),%eax
251	xorl	32(%ebx),%ecx
252	xorl	36(%ebx),%esi
253	xorl	48(%ebx),%edx
254	xorl	56(%ebx),%edi
255	movl	%ebp,16(%eax)
256	movl	%ecx,32(%eax)
257	movl	%esi,36(%eax)
258	movl	%edx,48(%eax)
259	movl	%edi,56(%eax)
// Words 1, 2, 3, 5, 6.
260	movl	4(%esp),%ebp
261	movl	8(%esp),%ecx
262	movl	12(%esp),%esi
263	movl	20(%esp),%edx
264	movl	24(%esp),%edi
265	addl	$857760878,%ebp
266	addl	$2036477234,%ecx
267	addl	$1797285236,%esi
268	addl	84(%esp),%edx
269	addl	88(%esp),%edi
270	xorl	4(%ebx),%ebp
271	xorl	8(%ebx),%ecx
272	xorl	12(%ebx),%esi
273	xorl	20(%ebx),%edx
274	xorl	24(%ebx),%edi
275	movl	%ebp,4(%eax)
276	movl	%ecx,8(%eax)
277	movl	%esi,12(%eax)
278	movl	%edx,20(%eax)
279	movl	%edi,24(%eax)
// Words 7, 10, 11, 13, 15.
280	movl	28(%esp),%ebp
281	movl	40(%esp),%ecx
282	movl	44(%esp),%esi
283	movl	52(%esp),%edx
284	movl	60(%esp),%edi
285	addl	92(%esp),%ebp
286	addl	104(%esp),%ecx
287	addl	108(%esp),%esi
288	addl	116(%esp),%edx
289	addl	124(%esp),%edi
290	xorl	28(%ebx),%ebp
291	xorl	40(%ebx),%ecx
292	xorl	44(%ebx),%esi
293	xorl	52(%ebx),%edx
294	xorl	60(%ebx),%edi
// Advance both pointers by 64, store the parked word 0 last, and loop while
// bytes remain (%ecx = remaining count after subtracting this block).
295	leal	64(%ebx),%ebx
296	movl	%ebp,28(%eax)
297	movl	(%esp),%ebp
298	movl	%ecx,40(%eax)
299	movl	160(%esp),%ecx
300	movl	%esi,44(%eax)
301	movl	%edx,52(%eax)
302	movl	%edi,60(%eax)
303	movl	%ebp,(%eax)
304	leal	64(%eax),%eax
305	subl	$64,%ecx
306	jnz	.L001outer_loop
307	jmp	.L004done
// Fewer than 64 bytes remain (%ebx = remaining count). Finish the
// feed-forward for all 16 words and leave the complete keystream block in
// 0..60(%esp).
308.L003tail:
309	addl	112(%esp),%edx
310	addl	120(%esp),%edi
311	movl	%eax,(%esp)
312	movl	%ebp,16(%esp)
313	movl	%ecx,32(%esp)
314	movl	%esi,36(%esp)
315	movl	%edx,48(%esp)
316	movl	%edi,56(%esp)
317	movl	4(%esp),%ebp
318	movl	8(%esp),%ecx
319	movl	12(%esp),%esi
320	movl	20(%esp),%edx
321	movl	24(%esp),%edi
322	addl	$857760878,%ebp
323	addl	$2036477234,%ecx
324	addl	$1797285236,%esi
325	addl	84(%esp),%edx
326	addl	88(%esp),%edi
327	movl	%ebp,4(%esp)
328	movl	%ecx,8(%esp)
329	movl	%esi,12(%esp)
330	movl	%edx,20(%esp)
331	movl	%edi,24(%esp)
332	movl	28(%esp),%ebp
333	movl	40(%esp),%ecx
334	movl	44(%esp),%esi
335	movl	52(%esp),%edx
336	movl	60(%esp),%edi
337	addl	92(%esp),%ebp
338	addl	104(%esp),%ecx
339	addl	108(%esp),%esi
340	addl	116(%esp),%edx
341	addl	124(%esp),%edi
// From here %ebp = source pointer, %ecx = destination pointer and
// %esi = byte index (zeroed below).
342	movl	%ebp,28(%esp)
343	movl	156(%esp),%ebp
344	movl	%ecx,40(%esp)
345	movl	152(%esp),%ecx
346	movl	%esi,44(%esp)
347	xorl	%esi,%esi
348	movl	%edx,52(%esp)
349	movl	%edi,60(%esp)
350	xorl	%eax,%eax
351	xorl	%edx,%edx
// One byte per iteration: dst[i] = src[i] ^ keystream[i]. The index is
// incremented before the store, hence the -1 displacement.
352.L005tail_loop:
353	movb	(%esi,%ebp,1),%al
354	movb	(%esp,%esi,1),%dl
355	leal	1(%esi),%esi
356	xorb	%dl,%al
357	movb	%al,-1(%ecx,%esi,1)
358	decl	%ebx
359	jnz	.L005tail_loop
// Release the frame and restore the registers pushed on entry.
360.L004done:
361	addl	$132,%esp
362	popl	%edi
363	popl	%esi
364	popl	%ebx
365	popl	%ebp
366	ret
367.size	ChaCha20_ctr32_nohw,.-.L_ChaCha20_ctr32_nohw_begin
// ChaCha20_ctr32_ssse3: ChaCha20 keystream XOR using SSE2/SSSE3 registers.
//
// Stack arguments as loaded below (first argument at 20(%esp) after the
// four pushes):
//   arg 1 -> %edi  destination pointer
//   arg 2 -> %esi  source pointer
//   arg 3 -> %ecx  byte count
//   arg 4 -> %edx  pointer to 32 bytes that become state words 4..11
//                  (presumably the key -- confirm against the C prototype)
//   arg 5 -> %ebx  pointer to 16 bytes that become state words 12..15
//
// Two code paths:
//   * count >= 256: .L007outer_loop produces four 64-byte blocks per
//     iteration, with every state word broadcast across the four dword lanes
//     of an XMM row (lane i belongs to block i).
//   * remaining < 256: .L0061x / .L011outer1x produce one 64-byte block at a
//     time with the state held as four XMM rows; a final partial block is
//     finished byte-wise in .L013tail_loop.
//
// Frame (after %esp is lowered and aligned down to 64 bytes):
//   0..255(%esp)     working state for the 4-block path, addressed as
//                    -128..112(%ebx) with %ebx = %esp+128; the 1-block path
//                    reuses 0..63(%esp) for its input state / keystream
//   256..511(%esp)   broadcast input state, addressed as -128..112(%ebp)
//                    with %ebp = %esp+384
//   512(%esp)        caller's %esp (restored at .L009done)
//   516/520(%esp)    saved arg 4 / arg 5 pointers (4-block path only)
//
// %eax holds the address of .Lssse3_data for the whole routine (until the
// byte-wise tail, where it is reused as a scratch byte register).
//
// NOTE: there is no zero-length check. With a byte count of zero control
// reaches .L013tail_loop with %ecx == 0 and the decl/jnz pair wraps around,
// so callers must pass a non-zero count.
368.globl	ChaCha20_ctr32_ssse3
369.hidden	ChaCha20_ctr32_ssse3
370.type	ChaCha20_ctr32_ssse3,@function
371.align	16
372ChaCha20_ctr32_ssse3:
373.L_ChaCha20_ctr32_ssse3_begin:
374	pushl	%ebp
375	pushl	%ebx
376	pushl	%esi
377	pushl	%edi
// call/pop pair: obtain the address of .Lpic_point in %eax so the constant
// table can be addressed position-independently.
378	call	.Lpic_point
379.Lpic_point:
380	popl	%eax
381	movl	20(%esp),%edi
382	movl	24(%esp),%esi
383	movl	28(%esp),%ecx
384	movl	32(%esp),%edx
385	movl	36(%esp),%ebx
// Remember the incoming %esp, carve out the frame and align it to 64 bytes
// (required for the movdqa accesses to the frame).
386	movl	%esp,%ebp
387	subl	$524,%esp
388	andl	$-64,%esp
389	movl	%ebp,512(%esp)
390	leal	.Lssse3_data-.Lpic_point(%eax),%eax
// %xmm3 = the 16 bytes at arg 5 (state words 12..15); the 1-block path
// expects them there if the branch below is taken.
391	movdqu	(%ebx),%xmm3
392	cmpl	$256,%ecx
393	jb	.L0061x
// ---- 4-block path setup ----
// Save the arg 4 / arg 5 pointers for the hand-over to the 1-block path and
// pre-subtract 256 from the count so the loop can test the borrow.
394	movl	%edx,516(%esp)
395	movl	%ebx,520(%esp)
396	subl	$256,%ecx
397	leal	384(%esp),%ebp
// Broadcast words 12..15 (pshufd $0/$85/$170/$255 replicate dword 0/1/2/3).
// Word 12 gets the per-lane offsets 0,1,2,3 added and 4 subtracted, because
// the outer loop adds 4 at the top of every iteration, including the first.
398	movdqu	(%edx),%xmm7
399	pshufd	$0,%xmm3,%xmm0
400	pshufd	$85,%xmm3,%xmm1
401	pshufd	$170,%xmm3,%xmm2
402	pshufd	$255,%xmm3,%xmm3
403	paddd	48(%eax),%xmm0
404	pshufd	$0,%xmm7,%xmm4
405	pshufd	$85,%xmm7,%xmm5
406	psubd	64(%eax),%xmm0
407	pshufd	$170,%xmm7,%xmm6
408	pshufd	$255,%xmm7,%xmm7
// Rows 12..15 -> 64..112(%ebp); rows 4..7 -> -64..-16(%ebp).
409	movdqa	%xmm0,64(%ebp)
410	movdqa	%xmm1,80(%ebp)
411	movdqa	%xmm2,96(%ebp)
412	movdqa	%xmm3,112(%ebp)
413	movdqu	16(%edx),%xmm3
414	movdqa	%xmm4,-64(%ebp)
415	movdqa	%xmm5,-48(%ebp)
416	movdqa	%xmm6,-32(%ebp)
417	movdqa	%xmm7,-16(%ebp)
// Rows 8..11 (second half of arg 4) -> 0..48(%ebp); rows 0..3 (the four
// constants at 32(%eax)) -> -128..-80(%ebp).
418	movdqa	32(%eax),%xmm7
419	leal	128(%esp),%ebx
420	pshufd	$0,%xmm3,%xmm0
421	pshufd	$85,%xmm3,%xmm1
422	pshufd	$170,%xmm3,%xmm2
423	pshufd	$255,%xmm3,%xmm3
424	pshufd	$0,%xmm7,%xmm4
425	pshufd	$85,%xmm7,%xmm5
426	pshufd	$170,%xmm7,%xmm6
427	pshufd	$255,%xmm7,%xmm7
428	movdqa	%xmm0,(%ebp)
429	movdqa	%xmm1,16(%ebp)
430	movdqa	%xmm2,32(%ebp)
431	movdqa	%xmm3,48(%ebp)
432	movdqa	%xmm4,-128(%ebp)
433	movdqa	%xmm5,-112(%ebp)
434	movdqa	%xmm6,-96(%ebp)
435	movdqa	%xmm7,-80(%ebp)
// Bias both data pointers by +128 so the loads/stores after the rounds can
// use the displacements -128, -64, 0 and 64.
436	leal	128(%esi),%esi
437	leal	128(%edi),%edi
438	jmp	.L007outer_loop
439.align	16
// One iteration = four 64-byte blocks. Copy the input rows from the %ebp
// area into the working area at %ebx; rows 0, 4, 8, 9 and 12 are loaded
// straight into %xmm0, %xmm3, %xmm4, %xmm5 and %xmm6 instead.
440.L007outer_loop:
441	movdqa	-112(%ebp),%xmm1
442	movdqa	-96(%ebp),%xmm2
443	movdqa	-80(%ebp),%xmm3
444	movdqa	-48(%ebp),%xmm5
445	movdqa	-32(%ebp),%xmm6
446	movdqa	-16(%ebp),%xmm7
447	movdqa	%xmm1,-112(%ebx)
448	movdqa	%xmm2,-96(%ebx)
449	movdqa	%xmm3,-80(%ebx)
450	movdqa	%xmm5,-48(%ebx)
451	movdqa	%xmm6,-32(%ebx)
452	movdqa	%xmm7,-16(%ebx)
453	movdqa	32(%ebp),%xmm2
454	movdqa	48(%ebp),%xmm3
455	movdqa	64(%ebp),%xmm4
456	movdqa	80(%ebp),%xmm5
457	movdqa	96(%ebp),%xmm6
458	movdqa	112(%ebp),%xmm7
// Advance the four lane counters (row 12) by 4 and write the new value back
// to the input area so the feed-forward and the next iteration see it.
459	paddd	64(%eax),%xmm4
460	movdqa	%xmm2,32(%ebx)
461	movdqa	%xmm3,48(%ebx)
462	movdqa	%xmm4,64(%ebx)
463	movdqa	%xmm5,80(%ebx)
464	movdqa	%xmm6,96(%ebx)
465	movdqa	%xmm7,112(%ebx)
466	movdqa	%xmm4,64(%ebp)
467	movdqa	-128(%ebp),%xmm0
468	movdqa	%xmm4,%xmm6
469	movdqa	-64(%ebp),%xmm3
470	movdqa	(%ebp),%xmm4
471	movdqa	16(%ebp),%xmm5
// 10 iterations of eight quarter-rounds each.
472	movl	$10,%edx
473	nop
474.align	16
// Round loop. Rotations by 16 and 8 use pshufb with the byte-shuffle masks
// at (%eax) and 16(%eax); rotations by 12 and 7 use a pslld/psrld/por
// triple. Rows rotate through the eight XMM registers, with loads of the
// next quarter-round's rows and stores of the previous one's interleaved
// with the arithmetic, so instruction order matters.
// The first four quarter-rounds use rows (0,4,8,12), (1,5,9,13),
// (2,6,10,14), (3,7,11,15); the last four use (0,5,10,15), (1,6,11,12),
// (2,7,8,13), (3,4,9,14).
475.L008loop:
476	paddd	%xmm3,%xmm0
477	movdqa	%xmm3,%xmm2
478	pxor	%xmm0,%xmm6
479	pshufb	(%eax),%xmm6
480	paddd	%xmm6,%xmm4
481	pxor	%xmm4,%xmm2
482	movdqa	-48(%ebx),%xmm3
483	movdqa	%xmm2,%xmm1
484	pslld	$12,%xmm2
485	psrld	$20,%xmm1
486	por	%xmm1,%xmm2
487	movdqa	-112(%ebx),%xmm1
488	paddd	%xmm2,%xmm0
489	movdqa	80(%ebx),%xmm7
490	pxor	%xmm0,%xmm6
491	movdqa	%xmm0,-128(%ebx)
492	pshufb	16(%eax),%xmm6
493	paddd	%xmm6,%xmm4
494	movdqa	%xmm6,64(%ebx)
495	pxor	%xmm4,%xmm2
496	paddd	%xmm3,%xmm1
497	movdqa	%xmm2,%xmm0
498	pslld	$7,%xmm2
499	psrld	$25,%xmm0
500	pxor	%xmm1,%xmm7
501	por	%xmm0,%xmm2
502	movdqa	%xmm4,(%ebx)
503	pshufb	(%eax),%xmm7
504	movdqa	%xmm2,-64(%ebx)
505	paddd	%xmm7,%xmm5
506	movdqa	32(%ebx),%xmm4
507	pxor	%xmm5,%xmm3
508	movdqa	-32(%ebx),%xmm2
509	movdqa	%xmm3,%xmm0
510	pslld	$12,%xmm3
511	psrld	$20,%xmm0
512	por	%xmm0,%xmm3
513	movdqa	-96(%ebx),%xmm0
514	paddd	%xmm3,%xmm1
515	movdqa	96(%ebx),%xmm6
516	pxor	%xmm1,%xmm7
517	movdqa	%xmm1,-112(%ebx)
518	pshufb	16(%eax),%xmm7
519	paddd	%xmm7,%xmm5
520	movdqa	%xmm7,80(%ebx)
521	pxor	%xmm5,%xmm3
522	paddd	%xmm2,%xmm0
523	movdqa	%xmm3,%xmm1
524	pslld	$7,%xmm3
525	psrld	$25,%xmm1
526	pxor	%xmm0,%xmm6
527	por	%xmm1,%xmm3
528	movdqa	%xmm5,16(%ebx)
529	pshufb	(%eax),%xmm6
530	movdqa	%xmm3,-48(%ebx)
531	paddd	%xmm6,%xmm4
532	movdqa	48(%ebx),%xmm5
533	pxor	%xmm4,%xmm2
534	movdqa	-16(%ebx),%xmm3
535	movdqa	%xmm2,%xmm1
536	pslld	$12,%xmm2
537	psrld	$20,%xmm1
538	por	%xmm1,%xmm2
539	movdqa	-80(%ebx),%xmm1
540	paddd	%xmm2,%xmm0
541	movdqa	112(%ebx),%xmm7
542	pxor	%xmm0,%xmm6
543	movdqa	%xmm0,-96(%ebx)
544	pshufb	16(%eax),%xmm6
545	paddd	%xmm6,%xmm4
546	movdqa	%xmm6,96(%ebx)
547	pxor	%xmm4,%xmm2
548	paddd	%xmm3,%xmm1
549	movdqa	%xmm2,%xmm0
550	pslld	$7,%xmm2
551	psrld	$25,%xmm0
552	pxor	%xmm1,%xmm7
553	por	%xmm0,%xmm2
554	pshufb	(%eax),%xmm7
555	movdqa	%xmm2,-32(%ebx)
556	paddd	%xmm7,%xmm5
557	pxor	%xmm5,%xmm3
558	movdqa	-48(%ebx),%xmm2
559	movdqa	%xmm3,%xmm0
560	pslld	$12,%xmm3
561	psrld	$20,%xmm0
562	por	%xmm0,%xmm3
563	movdqa	-128(%ebx),%xmm0
564	paddd	%xmm3,%xmm1
565	pxor	%xmm1,%xmm7
566	movdqa	%xmm1,-80(%ebx)
567	pshufb	16(%eax),%xmm7
568	paddd	%xmm7,%xmm5
569	movdqa	%xmm7,%xmm6
570	pxor	%xmm5,%xmm3
571	paddd	%xmm2,%xmm0
572	movdqa	%xmm3,%xmm1
573	pslld	$7,%xmm3
574	psrld	$25,%xmm1
575	pxor	%xmm0,%xmm6
576	por	%xmm1,%xmm3
577	pshufb	(%eax),%xmm6
578	movdqa	%xmm3,-16(%ebx)
579	paddd	%xmm6,%xmm4
580	pxor	%xmm4,%xmm2
581	movdqa	-32(%ebx),%xmm3
582	movdqa	%xmm2,%xmm1
583	pslld	$12,%xmm2
584	psrld	$20,%xmm1
585	por	%xmm1,%xmm2
586	movdqa	-112(%ebx),%xmm1
587	paddd	%xmm2,%xmm0
588	movdqa	64(%ebx),%xmm7
589	pxor	%xmm0,%xmm6
590	movdqa	%xmm0,-128(%ebx)
591	pshufb	16(%eax),%xmm6
592	paddd	%xmm6,%xmm4
593	movdqa	%xmm6,112(%ebx)
594	pxor	%xmm4,%xmm2
595	paddd	%xmm3,%xmm1
596	movdqa	%xmm2,%xmm0
597	pslld	$7,%xmm2
598	psrld	$25,%xmm0
599	pxor	%xmm1,%xmm7
600	por	%xmm0,%xmm2
601	movdqa	%xmm4,32(%ebx)
602	pshufb	(%eax),%xmm7
603	movdqa	%xmm2,-48(%ebx)
604	paddd	%xmm7,%xmm5
605	movdqa	(%ebx),%xmm4
606	pxor	%xmm5,%xmm3
607	movdqa	-16(%ebx),%xmm2
608	movdqa	%xmm3,%xmm0
609	pslld	$12,%xmm3
610	psrld	$20,%xmm0
611	por	%xmm0,%xmm3
612	movdqa	-96(%ebx),%xmm0
613	paddd	%xmm3,%xmm1
614	movdqa	80(%ebx),%xmm6
615	pxor	%xmm1,%xmm7
616	movdqa	%xmm1,-112(%ebx)
617	pshufb	16(%eax),%xmm7
618	paddd	%xmm7,%xmm5
619	movdqa	%xmm7,64(%ebx)
620	pxor	%xmm5,%xmm3
621	paddd	%xmm2,%xmm0
622	movdqa	%xmm3,%xmm1
623	pslld	$7,%xmm3
624	psrld	$25,%xmm1
625	pxor	%xmm0,%xmm6
626	por	%xmm1,%xmm3
627	movdqa	%xmm5,48(%ebx)
628	pshufb	(%eax),%xmm6
629	movdqa	%xmm3,-32(%ebx)
630	paddd	%xmm6,%xmm4
631	movdqa	16(%ebx),%xmm5
632	pxor	%xmm4,%xmm2
633	movdqa	-64(%ebx),%xmm3
634	movdqa	%xmm2,%xmm1
635	pslld	$12,%xmm2
636	psrld	$20,%xmm1
637	por	%xmm1,%xmm2
638	movdqa	-80(%ebx),%xmm1
639	paddd	%xmm2,%xmm0
640	movdqa	96(%ebx),%xmm7
641	pxor	%xmm0,%xmm6
642	movdqa	%xmm0,-96(%ebx)
643	pshufb	16(%eax),%xmm6
644	paddd	%xmm6,%xmm4
645	movdqa	%xmm6,80(%ebx)
646	pxor	%xmm4,%xmm2
647	paddd	%xmm3,%xmm1
648	movdqa	%xmm2,%xmm0
649	pslld	$7,%xmm2
650	psrld	$25,%xmm0
651	pxor	%xmm1,%xmm7
652	por	%xmm0,%xmm2
653	pshufb	(%eax),%xmm7
654	movdqa	%xmm2,-16(%ebx)
655	paddd	%xmm7,%xmm5
656	pxor	%xmm5,%xmm3
657	movdqa	%xmm3,%xmm0
658	pslld	$12,%xmm3
659	psrld	$20,%xmm0
660	por	%xmm0,%xmm3
661	movdqa	-128(%ebx),%xmm0
662	paddd	%xmm3,%xmm1
663	movdqa	64(%ebx),%xmm6
664	pxor	%xmm1,%xmm7
665	movdqa	%xmm1,-80(%ebx)
666	pshufb	16(%eax),%xmm7
667	paddd	%xmm7,%xmm5
668	movdqa	%xmm7,96(%ebx)
669	pxor	%xmm5,%xmm3
670	movdqa	%xmm3,%xmm1
671	pslld	$7,%xmm3
672	psrld	$25,%xmm1
673	por	%xmm1,%xmm3
674	decl	%edx
675	jnz	.L008loop
// Rounds done: spill the rows still held in registers (4, 8, 9, 12, 14) so
// that all 16 rows are in the working area, except row 0 which is in %xmm0.
676	movdqa	%xmm3,-64(%ebx)
677	movdqa	%xmm4,(%ebx)
678	movdqa	%xmm5,16(%ebx)
679	movdqa	%xmm6,64(%ebx)
680	movdqa	%xmm7,96(%ebx)
// Output stage, done four rows at a time (rows 0-3, 4-7, 8-11, 12-15):
//   1. add the matching input rows (feed-forward),
//   2. transpose the 4x4 dword matrix with punpck{l,h}dq / punpck{l,h}qdq so
//      that each register holds 16 consecutive keystream bytes of one block,
//   3. XOR with the source at displacements -128/-64/0/64 (the same 16-byte
//      column of the four consecutive 64-byte blocks) and store at the same
//      displacements in the destination,
//   4. advance both pointers by 16 for the next group of rows.
// Rows 0..3:
681	movdqa	-112(%ebx),%xmm1
682	movdqa	-96(%ebx),%xmm2
683	movdqa	-80(%ebx),%xmm3
684	paddd	-128(%ebp),%xmm0
685	paddd	-112(%ebp),%xmm1
686	paddd	-96(%ebp),%xmm2
687	paddd	-80(%ebp),%xmm3
688	movdqa	%xmm0,%xmm6
689	punpckldq	%xmm1,%xmm0
690	movdqa	%xmm2,%xmm7
691	punpckldq	%xmm3,%xmm2
692	punpckhdq	%xmm1,%xmm6
693	punpckhdq	%xmm3,%xmm7
694	movdqa	%xmm0,%xmm1
695	punpcklqdq	%xmm2,%xmm0
696	movdqa	%xmm6,%xmm3
697	punpcklqdq	%xmm7,%xmm6
698	punpckhqdq	%xmm2,%xmm1
699	punpckhqdq	%xmm7,%xmm3
700	movdqu	-128(%esi),%xmm4
701	movdqu	-64(%esi),%xmm5
702	movdqu	(%esi),%xmm2
703	movdqu	64(%esi),%xmm7
704	leal	16(%esi),%esi
705	pxor	%xmm0,%xmm4
706	movdqa	-64(%ebx),%xmm0
707	pxor	%xmm1,%xmm5
708	movdqa	-48(%ebx),%xmm1
709	pxor	%xmm2,%xmm6
710	movdqa	-32(%ebx),%xmm2
711	pxor	%xmm3,%xmm7
712	movdqa	-16(%ebx),%xmm3
713	movdqu	%xmm4,-128(%edi)
714	movdqu	%xmm5,-64(%edi)
715	movdqu	%xmm6,(%edi)
716	movdqu	%xmm7,64(%edi)
717	leal	16(%edi),%edi
// Rows 4..7 (already loaded into %xmm0..%xmm3 above):
718	paddd	-64(%ebp),%xmm0
719	paddd	-48(%ebp),%xmm1
720	paddd	-32(%ebp),%xmm2
721	paddd	-16(%ebp),%xmm3
722	movdqa	%xmm0,%xmm6
723	punpckldq	%xmm1,%xmm0
724	movdqa	%xmm2,%xmm7
725	punpckldq	%xmm3,%xmm2
726	punpckhdq	%xmm1,%xmm6
727	punpckhdq	%xmm3,%xmm7
728	movdqa	%xmm0,%xmm1
729	punpcklqdq	%xmm2,%xmm0
730	movdqa	%xmm6,%xmm3
731	punpcklqdq	%xmm7,%xmm6
732	punpckhqdq	%xmm2,%xmm1
733	punpckhqdq	%xmm7,%xmm3
734	movdqu	-128(%esi),%xmm4
735	movdqu	-64(%esi),%xmm5
736	movdqu	(%esi),%xmm2
737	movdqu	64(%esi),%xmm7
738	leal	16(%esi),%esi
739	pxor	%xmm0,%xmm4
740	movdqa	(%ebx),%xmm0
741	pxor	%xmm1,%xmm5
742	movdqa	16(%ebx),%xmm1
743	pxor	%xmm2,%xmm6
744	movdqa	32(%ebx),%xmm2
745	pxor	%xmm3,%xmm7
746	movdqa	48(%ebx),%xmm3
747	movdqu	%xmm4,-128(%edi)
748	movdqu	%xmm5,-64(%edi)
749	movdqu	%xmm6,(%edi)
750	movdqu	%xmm7,64(%edi)
751	leal	16(%edi),%edi
// Rows 8..11:
752	paddd	(%ebp),%xmm0
753	paddd	16(%ebp),%xmm1
754	paddd	32(%ebp),%xmm2
755	paddd	48(%ebp),%xmm3
756	movdqa	%xmm0,%xmm6
757	punpckldq	%xmm1,%xmm0
758	movdqa	%xmm2,%xmm7
759	punpckldq	%xmm3,%xmm2
760	punpckhdq	%xmm1,%xmm6
761	punpckhdq	%xmm3,%xmm7
762	movdqa	%xmm0,%xmm1
763	punpcklqdq	%xmm2,%xmm0
764	movdqa	%xmm6,%xmm3
765	punpcklqdq	%xmm7,%xmm6
766	punpckhqdq	%xmm2,%xmm1
767	punpckhqdq	%xmm7,%xmm3
768	movdqu	-128(%esi),%xmm4
769	movdqu	-64(%esi),%xmm5
770	movdqu	(%esi),%xmm2
771	movdqu	64(%esi),%xmm7
772	leal	16(%esi),%esi
773	pxor	%xmm0,%xmm4
774	movdqa	64(%ebx),%xmm0
775	pxor	%xmm1,%xmm5
776	movdqa	80(%ebx),%xmm1
777	pxor	%xmm2,%xmm6
778	movdqa	96(%ebx),%xmm2
779	pxor	%xmm3,%xmm7
780	movdqa	112(%ebx),%xmm3
781	movdqu	%xmm4,-128(%edi)
782	movdqu	%xmm5,-64(%edi)
783	movdqu	%xmm6,(%edi)
784	movdqu	%xmm7,64(%edi)
785	leal	16(%edi),%edi
// Rows 12..15:
786	paddd	64(%ebp),%xmm0
787	paddd	80(%ebp),%xmm1
788	paddd	96(%ebp),%xmm2
789	paddd	112(%ebp),%xmm3
790	movdqa	%xmm0,%xmm6
791	punpckldq	%xmm1,%xmm0
792	movdqa	%xmm2,%xmm7
793	punpckldq	%xmm3,%xmm2
794	punpckhdq	%xmm1,%xmm6
795	punpckhdq	%xmm3,%xmm7
796	movdqa	%xmm0,%xmm1
797	punpcklqdq	%xmm2,%xmm0
798	movdqa	%xmm6,%xmm3
799	punpcklqdq	%xmm7,%xmm6
800	punpckhqdq	%xmm2,%xmm1
801	punpckhqdq	%xmm7,%xmm3
802	movdqu	-128(%esi),%xmm4
803	movdqu	-64(%esi),%xmm5
804	movdqu	(%esi),%xmm2
805	movdqu	64(%esi),%xmm7
// 16+16+16+208 = 256: the pointers end up exactly four blocks further on.
806	leal	208(%esi),%esi
807	pxor	%xmm0,%xmm4
808	pxor	%xmm1,%xmm5
809	pxor	%xmm2,%xmm6
810	pxor	%xmm3,%xmm7
811	movdqu	%xmm4,-128(%edi)
812	movdqu	%xmm5,-64(%edi)
813	movdqu	%xmm6,(%edi)
814	movdqu	%xmm7,64(%edi)
815	leal	208(%edi),%edi
// Another 256 bytes available? (The count was pre-decremented by 256, so no
// borrow means a full four-block iteration can run.)
816	subl	$256,%ecx
817	jnc	.L007outer_loop
// Undo the last subtraction; %ecx is now the number of bytes left (< 256).
818	addl	$256,%ecx
819	jz	.L009done
// Hand over to the 1-block path: remove the +128 pointer bias, reload the
// arg 4 / arg 5 pointers, and rebuild the counter block in %xmm3 as
// (lane-0 counter of the last four-block group + 4) in word 0 combined with
// words 1..3 of the original 16 bytes at arg 5.
820	movl	520(%esp),%ebx
821	leal	-128(%esi),%esi
822	movl	516(%esp),%edx
823	leal	-128(%edi),%edi
824	movd	64(%ebp),%xmm2
825	movdqu	(%ebx),%xmm3
826	paddd	96(%eax),%xmm2
827	pand	112(%eax),%xmm3
828	por	%xmm2,%xmm3
// ---- 1-block path ----
// State rows: %xmm0 = constants, %xmm1/%xmm2 = the 32 bytes at arg 4,
// %xmm3 = counter block. %xmm6/%xmm7 hold the rotate-16 / rotate-8 pshufb
// masks. A copy of the input state is kept at 0..63(%esp) for the
// feed-forward and for stepping the counter.
829.L0061x:
830	movdqa	32(%eax),%xmm0
831	movdqu	(%edx),%xmm1
832	movdqu	16(%edx),%xmm2
833	movdqa	(%eax),%xmm6
834	movdqa	16(%eax),%xmm7
// NOTE(review): this dword store is overwritten by the movdqa of %xmm3 to
// the same address four instructions later; nothing in between reads it.
835	movl	%ebp,48(%esp)
836	movdqa	%xmm0,(%esp)
837	movdqa	%xmm1,16(%esp)
838	movdqa	%xmm2,32(%esp)
839	movdqa	%xmm3,48(%esp)
840	movl	$10,%edx
841	jmp	.L010loop1x
842.align	16
// Next 64-byte block: reload the input state and add (1,0,0,0) from
// 80(%eax) to the counter row, storing the new counter row back.
843.L011outer1x:
844	movdqa	80(%eax),%xmm3
845	movdqa	(%esp),%xmm0
846	movdqa	16(%esp),%xmm1
847	movdqa	32(%esp),%xmm2
848	paddd	48(%esp),%xmm3
849	movl	$10,%edx
850	movdqa	%xmm3,48(%esp)
851	jmp	.L010loop1x
852.align	16
// Round loop for one block, 10 iterations of two row-wise rounds each.
// ".byte 102,15,56,0,222" encodes pshufb %xmm6,%xmm3 (rotate by 16) and
// ".byte 102,15,56,0,223" encodes pshufb %xmm7,%xmm3 (rotate by 8).
// Between the two rounds the rows are rotated lane-wise with pshufd
// ($78 swaps the 64-bit halves, $57 and $147 rotate by one dword in opposite
// directions) so the second round works on the diagonals; the pshufd group
// at the end of the iteration restores the original lane order.
853.L010loop1x:
854	paddd	%xmm1,%xmm0
855	pxor	%xmm0,%xmm3
856.byte	102,15,56,0,222
857	paddd	%xmm3,%xmm2
858	pxor	%xmm2,%xmm1
859	movdqa	%xmm1,%xmm4
860	psrld	$20,%xmm1
861	pslld	$12,%xmm4
862	por	%xmm4,%xmm1
863	paddd	%xmm1,%xmm0
864	pxor	%xmm0,%xmm3
865.byte	102,15,56,0,223
866	paddd	%xmm3,%xmm2
867	pxor	%xmm2,%xmm1
868	movdqa	%xmm1,%xmm4
869	psrld	$25,%xmm1
870	pslld	$7,%xmm4
871	por	%xmm4,%xmm1
872	pshufd	$78,%xmm2,%xmm2
873	pshufd	$57,%xmm1,%xmm1
874	pshufd	$147,%xmm3,%xmm3
875	nop
876	paddd	%xmm1,%xmm0
877	pxor	%xmm0,%xmm3
878.byte	102,15,56,0,222
879	paddd	%xmm3,%xmm2
880	pxor	%xmm2,%xmm1
881	movdqa	%xmm1,%xmm4
882	psrld	$20,%xmm1
883	pslld	$12,%xmm4
884	por	%xmm4,%xmm1
885	paddd	%xmm1,%xmm0
886	pxor	%xmm0,%xmm3
887.byte	102,15,56,0,223
888	paddd	%xmm3,%xmm2
889	pxor	%xmm2,%xmm1
890	movdqa	%xmm1,%xmm4
891	psrld	$25,%xmm1
892	pslld	$7,%xmm4
893	por	%xmm4,%xmm1
894	pshufd	$78,%xmm2,%xmm2
895	pshufd	$147,%xmm1,%xmm1
896	pshufd	$57,%xmm3,%xmm3
897	decl	%edx
898	jnz	.L010loop1x
// Feed-forward: add the saved input state to obtain the keystream block.
899	paddd	(%esp),%xmm0
900	paddd	16(%esp),%xmm1
901	paddd	32(%esp),%xmm2
902	paddd	48(%esp),%xmm3
903	cmpl	$64,%ecx
904	jb	.L012tail
// Full block: XOR 64 source bytes with the keystream, store, advance both
// pointers by 64 and continue while bytes remain.
905	movdqu	(%esi),%xmm4
906	movdqu	16(%esi),%xmm5
907	pxor	%xmm4,%xmm0
908	movdqu	32(%esi),%xmm4
909	pxor	%xmm5,%xmm1
910	movdqu	48(%esi),%xmm5
911	pxor	%xmm4,%xmm2
912	pxor	%xmm5,%xmm3
913	leal	64(%esi),%esi
914	movdqu	%xmm0,(%edi)
915	movdqu	%xmm1,16(%edi)
916	movdqu	%xmm2,32(%edi)
917	movdqu	%xmm3,48(%edi)
918	leal	64(%edi),%edi
919	subl	$64,%ecx
920	jnz	.L011outer1x
921	jmp	.L009done
// Fewer than 64 bytes remain: write the keystream block to 0..63(%esp) and
// XOR it into the output one byte at a time (%ebp = byte index,
// %ecx = bytes left). %eax is reused as a scratch register from here on.
922.L012tail:
923	movdqa	%xmm0,(%esp)
924	movdqa	%xmm1,16(%esp)
925	movdqa	%xmm2,32(%esp)
926	movdqa	%xmm3,48(%esp)
927	xorl	%eax,%eax
928	xorl	%edx,%edx
929	xorl	%ebp,%ebp
// The index is incremented before the store, hence the -1 displacement.
930.L013tail_loop:
931	movb	(%esp,%ebp,1),%al
932	movb	(%esi,%ebp,1),%dl
933	leal	1(%ebp),%ebp
934	xorb	%dl,%al
935	movb	%al,-1(%edi,%ebp,1)
936	decl	%ecx
937	jnz	.L013tail_loop
// Restore the caller's %esp saved at 512(%esp), then the pushed registers.
938.L009done:
939	movl	512(%esp),%esp
940	popl	%edi
941	popl	%esi
942	popl	%ebx
943	popl	%ebp
944	ret
945.size	ChaCha20_ctr32_ssse3,.-.L_ChaCha20_ctr32_ssse3_begin
// Constant table for ChaCha20_ctr32_ssse3, addressed relative to %eax in
// that routine. Each row is 16 bytes; the offsets below are the
// displacements used by the code.
946.align	64
947.Lssse3_data:
// +0:   pshufb mask that rotates every dword left by 16 bits.
948.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
// +16:  pshufb mask that rotates every dword left by 8 bits.
949.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
// +32:  state words 0..3 (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
//       i.e. the ASCII bytes "expand 32-byte k").
950.long	1634760805,857760878,2036477234,1797285236
// +48:  per-lane counter offsets added to the broadcast counter word in the
//       4-block path setup.
951.long	0,1,2,3
// +64:  counter step per 4-block iteration (subtracted once during setup,
//       added at the top of every iteration).
952.long	4,4,4,4
// +80:  counter step per block in the 1-block path.
953.long	1,0,0,0
// +96:  added to the lane-0 counter when switching from the 4-block path to
//       the 1-block path.
954.long	4,0,0,0
// +112: mask that clears word 0 of the original counter block so the
//       updated counter can be ORed in.
955.long	0,-1,-1,-1
956.align	64
// NUL-terminated ASCII string:
// "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
957.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
958.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
959.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
960.byte	114,103,62,0
961#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
962