1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <ring-core/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
7.text
8.extern	OPENSSL_ia32cap_P
9.hidden OPENSSL_ia32cap_P
10
11
12.section	.rodata
13.align	64
14.Lpoly:
15.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
16
17.LOne:
18.long	1,1,1,1,1,1,1,1
19.LTwo:
20.long	2,2,2,2,2,2,2,2
21.LThree:
22.long	3,3,3,3,3,3,3,3
23.LONE_mont:
24.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
25
26
27.Lord:
28.quad	0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
29.LordK:
30.quad	0xccd1c8aaee00bc4f
31.text
32
33
34
35.globl	ecp_nistz256_neg
36.hidden ecp_nistz256_neg
37.type	ecp_nistz256_neg,@function
38.align	32
39ecp_nistz256_neg:
40.cfi_startproc
41_CET_ENDBR
42	pushq	%r12
43.cfi_adjust_cfa_offset	8
44.cfi_offset	%r12,-16
45	pushq	%r13
46.cfi_adjust_cfa_offset	8
47.cfi_offset	%r13,-24
48.Lneg_body:
49
50	xorq	%r8,%r8
51	xorq	%r9,%r9
52	xorq	%r10,%r10
53	xorq	%r11,%r11
54	xorq	%r13,%r13
55
56	subq	0(%rsi),%r8
57	sbbq	8(%rsi),%r9
58	sbbq	16(%rsi),%r10
59	movq	%r8,%rax
60	sbbq	24(%rsi),%r11
61	leaq	.Lpoly(%rip),%rsi
62	movq	%r9,%rdx
63	sbbq	$0,%r13
64
65	addq	0(%rsi),%r8
66	movq	%r10,%rcx
67	adcq	8(%rsi),%r9
68	adcq	16(%rsi),%r10
69	movq	%r11,%r12
70	adcq	24(%rsi),%r11
71	testq	%r13,%r13
72
73	cmovzq	%rax,%r8
74	cmovzq	%rdx,%r9
75	movq	%r8,0(%rdi)
76	cmovzq	%rcx,%r10
77	movq	%r9,8(%rdi)
78	cmovzq	%r12,%r11
79	movq	%r10,16(%rdi)
80	movq	%r11,24(%rdi)
81
82	movq	0(%rsp),%r13
83.cfi_restore	%r13
84	movq	8(%rsp),%r12
85.cfi_restore	%r12
86	leaq	16(%rsp),%rsp
87.cfi_adjust_cfa_offset	-16
88.Lneg_epilogue:
89	ret
90.cfi_endproc
91.size	ecp_nistz256_neg,.-ecp_nistz256_neg
92
93
94
95
96
97
98.globl	ecp_nistz256_ord_mul_mont
99.hidden ecp_nistz256_ord_mul_mont
100.type	ecp_nistz256_ord_mul_mont,@function
101.align	32
102ecp_nistz256_ord_mul_mont:
103.cfi_startproc
104_CET_ENDBR
105	leaq	OPENSSL_ia32cap_P(%rip),%rcx
106	movq	8(%rcx),%rcx
107	andl	$0x80100,%ecx
108	cmpl	$0x80100,%ecx
109	je	.Lecp_nistz256_ord_mul_montx
110	pushq	%rbp
111.cfi_adjust_cfa_offset	8
112.cfi_offset	%rbp,-16
113	pushq	%rbx
114.cfi_adjust_cfa_offset	8
115.cfi_offset	%rbx,-24
116	pushq	%r12
117.cfi_adjust_cfa_offset	8
118.cfi_offset	%r12,-32
119	pushq	%r13
120.cfi_adjust_cfa_offset	8
121.cfi_offset	%r13,-40
122	pushq	%r14
123.cfi_adjust_cfa_offset	8
124.cfi_offset	%r14,-48
125	pushq	%r15
126.cfi_adjust_cfa_offset	8
127.cfi_offset	%r15,-56
128.Lord_mul_body:
129
130	movq	0(%rdx),%rax
131	movq	%rdx,%rbx
132	leaq	.Lord(%rip),%r14
133	movq	.LordK(%rip),%r15
134
135
136	movq	%rax,%rcx
137	mulq	0(%rsi)
138	movq	%rax,%r8
139	movq	%rcx,%rax
140	movq	%rdx,%r9
141
142	mulq	8(%rsi)
143	addq	%rax,%r9
144	movq	%rcx,%rax
145	adcq	$0,%rdx
146	movq	%rdx,%r10
147
148	mulq	16(%rsi)
149	addq	%rax,%r10
150	movq	%rcx,%rax
151	adcq	$0,%rdx
152
153	movq	%r8,%r13
154	imulq	%r15,%r8
155
156	movq	%rdx,%r11
157	mulq	24(%rsi)
158	addq	%rax,%r11
159	movq	%r8,%rax
160	adcq	$0,%rdx
161	movq	%rdx,%r12
162
163
164	mulq	0(%r14)
165	movq	%r8,%rbp
166	addq	%rax,%r13
167	movq	%r8,%rax
168	adcq	$0,%rdx
169	movq	%rdx,%rcx
170
171	subq	%r8,%r10
172	sbbq	$0,%r8
173
174	mulq	8(%r14)
175	addq	%rcx,%r9
176	adcq	$0,%rdx
177	addq	%rax,%r9
178	movq	%rbp,%rax
179	adcq	%rdx,%r10
180	movq	%rbp,%rdx
181	adcq	$0,%r8
182
183	shlq	$32,%rax
184	shrq	$32,%rdx
185	subq	%rax,%r11
186	movq	8(%rbx),%rax
187	sbbq	%rdx,%rbp
188
189	addq	%r8,%r11
190	adcq	%rbp,%r12
191	adcq	$0,%r13
192
193
194	movq	%rax,%rcx
195	mulq	0(%rsi)
196	addq	%rax,%r9
197	movq	%rcx,%rax
198	adcq	$0,%rdx
199	movq	%rdx,%rbp
200
201	mulq	8(%rsi)
202	addq	%rbp,%r10
203	adcq	$0,%rdx
204	addq	%rax,%r10
205	movq	%rcx,%rax
206	adcq	$0,%rdx
207	movq	%rdx,%rbp
208
209	mulq	16(%rsi)
210	addq	%rbp,%r11
211	adcq	$0,%rdx
212	addq	%rax,%r11
213	movq	%rcx,%rax
214	adcq	$0,%rdx
215
216	movq	%r9,%rcx
217	imulq	%r15,%r9
218
219	movq	%rdx,%rbp
220	mulq	24(%rsi)
221	addq	%rbp,%r12
222	adcq	$0,%rdx
223	xorq	%r8,%r8
224	addq	%rax,%r12
225	movq	%r9,%rax
226	adcq	%rdx,%r13
227	adcq	$0,%r8
228
229
230	mulq	0(%r14)
231	movq	%r9,%rbp
232	addq	%rax,%rcx
233	movq	%r9,%rax
234	adcq	%rdx,%rcx
235
236	subq	%r9,%r11
237	sbbq	$0,%r9
238
239	mulq	8(%r14)
240	addq	%rcx,%r10
241	adcq	$0,%rdx
242	addq	%rax,%r10
243	movq	%rbp,%rax
244	adcq	%rdx,%r11
245	movq	%rbp,%rdx
246	adcq	$0,%r9
247
248	shlq	$32,%rax
249	shrq	$32,%rdx
250	subq	%rax,%r12
251	movq	16(%rbx),%rax
252	sbbq	%rdx,%rbp
253
254	addq	%r9,%r12
255	adcq	%rbp,%r13
256	adcq	$0,%r8
257
258
259	movq	%rax,%rcx
260	mulq	0(%rsi)
261	addq	%rax,%r10
262	movq	%rcx,%rax
263	adcq	$0,%rdx
264	movq	%rdx,%rbp
265
266	mulq	8(%rsi)
267	addq	%rbp,%r11
268	adcq	$0,%rdx
269	addq	%rax,%r11
270	movq	%rcx,%rax
271	adcq	$0,%rdx
272	movq	%rdx,%rbp
273
274	mulq	16(%rsi)
275	addq	%rbp,%r12
276	adcq	$0,%rdx
277	addq	%rax,%r12
278	movq	%rcx,%rax
279	adcq	$0,%rdx
280
281	movq	%r10,%rcx
282	imulq	%r15,%r10
283
284	movq	%rdx,%rbp
285	mulq	24(%rsi)
286	addq	%rbp,%r13
287	adcq	$0,%rdx
288	xorq	%r9,%r9
289	addq	%rax,%r13
290	movq	%r10,%rax
291	adcq	%rdx,%r8
292	adcq	$0,%r9
293
294
295	mulq	0(%r14)
296	movq	%r10,%rbp
297	addq	%rax,%rcx
298	movq	%r10,%rax
299	adcq	%rdx,%rcx
300
301	subq	%r10,%r12
302	sbbq	$0,%r10
303
304	mulq	8(%r14)
305	addq	%rcx,%r11
306	adcq	$0,%rdx
307	addq	%rax,%r11
308	movq	%rbp,%rax
309	adcq	%rdx,%r12
310	movq	%rbp,%rdx
311	adcq	$0,%r10
312
313	shlq	$32,%rax
314	shrq	$32,%rdx
315	subq	%rax,%r13
316	movq	24(%rbx),%rax
317	sbbq	%rdx,%rbp
318
319	addq	%r10,%r13
320	adcq	%rbp,%r8
321	adcq	$0,%r9
322
323
324	movq	%rax,%rcx
325	mulq	0(%rsi)
326	addq	%rax,%r11
327	movq	%rcx,%rax
328	adcq	$0,%rdx
329	movq	%rdx,%rbp
330
331	mulq	8(%rsi)
332	addq	%rbp,%r12
333	adcq	$0,%rdx
334	addq	%rax,%r12
335	movq	%rcx,%rax
336	adcq	$0,%rdx
337	movq	%rdx,%rbp
338
339	mulq	16(%rsi)
340	addq	%rbp,%r13
341	adcq	$0,%rdx
342	addq	%rax,%r13
343	movq	%rcx,%rax
344	adcq	$0,%rdx
345
346	movq	%r11,%rcx
347	imulq	%r15,%r11
348
349	movq	%rdx,%rbp
350	mulq	24(%rsi)
351	addq	%rbp,%r8
352	adcq	$0,%rdx
353	xorq	%r10,%r10
354	addq	%rax,%r8
355	movq	%r11,%rax
356	adcq	%rdx,%r9
357	adcq	$0,%r10
358
359
360	mulq	0(%r14)
361	movq	%r11,%rbp
362	addq	%rax,%rcx
363	movq	%r11,%rax
364	adcq	%rdx,%rcx
365
366	subq	%r11,%r13
367	sbbq	$0,%r11
368
369	mulq	8(%r14)
370	addq	%rcx,%r12
371	adcq	$0,%rdx
372	addq	%rax,%r12
373	movq	%rbp,%rax
374	adcq	%rdx,%r13
375	movq	%rbp,%rdx
376	adcq	$0,%r11
377
378	shlq	$32,%rax
379	shrq	$32,%rdx
380	subq	%rax,%r8
381	sbbq	%rdx,%rbp
382
383	addq	%r11,%r8
384	adcq	%rbp,%r9
385	adcq	$0,%r10
386
387
388	movq	%r12,%rsi
389	subq	0(%r14),%r12
390	movq	%r13,%r11
391	sbbq	8(%r14),%r13
392	movq	%r8,%rcx
393	sbbq	16(%r14),%r8
394	movq	%r9,%rbp
395	sbbq	24(%r14),%r9
396	sbbq	$0,%r10
397
398	cmovcq	%rsi,%r12
399	cmovcq	%r11,%r13
400	cmovcq	%rcx,%r8
401	cmovcq	%rbp,%r9
402
403	movq	%r12,0(%rdi)
404	movq	%r13,8(%rdi)
405	movq	%r8,16(%rdi)
406	movq	%r9,24(%rdi)
407
408	movq	0(%rsp),%r15
409.cfi_restore	%r15
410	movq	8(%rsp),%r14
411.cfi_restore	%r14
412	movq	16(%rsp),%r13
413.cfi_restore	%r13
414	movq	24(%rsp),%r12
415.cfi_restore	%r12
416	movq	32(%rsp),%rbx
417.cfi_restore	%rbx
418	movq	40(%rsp),%rbp
419.cfi_restore	%rbp
420	leaq	48(%rsp),%rsp
421.cfi_adjust_cfa_offset	-48
422.Lord_mul_epilogue:
423	ret
424.cfi_endproc
425.size	ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont
426
427
428
429
430
431
432
433.globl	ecp_nistz256_ord_sqr_mont
434.hidden ecp_nistz256_ord_sqr_mont
435.type	ecp_nistz256_ord_sqr_mont,@function
436.align	32
437ecp_nistz256_ord_sqr_mont:
438.cfi_startproc
439_CET_ENDBR
440	leaq	OPENSSL_ia32cap_P(%rip),%rcx
441	movq	8(%rcx),%rcx
442	andl	$0x80100,%ecx
443	cmpl	$0x80100,%ecx
444	je	.Lecp_nistz256_ord_sqr_montx
445	pushq	%rbp
446.cfi_adjust_cfa_offset	8
447.cfi_offset	%rbp,-16
448	pushq	%rbx
449.cfi_adjust_cfa_offset	8
450.cfi_offset	%rbx,-24
451	pushq	%r12
452.cfi_adjust_cfa_offset	8
453.cfi_offset	%r12,-32
454	pushq	%r13
455.cfi_adjust_cfa_offset	8
456.cfi_offset	%r13,-40
457	pushq	%r14
458.cfi_adjust_cfa_offset	8
459.cfi_offset	%r14,-48
460	pushq	%r15
461.cfi_adjust_cfa_offset	8
462.cfi_offset	%r15,-56
463.Lord_sqr_body:
464
465	movq	0(%rsi),%r8
466	movq	8(%rsi),%rax
467	movq	16(%rsi),%r14
468	movq	24(%rsi),%r15
469	leaq	.Lord(%rip),%rsi
470	movq	%rdx,%rbx
471	jmp	.Loop_ord_sqr
472
473.align	32
474.Loop_ord_sqr:
475
476	movq	%rax,%rbp
477	mulq	%r8
478	movq	%rax,%r9
479.byte	102,72,15,110,205
480	movq	%r14,%rax
481	movq	%rdx,%r10
482
483	mulq	%r8
484	addq	%rax,%r10
485	movq	%r15,%rax
486.byte	102,73,15,110,214
487	adcq	$0,%rdx
488	movq	%rdx,%r11
489
490	mulq	%r8
491	addq	%rax,%r11
492	movq	%r15,%rax
493.byte	102,73,15,110,223
494	adcq	$0,%rdx
495	movq	%rdx,%r12
496
497
498	mulq	%r14
499	movq	%rax,%r13
500	movq	%r14,%rax
501	movq	%rdx,%r14
502
503
504	mulq	%rbp
505	addq	%rax,%r11
506	movq	%r15,%rax
507	adcq	$0,%rdx
508	movq	%rdx,%r15
509
510	mulq	%rbp
511	addq	%rax,%r12
512	adcq	$0,%rdx
513
514	addq	%r15,%r12
515	adcq	%rdx,%r13
516	adcq	$0,%r14
517
518
519	xorq	%r15,%r15
520	movq	%r8,%rax
521	addq	%r9,%r9
522	adcq	%r10,%r10
523	adcq	%r11,%r11
524	adcq	%r12,%r12
525	adcq	%r13,%r13
526	adcq	%r14,%r14
527	adcq	$0,%r15
528
529
530	mulq	%rax
531	movq	%rax,%r8
532.byte	102,72,15,126,200
533	movq	%rdx,%rbp
534
535	mulq	%rax
536	addq	%rbp,%r9
537	adcq	%rax,%r10
538.byte	102,72,15,126,208
539	adcq	$0,%rdx
540	movq	%rdx,%rbp
541
542	mulq	%rax
543	addq	%rbp,%r11
544	adcq	%rax,%r12
545.byte	102,72,15,126,216
546	adcq	$0,%rdx
547	movq	%rdx,%rbp
548
549	movq	%r8,%rcx
550	imulq	32(%rsi),%r8
551
552	mulq	%rax
553	addq	%rbp,%r13
554	adcq	%rax,%r14
555	movq	0(%rsi),%rax
556	adcq	%rdx,%r15
557
558
559	mulq	%r8
560	movq	%r8,%rbp
561	addq	%rax,%rcx
562	movq	8(%rsi),%rax
563	adcq	%rdx,%rcx
564
565	subq	%r8,%r10
566	sbbq	$0,%rbp
567
568	mulq	%r8
569	addq	%rcx,%r9
570	adcq	$0,%rdx
571	addq	%rax,%r9
572	movq	%r8,%rax
573	adcq	%rdx,%r10
574	movq	%r8,%rdx
575	adcq	$0,%rbp
576
577	movq	%r9,%rcx
578	imulq	32(%rsi),%r9
579
580	shlq	$32,%rax
581	shrq	$32,%rdx
582	subq	%rax,%r11
583	movq	0(%rsi),%rax
584	sbbq	%rdx,%r8
585
586	addq	%rbp,%r11
587	adcq	$0,%r8
588
589
590	mulq	%r9
591	movq	%r9,%rbp
592	addq	%rax,%rcx
593	movq	8(%rsi),%rax
594	adcq	%rdx,%rcx
595
596	subq	%r9,%r11
597	sbbq	$0,%rbp
598
599	mulq	%r9
600	addq	%rcx,%r10
601	adcq	$0,%rdx
602	addq	%rax,%r10
603	movq	%r9,%rax
604	adcq	%rdx,%r11
605	movq	%r9,%rdx
606	adcq	$0,%rbp
607
608	movq	%r10,%rcx
609	imulq	32(%rsi),%r10
610
611	shlq	$32,%rax
612	shrq	$32,%rdx
613	subq	%rax,%r8
614	movq	0(%rsi),%rax
615	sbbq	%rdx,%r9
616
617	addq	%rbp,%r8
618	adcq	$0,%r9
619
620
621	mulq	%r10
622	movq	%r10,%rbp
623	addq	%rax,%rcx
624	movq	8(%rsi),%rax
625	adcq	%rdx,%rcx
626
627	subq	%r10,%r8
628	sbbq	$0,%rbp
629
630	mulq	%r10
631	addq	%rcx,%r11
632	adcq	$0,%rdx
633	addq	%rax,%r11
634	movq	%r10,%rax
635	adcq	%rdx,%r8
636	movq	%r10,%rdx
637	adcq	$0,%rbp
638
639	movq	%r11,%rcx
640	imulq	32(%rsi),%r11
641
642	shlq	$32,%rax
643	shrq	$32,%rdx
644	subq	%rax,%r9
645	movq	0(%rsi),%rax
646	sbbq	%rdx,%r10
647
648	addq	%rbp,%r9
649	adcq	$0,%r10
650
651
652	mulq	%r11
653	movq	%r11,%rbp
654	addq	%rax,%rcx
655	movq	8(%rsi),%rax
656	adcq	%rdx,%rcx
657
658	subq	%r11,%r9
659	sbbq	$0,%rbp
660
661	mulq	%r11
662	addq	%rcx,%r8
663	adcq	$0,%rdx
664	addq	%rax,%r8
665	movq	%r11,%rax
666	adcq	%rdx,%r9
667	movq	%r11,%rdx
668	adcq	$0,%rbp
669
670	shlq	$32,%rax
671	shrq	$32,%rdx
672	subq	%rax,%r10
673	sbbq	%rdx,%r11
674
675	addq	%rbp,%r10
676	adcq	$0,%r11
677
678
679	xorq	%rdx,%rdx
680	addq	%r12,%r8
681	adcq	%r13,%r9
682	movq	%r8,%r12
683	adcq	%r14,%r10
684	adcq	%r15,%r11
685	movq	%r9,%rax
686	adcq	$0,%rdx
687
688
689	subq	0(%rsi),%r8
690	movq	%r10,%r14
691	sbbq	8(%rsi),%r9
692	sbbq	16(%rsi),%r10
693	movq	%r11,%r15
694	sbbq	24(%rsi),%r11
695	sbbq	$0,%rdx
696
697	cmovcq	%r12,%r8
698	cmovncq	%r9,%rax
699	cmovncq	%r10,%r14
700	cmovncq	%r11,%r15
701
702	decq	%rbx
703	jnz	.Loop_ord_sqr
704
705	movq	%r8,0(%rdi)
706	movq	%rax,8(%rdi)
707	pxor	%xmm1,%xmm1
708	movq	%r14,16(%rdi)
709	pxor	%xmm2,%xmm2
710	movq	%r15,24(%rdi)
711	pxor	%xmm3,%xmm3
712
713	movq	0(%rsp),%r15
714.cfi_restore	%r15
715	movq	8(%rsp),%r14
716.cfi_restore	%r14
717	movq	16(%rsp),%r13
718.cfi_restore	%r13
719	movq	24(%rsp),%r12
720.cfi_restore	%r12
721	movq	32(%rsp),%rbx
722.cfi_restore	%rbx
723	movq	40(%rsp),%rbp
724.cfi_restore	%rbp
725	leaq	48(%rsp),%rsp
726.cfi_adjust_cfa_offset	-48
727.Lord_sqr_epilogue:
728	ret
729.cfi_endproc
730.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
731
732.type	ecp_nistz256_ord_mul_montx,@function
733.align	32
734ecp_nistz256_ord_mul_montx:
735.cfi_startproc
736.Lecp_nistz256_ord_mul_montx:
737	pushq	%rbp
738.cfi_adjust_cfa_offset	8
739.cfi_offset	%rbp,-16
740	pushq	%rbx
741.cfi_adjust_cfa_offset	8
742.cfi_offset	%rbx,-24
743	pushq	%r12
744.cfi_adjust_cfa_offset	8
745.cfi_offset	%r12,-32
746	pushq	%r13
747.cfi_adjust_cfa_offset	8
748.cfi_offset	%r13,-40
749	pushq	%r14
750.cfi_adjust_cfa_offset	8
751.cfi_offset	%r14,-48
752	pushq	%r15
753.cfi_adjust_cfa_offset	8
754.cfi_offset	%r15,-56
755.Lord_mulx_body:
756
757	movq	%rdx,%rbx
758	movq	0(%rdx),%rdx
759	movq	0(%rsi),%r9
760	movq	8(%rsi),%r10
761	movq	16(%rsi),%r11
762	movq	24(%rsi),%r12
763	leaq	-128(%rsi),%rsi
764	leaq	.Lord-128(%rip),%r14
765	movq	.LordK(%rip),%r15
766
767
768	mulxq	%r9,%r8,%r9
769	mulxq	%r10,%rcx,%r10
770	mulxq	%r11,%rbp,%r11
771	addq	%rcx,%r9
772	mulxq	%r12,%rcx,%r12
773	movq	%r8,%rdx
774	mulxq	%r15,%rdx,%rax
775	adcq	%rbp,%r10
776	adcq	%rcx,%r11
777	adcq	$0,%r12
778
779
780	xorq	%r13,%r13
781	mulxq	0+128(%r14),%rcx,%rbp
782	adcxq	%rcx,%r8
783	adoxq	%rbp,%r9
784
785	mulxq	8+128(%r14),%rcx,%rbp
786	adcxq	%rcx,%r9
787	adoxq	%rbp,%r10
788
789	mulxq	16+128(%r14),%rcx,%rbp
790	adcxq	%rcx,%r10
791	adoxq	%rbp,%r11
792
793	mulxq	24+128(%r14),%rcx,%rbp
794	movq	8(%rbx),%rdx
795	adcxq	%rcx,%r11
796	adoxq	%rbp,%r12
797	adcxq	%r8,%r12
798	adoxq	%r8,%r13
799	adcq	$0,%r13
800
801
802	mulxq	0+128(%rsi),%rcx,%rbp
803	adcxq	%rcx,%r9
804	adoxq	%rbp,%r10
805
806	mulxq	8+128(%rsi),%rcx,%rbp
807	adcxq	%rcx,%r10
808	adoxq	%rbp,%r11
809
810	mulxq	16+128(%rsi),%rcx,%rbp
811	adcxq	%rcx,%r11
812	adoxq	%rbp,%r12
813
814	mulxq	24+128(%rsi),%rcx,%rbp
815	movq	%r9,%rdx
816	mulxq	%r15,%rdx,%rax
817	adcxq	%rcx,%r12
818	adoxq	%rbp,%r13
819
820	adcxq	%r8,%r13
821	adoxq	%r8,%r8
822	adcq	$0,%r8
823
824
825	mulxq	0+128(%r14),%rcx,%rbp
826	adcxq	%rcx,%r9
827	adoxq	%rbp,%r10
828
829	mulxq	8+128(%r14),%rcx,%rbp
830	adcxq	%rcx,%r10
831	adoxq	%rbp,%r11
832
833	mulxq	16+128(%r14),%rcx,%rbp
834	adcxq	%rcx,%r11
835	adoxq	%rbp,%r12
836
837	mulxq	24+128(%r14),%rcx,%rbp
838	movq	16(%rbx),%rdx
839	adcxq	%rcx,%r12
840	adoxq	%rbp,%r13
841	adcxq	%r9,%r13
842	adoxq	%r9,%r8
843	adcq	$0,%r8
844
845
846	mulxq	0+128(%rsi),%rcx,%rbp
847	adcxq	%rcx,%r10
848	adoxq	%rbp,%r11
849
850	mulxq	8+128(%rsi),%rcx,%rbp
851	adcxq	%rcx,%r11
852	adoxq	%rbp,%r12
853
854	mulxq	16+128(%rsi),%rcx,%rbp
855	adcxq	%rcx,%r12
856	adoxq	%rbp,%r13
857
858	mulxq	24+128(%rsi),%rcx,%rbp
859	movq	%r10,%rdx
860	mulxq	%r15,%rdx,%rax
861	adcxq	%rcx,%r13
862	adoxq	%rbp,%r8
863
864	adcxq	%r9,%r8
865	adoxq	%r9,%r9
866	adcq	$0,%r9
867
868
869	mulxq	0+128(%r14),%rcx,%rbp
870	adcxq	%rcx,%r10
871	adoxq	%rbp,%r11
872
873	mulxq	8+128(%r14),%rcx,%rbp
874	adcxq	%rcx,%r11
875	adoxq	%rbp,%r12
876
877	mulxq	16+128(%r14),%rcx,%rbp
878	adcxq	%rcx,%r12
879	adoxq	%rbp,%r13
880
881	mulxq	24+128(%r14),%rcx,%rbp
882	movq	24(%rbx),%rdx
883	adcxq	%rcx,%r13
884	adoxq	%rbp,%r8
885	adcxq	%r10,%r8
886	adoxq	%r10,%r9
887	adcq	$0,%r9
888
889
890	mulxq	0+128(%rsi),%rcx,%rbp
891	adcxq	%rcx,%r11
892	adoxq	%rbp,%r12
893
894	mulxq	8+128(%rsi),%rcx,%rbp
895	adcxq	%rcx,%r12
896	adoxq	%rbp,%r13
897
898	mulxq	16+128(%rsi),%rcx,%rbp
899	adcxq	%rcx,%r13
900	adoxq	%rbp,%r8
901
902	mulxq	24+128(%rsi),%rcx,%rbp
903	movq	%r11,%rdx
904	mulxq	%r15,%rdx,%rax
905	adcxq	%rcx,%r8
906	adoxq	%rbp,%r9
907
908	adcxq	%r10,%r9
909	adoxq	%r10,%r10
910	adcq	$0,%r10
911
912
913	mulxq	0+128(%r14),%rcx,%rbp
914	adcxq	%rcx,%r11
915	adoxq	%rbp,%r12
916
917	mulxq	8+128(%r14),%rcx,%rbp
918	adcxq	%rcx,%r12
919	adoxq	%rbp,%r13
920
921	mulxq	16+128(%r14),%rcx,%rbp
922	adcxq	%rcx,%r13
923	adoxq	%rbp,%r8
924
925	mulxq	24+128(%r14),%rcx,%rbp
926	leaq	128(%r14),%r14
927	movq	%r12,%rbx
928	adcxq	%rcx,%r8
929	adoxq	%rbp,%r9
930	movq	%r13,%rdx
931	adcxq	%r11,%r9
932	adoxq	%r11,%r10
933	adcq	$0,%r10
934
935
936
937	movq	%r8,%rcx
938	subq	0(%r14),%r12
939	sbbq	8(%r14),%r13
940	sbbq	16(%r14),%r8
941	movq	%r9,%rbp
942	sbbq	24(%r14),%r9
943	sbbq	$0,%r10
944
945	cmovcq	%rbx,%r12
946	cmovcq	%rdx,%r13
947	cmovcq	%rcx,%r8
948	cmovcq	%rbp,%r9
949
950	movq	%r12,0(%rdi)
951	movq	%r13,8(%rdi)
952	movq	%r8,16(%rdi)
953	movq	%r9,24(%rdi)
954
955	movq	0(%rsp),%r15
956.cfi_restore	%r15
957	movq	8(%rsp),%r14
958.cfi_restore	%r14
959	movq	16(%rsp),%r13
960.cfi_restore	%r13
961	movq	24(%rsp),%r12
962.cfi_restore	%r12
963	movq	32(%rsp),%rbx
964.cfi_restore	%rbx
965	movq	40(%rsp),%rbp
966.cfi_restore	%rbp
967	leaq	48(%rsp),%rsp
968.cfi_adjust_cfa_offset	-48
969.Lord_mulx_epilogue:
970	ret
971.cfi_endproc
972.size	ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx
973
974.type	ecp_nistz256_ord_sqr_montx,@function
975.align	32
976ecp_nistz256_ord_sqr_montx:
977.cfi_startproc
978.Lecp_nistz256_ord_sqr_montx:
979	pushq	%rbp
980.cfi_adjust_cfa_offset	8
981.cfi_offset	%rbp,-16
982	pushq	%rbx
983.cfi_adjust_cfa_offset	8
984.cfi_offset	%rbx,-24
985	pushq	%r12
986.cfi_adjust_cfa_offset	8
987.cfi_offset	%r12,-32
988	pushq	%r13
989.cfi_adjust_cfa_offset	8
990.cfi_offset	%r13,-40
991	pushq	%r14
992.cfi_adjust_cfa_offset	8
993.cfi_offset	%r14,-48
994	pushq	%r15
995.cfi_adjust_cfa_offset	8
996.cfi_offset	%r15,-56
997.Lord_sqrx_body:
998
999	movq	%rdx,%rbx
1000	movq	0(%rsi),%rdx
1001	movq	8(%rsi),%r14
1002	movq	16(%rsi),%r15
1003	movq	24(%rsi),%r8
1004	leaq	.Lord(%rip),%rsi
1005	jmp	.Loop_ord_sqrx
1006
1007.align	32
1008.Loop_ord_sqrx:
1009	mulxq	%r14,%r9,%r10
1010	mulxq	%r15,%rcx,%r11
1011	movq	%rdx,%rax
1012.byte	102,73,15,110,206
1013	mulxq	%r8,%rbp,%r12
1014	movq	%r14,%rdx
1015	addq	%rcx,%r10
1016.byte	102,73,15,110,215
1017	adcq	%rbp,%r11
1018	adcq	$0,%r12
1019	xorq	%r13,%r13
1020
1021	mulxq	%r15,%rcx,%rbp
1022	adcxq	%rcx,%r11
1023	adoxq	%rbp,%r12
1024
1025	mulxq	%r8,%rcx,%rbp
1026	movq	%r15,%rdx
1027	adcxq	%rcx,%r12
1028	adoxq	%rbp,%r13
1029	adcq	$0,%r13
1030
1031	mulxq	%r8,%rcx,%r14
1032	movq	%rax,%rdx
1033.byte	102,73,15,110,216
1034	xorq	%r15,%r15
1035	adcxq	%r9,%r9
1036	adoxq	%rcx,%r13
1037	adcxq	%r10,%r10
1038	adoxq	%r15,%r14
1039
1040
1041	mulxq	%rdx,%r8,%rbp
1042.byte	102,72,15,126,202
1043	adcxq	%r11,%r11
1044	adoxq	%rbp,%r9
1045	adcxq	%r12,%r12
1046	mulxq	%rdx,%rcx,%rax
1047.byte	102,72,15,126,210
1048	adcxq	%r13,%r13
1049	adoxq	%rcx,%r10
1050	adcxq	%r14,%r14
1051	mulxq	%rdx,%rcx,%rbp
1052.byte	0x67
1053.byte	102,72,15,126,218
1054	adoxq	%rax,%r11
1055	adcxq	%r15,%r15
1056	adoxq	%rcx,%r12
1057	adoxq	%rbp,%r13
1058	mulxq	%rdx,%rcx,%rax
1059	adoxq	%rcx,%r14
1060	adoxq	%rax,%r15
1061
1062
1063	movq	%r8,%rdx
1064	mulxq	32(%rsi),%rdx,%rcx
1065
1066	xorq	%rax,%rax
1067	mulxq	0(%rsi),%rcx,%rbp
1068	adcxq	%rcx,%r8
1069	adoxq	%rbp,%r9
1070	mulxq	8(%rsi),%rcx,%rbp
1071	adcxq	%rcx,%r9
1072	adoxq	%rbp,%r10
1073	mulxq	16(%rsi),%rcx,%rbp
1074	adcxq	%rcx,%r10
1075	adoxq	%rbp,%r11
1076	mulxq	24(%rsi),%rcx,%rbp
1077	adcxq	%rcx,%r11
1078	adoxq	%rbp,%r8
1079	adcxq	%rax,%r8
1080
1081
1082	movq	%r9,%rdx
1083	mulxq	32(%rsi),%rdx,%rcx
1084
1085	mulxq	0(%rsi),%rcx,%rbp
1086	adoxq	%rcx,%r9
1087	adcxq	%rbp,%r10
1088	mulxq	8(%rsi),%rcx,%rbp
1089	adoxq	%rcx,%r10
1090	adcxq	%rbp,%r11
1091	mulxq	16(%rsi),%rcx,%rbp
1092	adoxq	%rcx,%r11
1093	adcxq	%rbp,%r8
1094	mulxq	24(%rsi),%rcx,%rbp
1095	adoxq	%rcx,%r8
1096	adcxq	%rbp,%r9
1097	adoxq	%rax,%r9
1098
1099
1100	movq	%r10,%rdx
1101	mulxq	32(%rsi),%rdx,%rcx
1102
1103	mulxq	0(%rsi),%rcx,%rbp
1104	adcxq	%rcx,%r10
1105	adoxq	%rbp,%r11
1106	mulxq	8(%rsi),%rcx,%rbp
1107	adcxq	%rcx,%r11
1108	adoxq	%rbp,%r8
1109	mulxq	16(%rsi),%rcx,%rbp
1110	adcxq	%rcx,%r8
1111	adoxq	%rbp,%r9
1112	mulxq	24(%rsi),%rcx,%rbp
1113	adcxq	%rcx,%r9
1114	adoxq	%rbp,%r10
1115	adcxq	%rax,%r10
1116
1117
1118	movq	%r11,%rdx
1119	mulxq	32(%rsi),%rdx,%rcx
1120
1121	mulxq	0(%rsi),%rcx,%rbp
1122	adoxq	%rcx,%r11
1123	adcxq	%rbp,%r8
1124	mulxq	8(%rsi),%rcx,%rbp
1125	adoxq	%rcx,%r8
1126	adcxq	%rbp,%r9
1127	mulxq	16(%rsi),%rcx,%rbp
1128	adoxq	%rcx,%r9
1129	adcxq	%rbp,%r10
1130	mulxq	24(%rsi),%rcx,%rbp
1131	adoxq	%rcx,%r10
1132	adcxq	%rbp,%r11
1133	adoxq	%rax,%r11
1134
1135
1136	addq	%r8,%r12
1137	adcq	%r13,%r9
1138	movq	%r12,%rdx
1139	adcq	%r14,%r10
1140	adcq	%r15,%r11
1141	movq	%r9,%r14
1142	adcq	$0,%rax
1143
1144
1145	subq	0(%rsi),%r12
1146	movq	%r10,%r15
1147	sbbq	8(%rsi),%r9
1148	sbbq	16(%rsi),%r10
1149	movq	%r11,%r8
1150	sbbq	24(%rsi),%r11
1151	sbbq	$0,%rax
1152
1153	cmovncq	%r12,%rdx
1154	cmovncq	%r9,%r14
1155	cmovncq	%r10,%r15
1156	cmovncq	%r11,%r8
1157
1158	decq	%rbx
1159	jnz	.Loop_ord_sqrx
1160
1161	movq	%rdx,0(%rdi)
1162	movq	%r14,8(%rdi)
1163	pxor	%xmm1,%xmm1
1164	movq	%r15,16(%rdi)
1165	pxor	%xmm2,%xmm2
1166	movq	%r8,24(%rdi)
1167	pxor	%xmm3,%xmm3
1168
1169	movq	0(%rsp),%r15
1170.cfi_restore	%r15
1171	movq	8(%rsp),%r14
1172.cfi_restore	%r14
1173	movq	16(%rsp),%r13
1174.cfi_restore	%r13
1175	movq	24(%rsp),%r12
1176.cfi_restore	%r12
1177	movq	32(%rsp),%rbx
1178.cfi_restore	%rbx
1179	movq	40(%rsp),%rbp
1180.cfi_restore	%rbp
1181	leaq	48(%rsp),%rsp
1182.cfi_adjust_cfa_offset	-48
1183.Lord_sqrx_epilogue:
1184	ret
1185.cfi_endproc
1186.size	ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx
1187
1188
1189
1190
1191
1192
1193.globl	ecp_nistz256_mul_mont
1194.hidden ecp_nistz256_mul_mont
1195.type	ecp_nistz256_mul_mont,@function
1196.align	32
1197ecp_nistz256_mul_mont:
1198.cfi_startproc
1199_CET_ENDBR
1200	leaq	OPENSSL_ia32cap_P(%rip),%rcx
1201	movq	8(%rcx),%rcx
1202	andl	$0x80100,%ecx
1203.Lmul_mont:
1204	pushq	%rbp
1205.cfi_adjust_cfa_offset	8
1206.cfi_offset	%rbp,-16
1207	pushq	%rbx
1208.cfi_adjust_cfa_offset	8
1209.cfi_offset	%rbx,-24
1210	pushq	%r12
1211.cfi_adjust_cfa_offset	8
1212.cfi_offset	%r12,-32
1213	pushq	%r13
1214.cfi_adjust_cfa_offset	8
1215.cfi_offset	%r13,-40
1216	pushq	%r14
1217.cfi_adjust_cfa_offset	8
1218.cfi_offset	%r14,-48
1219	pushq	%r15
1220.cfi_adjust_cfa_offset	8
1221.cfi_offset	%r15,-56
1222.Lmul_body:
1223	cmpl	$0x80100,%ecx
1224	je	.Lmul_montx
1225	movq	%rdx,%rbx
1226	movq	0(%rdx),%rax
1227	movq	0(%rsi),%r9
1228	movq	8(%rsi),%r10
1229	movq	16(%rsi),%r11
1230	movq	24(%rsi),%r12
1231
1232	call	__ecp_nistz256_mul_montq
1233	jmp	.Lmul_mont_done
1234
1235.align	32
1236.Lmul_montx:
1237	movq	%rdx,%rbx
1238	movq	0(%rdx),%rdx
1239	movq	0(%rsi),%r9
1240	movq	8(%rsi),%r10
1241	movq	16(%rsi),%r11
1242	movq	24(%rsi),%r12
1243	leaq	-128(%rsi),%rsi
1244
1245	call	__ecp_nistz256_mul_montx
1246.Lmul_mont_done:
1247	movq	0(%rsp),%r15
1248.cfi_restore	%r15
1249	movq	8(%rsp),%r14
1250.cfi_restore	%r14
1251	movq	16(%rsp),%r13
1252.cfi_restore	%r13
1253	movq	24(%rsp),%r12
1254.cfi_restore	%r12
1255	movq	32(%rsp),%rbx
1256.cfi_restore	%rbx
1257	movq	40(%rsp),%rbp
1258.cfi_restore	%rbp
1259	leaq	48(%rsp),%rsp
1260.cfi_adjust_cfa_offset	-48
1261.Lmul_epilogue:
1262	ret
1263.cfi_endproc
1264.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
1265
1266.type	__ecp_nistz256_mul_montq,@function
1267.align	32
1268__ecp_nistz256_mul_montq:
1269.cfi_startproc
1270
1271
1272	movq	%rax,%rbp
1273	mulq	%r9
1274	movq	.Lpoly+8(%rip),%r14
1275	movq	%rax,%r8
1276	movq	%rbp,%rax
1277	movq	%rdx,%r9
1278
1279	mulq	%r10
1280	movq	.Lpoly+24(%rip),%r15
1281	addq	%rax,%r9
1282	movq	%rbp,%rax
1283	adcq	$0,%rdx
1284	movq	%rdx,%r10
1285
1286	mulq	%r11
1287	addq	%rax,%r10
1288	movq	%rbp,%rax
1289	adcq	$0,%rdx
1290	movq	%rdx,%r11
1291
1292	mulq	%r12
1293	addq	%rax,%r11
1294	movq	%r8,%rax
1295	adcq	$0,%rdx
1296	xorq	%r13,%r13
1297	movq	%rdx,%r12
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308	movq	%r8,%rbp
1309	shlq	$32,%r8
1310	mulq	%r15
1311	shrq	$32,%rbp
1312	addq	%r8,%r9
1313	adcq	%rbp,%r10
1314	adcq	%rax,%r11
1315	movq	8(%rbx),%rax
1316	adcq	%rdx,%r12
1317	adcq	$0,%r13
1318	xorq	%r8,%r8
1319
1320
1321
1322	movq	%rax,%rbp
1323	mulq	0(%rsi)
1324	addq	%rax,%r9
1325	movq	%rbp,%rax
1326	adcq	$0,%rdx
1327	movq	%rdx,%rcx
1328
1329	mulq	8(%rsi)
1330	addq	%rcx,%r10
1331	adcq	$0,%rdx
1332	addq	%rax,%r10
1333	movq	%rbp,%rax
1334	adcq	$0,%rdx
1335	movq	%rdx,%rcx
1336
1337	mulq	16(%rsi)
1338	addq	%rcx,%r11
1339	adcq	$0,%rdx
1340	addq	%rax,%r11
1341	movq	%rbp,%rax
1342	adcq	$0,%rdx
1343	movq	%rdx,%rcx
1344
1345	mulq	24(%rsi)
1346	addq	%rcx,%r12
1347	adcq	$0,%rdx
1348	addq	%rax,%r12
1349	movq	%r9,%rax
1350	adcq	%rdx,%r13
1351	adcq	$0,%r8
1352
1353
1354
1355	movq	%r9,%rbp
1356	shlq	$32,%r9
1357	mulq	%r15
1358	shrq	$32,%rbp
1359	addq	%r9,%r10
1360	adcq	%rbp,%r11
1361	adcq	%rax,%r12
1362	movq	16(%rbx),%rax
1363	adcq	%rdx,%r13
1364	adcq	$0,%r8
1365	xorq	%r9,%r9
1366
1367
1368
1369	movq	%rax,%rbp
1370	mulq	0(%rsi)
1371	addq	%rax,%r10
1372	movq	%rbp,%rax
1373	adcq	$0,%rdx
1374	movq	%rdx,%rcx
1375
1376	mulq	8(%rsi)
1377	addq	%rcx,%r11
1378	adcq	$0,%rdx
1379	addq	%rax,%r11
1380	movq	%rbp,%rax
1381	adcq	$0,%rdx
1382	movq	%rdx,%rcx
1383
1384	mulq	16(%rsi)
1385	addq	%rcx,%r12
1386	adcq	$0,%rdx
1387	addq	%rax,%r12
1388	movq	%rbp,%rax
1389	adcq	$0,%rdx
1390	movq	%rdx,%rcx
1391
1392	mulq	24(%rsi)
1393	addq	%rcx,%r13
1394	adcq	$0,%rdx
1395	addq	%rax,%r13
1396	movq	%r10,%rax
1397	adcq	%rdx,%r8
1398	adcq	$0,%r9
1399
1400
1401
1402	movq	%r10,%rbp
1403	shlq	$32,%r10
1404	mulq	%r15
1405	shrq	$32,%rbp
1406	addq	%r10,%r11
1407	adcq	%rbp,%r12
1408	adcq	%rax,%r13
1409	movq	24(%rbx),%rax
1410	adcq	%rdx,%r8
1411	adcq	$0,%r9
1412	xorq	%r10,%r10
1413
1414
1415
1416	movq	%rax,%rbp
1417	mulq	0(%rsi)
1418	addq	%rax,%r11
1419	movq	%rbp,%rax
1420	adcq	$0,%rdx
1421	movq	%rdx,%rcx
1422
1423	mulq	8(%rsi)
1424	addq	%rcx,%r12
1425	adcq	$0,%rdx
1426	addq	%rax,%r12
1427	movq	%rbp,%rax
1428	adcq	$0,%rdx
1429	movq	%rdx,%rcx
1430
1431	mulq	16(%rsi)
1432	addq	%rcx,%r13
1433	adcq	$0,%rdx
1434	addq	%rax,%r13
1435	movq	%rbp,%rax
1436	adcq	$0,%rdx
1437	movq	%rdx,%rcx
1438
1439	mulq	24(%rsi)
1440	addq	%rcx,%r8
1441	adcq	$0,%rdx
1442	addq	%rax,%r8
1443	movq	%r11,%rax
1444	adcq	%rdx,%r9
1445	adcq	$0,%r10
1446
1447
1448
1449	movq	%r11,%rbp
1450	shlq	$32,%r11
1451	mulq	%r15
1452	shrq	$32,%rbp
1453	addq	%r11,%r12
1454	adcq	%rbp,%r13
1455	movq	%r12,%rcx
1456	adcq	%rax,%r8
1457	adcq	%rdx,%r9
1458	movq	%r13,%rbp
1459	adcq	$0,%r10
1460
1461
1462
1463	subq	$-1,%r12
1464	movq	%r8,%rbx
1465	sbbq	%r14,%r13
1466	sbbq	$0,%r8
1467	movq	%r9,%rdx
1468	sbbq	%r15,%r9
1469	sbbq	$0,%r10
1470
1471	cmovcq	%rcx,%r12
1472	cmovcq	%rbp,%r13
1473	movq	%r12,0(%rdi)
1474	cmovcq	%rbx,%r8
1475	movq	%r13,8(%rdi)
1476	cmovcq	%rdx,%r9
1477	movq	%r8,16(%rdi)
1478	movq	%r9,24(%rdi)
1479
1480	ret
1481.cfi_endproc
1482.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
1483
1484
1485
1486
1487
1488
1489
1490
1491.globl	ecp_nistz256_sqr_mont
1492.hidden ecp_nistz256_sqr_mont
1493.type	ecp_nistz256_sqr_mont,@function
1494.align	32
1495ecp_nistz256_sqr_mont:
1496.cfi_startproc
1497_CET_ENDBR
1498	leaq	OPENSSL_ia32cap_P(%rip),%rcx
1499	movq	8(%rcx),%rcx
1500	andl	$0x80100,%ecx
1501	pushq	%rbp
1502.cfi_adjust_cfa_offset	8
1503.cfi_offset	%rbp,-16
1504	pushq	%rbx
1505.cfi_adjust_cfa_offset	8
1506.cfi_offset	%rbx,-24
1507	pushq	%r12
1508.cfi_adjust_cfa_offset	8
1509.cfi_offset	%r12,-32
1510	pushq	%r13
1511.cfi_adjust_cfa_offset	8
1512.cfi_offset	%r13,-40
1513	pushq	%r14
1514.cfi_adjust_cfa_offset	8
1515.cfi_offset	%r14,-48
1516	pushq	%r15
1517.cfi_adjust_cfa_offset	8
1518.cfi_offset	%r15,-56
1519.Lsqr_body:
1520	cmpl	$0x80100,%ecx
1521	je	.Lsqr_montx
1522	movq	0(%rsi),%rax
1523	movq	8(%rsi),%r14
1524	movq	16(%rsi),%r15
1525	movq	24(%rsi),%r8
1526
1527	call	__ecp_nistz256_sqr_montq
1528	jmp	.Lsqr_mont_done
1529
1530.align	32
1531.Lsqr_montx:
1532	movq	0(%rsi),%rdx
1533	movq	8(%rsi),%r14
1534	movq	16(%rsi),%r15
1535	movq	24(%rsi),%r8
1536	leaq	-128(%rsi),%rsi
1537
1538	call	__ecp_nistz256_sqr_montx
1539.Lsqr_mont_done:
1540	movq	0(%rsp),%r15
1541.cfi_restore	%r15
1542	movq	8(%rsp),%r14
1543.cfi_restore	%r14
1544	movq	16(%rsp),%r13
1545.cfi_restore	%r13
1546	movq	24(%rsp),%r12
1547.cfi_restore	%r12
1548	movq	32(%rsp),%rbx
1549.cfi_restore	%rbx
1550	movq	40(%rsp),%rbp
1551.cfi_restore	%rbp
1552	leaq	48(%rsp),%rsp
1553.cfi_adjust_cfa_offset	-48
1554.Lsqr_epilogue:
1555	ret
1556.cfi_endproc
1557.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
1558
1559.type	__ecp_nistz256_sqr_montq,@function
1560.align	32
1561__ecp_nistz256_sqr_montq:
1562.cfi_startproc
1563	movq	%rax,%r13
1564	mulq	%r14
1565	movq	%rax,%r9
1566	movq	%r15,%rax
1567	movq	%rdx,%r10
1568
1569	mulq	%r13
1570	addq	%rax,%r10
1571	movq	%r8,%rax
1572	adcq	$0,%rdx
1573	movq	%rdx,%r11
1574
1575	mulq	%r13
1576	addq	%rax,%r11
1577	movq	%r15,%rax
1578	adcq	$0,%rdx
1579	movq	%rdx,%r12
1580
1581
1582	mulq	%r14
1583	addq	%rax,%r11
1584	movq	%r8,%rax
1585	adcq	$0,%rdx
1586	movq	%rdx,%rbp
1587
1588	mulq	%r14
1589	addq	%rax,%r12
1590	movq	%r8,%rax
1591	adcq	$0,%rdx
1592	addq	%rbp,%r12
1593	movq	%rdx,%r13
1594	adcq	$0,%r13
1595
1596
1597	mulq	%r15
1598	xorq	%r15,%r15
1599	addq	%rax,%r13
1600	movq	0(%rsi),%rax
1601	movq	%rdx,%r14
1602	adcq	$0,%r14
1603
1604	addq	%r9,%r9
1605	adcq	%r10,%r10
1606	adcq	%r11,%r11
1607	adcq	%r12,%r12
1608	adcq	%r13,%r13
1609	adcq	%r14,%r14
1610	adcq	$0,%r15
1611
1612	mulq	%rax
1613	movq	%rax,%r8
1614	movq	8(%rsi),%rax
1615	movq	%rdx,%rcx
1616
1617	mulq	%rax
1618	addq	%rcx,%r9
1619	adcq	%rax,%r10
1620	movq	16(%rsi),%rax
1621	adcq	$0,%rdx
1622	movq	%rdx,%rcx
1623
1624	mulq	%rax
1625	addq	%rcx,%r11
1626	adcq	%rax,%r12
1627	movq	24(%rsi),%rax
1628	adcq	$0,%rdx
1629	movq	%rdx,%rcx
1630
1631	mulq	%rax
1632	addq	%rcx,%r13
1633	adcq	%rax,%r14
1634	movq	%r8,%rax
1635	adcq	%rdx,%r15
1636
1637	movq	.Lpoly+8(%rip),%rsi
1638	movq	.Lpoly+24(%rip),%rbp
1639
1640
1641
1642
1643	movq	%r8,%rcx
1644	shlq	$32,%r8
1645	mulq	%rbp
1646	shrq	$32,%rcx
1647	addq	%r8,%r9
1648	adcq	%rcx,%r10
1649	adcq	%rax,%r11
1650	movq	%r9,%rax
1651	adcq	$0,%rdx
1652
1653
1654
1655	movq	%r9,%rcx
1656	shlq	$32,%r9
1657	movq	%rdx,%r8
1658	mulq	%rbp
1659	shrq	$32,%rcx
1660	addq	%r9,%r10
1661	adcq	%rcx,%r11
1662	adcq	%rax,%r8
1663	movq	%r10,%rax
1664	adcq	$0,%rdx
1665
1666
1667
1668	movq	%r10,%rcx
1669	shlq	$32,%r10
1670	movq	%rdx,%r9
1671	mulq	%rbp
1672	shrq	$32,%rcx
1673	addq	%r10,%r11
1674	adcq	%rcx,%r8
1675	adcq	%rax,%r9
1676	movq	%r11,%rax
1677	adcq	$0,%rdx
1678
1679
1680
1681	movq	%r11,%rcx
1682	shlq	$32,%r11
1683	movq	%rdx,%r10
1684	mulq	%rbp
1685	shrq	$32,%rcx
1686	addq	%r11,%r8
1687	adcq	%rcx,%r9
1688	adcq	%rax,%r10
1689	adcq	$0,%rdx
1690	xorq	%r11,%r11
1691
1692
1693
1694	addq	%r8,%r12
1695	adcq	%r9,%r13
1696	movq	%r12,%r8
1697	adcq	%r10,%r14
1698	adcq	%rdx,%r15
1699	movq	%r13,%r9
1700	adcq	$0,%r11
1701
1702	subq	$-1,%r12
1703	movq	%r14,%r10
1704	sbbq	%rsi,%r13
1705	sbbq	$0,%r14
1706	movq	%r15,%rcx
1707	sbbq	%rbp,%r15
1708	sbbq	$0,%r11
1709
1710	cmovcq	%r8,%r12
1711	cmovcq	%r9,%r13
1712	movq	%r12,0(%rdi)
1713	cmovcq	%r10,%r14
1714	movq	%r13,8(%rdi)
1715	cmovcq	%rcx,%r15
1716	movq	%r14,16(%rdi)
1717	movq	%r15,24(%rdi)
1718
1719	ret
1720.cfi_endproc
1721.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
1722.type	__ecp_nistz256_mul_montx,@function
1723.align	32
1724__ecp_nistz256_mul_montx:
1725.cfi_startproc
1726
1727
1728	mulxq	%r9,%r8,%r9
1729	mulxq	%r10,%rcx,%r10
1730	movq	$32,%r14
1731	xorq	%r13,%r13
1732	mulxq	%r11,%rbp,%r11
1733	movq	.Lpoly+24(%rip),%r15
1734	adcq	%rcx,%r9
1735	mulxq	%r12,%rcx,%r12
1736	movq	%r8,%rdx
1737	adcq	%rbp,%r10
1738	shlxq	%r14,%r8,%rbp
1739	adcq	%rcx,%r11
1740	shrxq	%r14,%r8,%rcx
1741	adcq	$0,%r12
1742
1743
1744
1745	addq	%rbp,%r9
1746	adcq	%rcx,%r10
1747
1748	mulxq	%r15,%rcx,%rbp
1749	movq	8(%rbx),%rdx
1750	adcq	%rcx,%r11
1751	adcq	%rbp,%r12
1752	adcq	$0,%r13
1753	xorq	%r8,%r8
1754
1755
1756
1757	mulxq	0+128(%rsi),%rcx,%rbp
1758	adcxq	%rcx,%r9
1759	adoxq	%rbp,%r10
1760
1761	mulxq	8+128(%rsi),%rcx,%rbp
1762	adcxq	%rcx,%r10
1763	adoxq	%rbp,%r11
1764
1765	mulxq	16+128(%rsi),%rcx,%rbp
1766	adcxq	%rcx,%r11
1767	adoxq	%rbp,%r12
1768
1769	mulxq	24+128(%rsi),%rcx,%rbp
1770	movq	%r9,%rdx
1771	adcxq	%rcx,%r12
1772	shlxq	%r14,%r9,%rcx
1773	adoxq	%rbp,%r13
1774	shrxq	%r14,%r9,%rbp
1775
1776	adcxq	%r8,%r13
1777	adoxq	%r8,%r8
1778	adcq	$0,%r8
1779
1780
1781
1782	addq	%rcx,%r10
1783	adcq	%rbp,%r11
1784
1785	mulxq	%r15,%rcx,%rbp
1786	movq	16(%rbx),%rdx
1787	adcq	%rcx,%r12
1788	adcq	%rbp,%r13
1789	adcq	$0,%r8
1790	xorq	%r9,%r9
1791
1792
1793
1794	mulxq	0+128(%rsi),%rcx,%rbp
1795	adcxq	%rcx,%r10
1796	adoxq	%rbp,%r11
1797
1798	mulxq	8+128(%rsi),%rcx,%rbp
1799	adcxq	%rcx,%r11
1800	adoxq	%rbp,%r12
1801
1802	mulxq	16+128(%rsi),%rcx,%rbp
1803	adcxq	%rcx,%r12
1804	adoxq	%rbp,%r13
1805
1806	mulxq	24+128(%rsi),%rcx,%rbp
1807	movq	%r10,%rdx
1808	adcxq	%rcx,%r13
1809	shlxq	%r14,%r10,%rcx
1810	adoxq	%rbp,%r8
1811	shrxq	%r14,%r10,%rbp
1812
1813	adcxq	%r9,%r8
1814	adoxq	%r9,%r9
1815	adcq	$0,%r9
1816
1817
1818
1819	addq	%rcx,%r11
1820	adcq	%rbp,%r12
1821
1822	mulxq	%r15,%rcx,%rbp
1823	movq	24(%rbx),%rdx
1824	adcq	%rcx,%r13
1825	adcq	%rbp,%r8
1826	adcq	$0,%r9
1827	xorq	%r10,%r10
1828
1829
1830
1831	mulxq	0+128(%rsi),%rcx,%rbp
1832	adcxq	%rcx,%r11
1833	adoxq	%rbp,%r12
1834
1835	mulxq	8+128(%rsi),%rcx,%rbp
1836	adcxq	%rcx,%r12
1837	adoxq	%rbp,%r13
1838
1839	mulxq	16+128(%rsi),%rcx,%rbp
1840	adcxq	%rcx,%r13
1841	adoxq	%rbp,%r8
1842
1843	mulxq	24+128(%rsi),%rcx,%rbp
1844	movq	%r11,%rdx
1845	adcxq	%rcx,%r8
1846	shlxq	%r14,%r11,%rcx
1847	adoxq	%rbp,%r9
1848	shrxq	%r14,%r11,%rbp
1849
1850	adcxq	%r10,%r9
1851	adoxq	%r10,%r10
1852	adcq	$0,%r10
1853
1854
1855
1856	addq	%rcx,%r12
1857	adcq	%rbp,%r13
1858
1859	mulxq	%r15,%rcx,%rbp
1860	movq	%r12,%rbx
1861	movq	.Lpoly+8(%rip),%r14
1862	adcq	%rcx,%r8
1863	movq	%r13,%rdx
1864	adcq	%rbp,%r9
1865	adcq	$0,%r10
1866
1867
1868
1869	xorl	%eax,%eax
1870	movq	%r8,%rcx
1871	sbbq	$-1,%r12
1872	sbbq	%r14,%r13
1873	sbbq	$0,%r8
1874	movq	%r9,%rbp
1875	sbbq	%r15,%r9
1876	sbbq	$0,%r10
1877
1878	cmovcq	%rbx,%r12
1879	cmovcq	%rdx,%r13
1880	movq	%r12,0(%rdi)
1881	cmovcq	%rcx,%r8
1882	movq	%r13,8(%rdi)
1883	cmovcq	%rbp,%r9
1884	movq	%r8,16(%rdi)
1885	movq	%r9,24(%rdi)
1886
1887	ret
1888.cfi_endproc
1889.size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
1890
1891.type	__ecp_nistz256_sqr_montx,@function
1892.align	32
1893__ecp_nistz256_sqr_montx:
1894.cfi_startproc
1895	mulxq	%r14,%r9,%r10
1896	mulxq	%r15,%rcx,%r11
1897	xorl	%eax,%eax
1898	adcq	%rcx,%r10
1899	mulxq	%r8,%rbp,%r12
1900	movq	%r14,%rdx
1901	adcq	%rbp,%r11
1902	adcq	$0,%r12
1903	xorq	%r13,%r13
1904
1905
1906	mulxq	%r15,%rcx,%rbp
1907	adcxq	%rcx,%r11
1908	adoxq	%rbp,%r12
1909
1910	mulxq	%r8,%rcx,%rbp
1911	movq	%r15,%rdx
1912	adcxq	%rcx,%r12
1913	adoxq	%rbp,%r13
1914	adcq	$0,%r13
1915
1916
1917	mulxq	%r8,%rcx,%r14
1918	movq	0+128(%rsi),%rdx
1919	xorq	%r15,%r15
1920	adcxq	%r9,%r9
1921	adoxq	%rcx,%r13
1922	adcxq	%r10,%r10
1923	adoxq	%r15,%r14
1924
1925	mulxq	%rdx,%r8,%rbp
1926	movq	8+128(%rsi),%rdx
1927	adcxq	%r11,%r11
1928	adoxq	%rbp,%r9
1929	adcxq	%r12,%r12
1930	mulxq	%rdx,%rcx,%rax
1931	movq	16+128(%rsi),%rdx
1932	adcxq	%r13,%r13
1933	adoxq	%rcx,%r10
1934	adcxq	%r14,%r14
1935.byte	0x67
1936	mulxq	%rdx,%rcx,%rbp
1937	movq	24+128(%rsi),%rdx
1938	adoxq	%rax,%r11
1939	adcxq	%r15,%r15
1940	adoxq	%rcx,%r12
1941	movq	$32,%rsi
1942	adoxq	%rbp,%r13
1943.byte	0x67,0x67
1944	mulxq	%rdx,%rcx,%rax
1945	movq	.Lpoly+24(%rip),%rdx
1946	adoxq	%rcx,%r14
1947	shlxq	%rsi,%r8,%rcx
1948	adoxq	%rax,%r15
1949	shrxq	%rsi,%r8,%rax
1950	movq	%rdx,%rbp
1951
1952
1953	addq	%rcx,%r9
1954	adcq	%rax,%r10
1955
1956	mulxq	%r8,%rcx,%r8
1957	adcq	%rcx,%r11
1958	shlxq	%rsi,%r9,%rcx
1959	adcq	$0,%r8
1960	shrxq	%rsi,%r9,%rax
1961
1962
1963	addq	%rcx,%r10
1964	adcq	%rax,%r11
1965
1966	mulxq	%r9,%rcx,%r9
1967	adcq	%rcx,%r8
1968	shlxq	%rsi,%r10,%rcx
1969	adcq	$0,%r9
1970	shrxq	%rsi,%r10,%rax
1971
1972
1973	addq	%rcx,%r11
1974	adcq	%rax,%r8
1975
1976	mulxq	%r10,%rcx,%r10
1977	adcq	%rcx,%r9
1978	shlxq	%rsi,%r11,%rcx
1979	adcq	$0,%r10
1980	shrxq	%rsi,%r11,%rax
1981
1982
1983	addq	%rcx,%r8
1984	adcq	%rax,%r9
1985
1986	mulxq	%r11,%rcx,%r11
1987	adcq	%rcx,%r10
1988	adcq	$0,%r11
1989
1990	xorq	%rdx,%rdx
1991	addq	%r8,%r12
1992	movq	.Lpoly+8(%rip),%rsi
1993	adcq	%r9,%r13
1994	movq	%r12,%r8
1995	adcq	%r10,%r14
1996	adcq	%r11,%r15
1997	movq	%r13,%r9
1998	adcq	$0,%rdx
1999
2000	subq	$-1,%r12
2001	movq	%r14,%r10
2002	sbbq	%rsi,%r13
2003	sbbq	$0,%r14
2004	movq	%r15,%r11
2005	sbbq	%rbp,%r15
2006	sbbq	$0,%rdx
2007
2008	cmovcq	%r8,%r12
2009	cmovcq	%r9,%r13
2010	movq	%r12,0(%rdi)
2011	cmovcq	%r10,%r14
2012	movq	%r13,8(%rdi)
2013	cmovcq	%r11,%r15
2014	movq	%r14,16(%rdi)
2015	movq	%r15,24(%rdi)
2016
2017	ret
2018.cfi_endproc
2019.size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
2020
2021
2022.globl	ecp_nistz256_select_w5
2023.hidden ecp_nistz256_select_w5
2024.type	ecp_nistz256_select_w5,@function
2025.align	32
2026ecp_nistz256_select_w5:
2027.cfi_startproc
2028_CET_ENDBR
2029	leaq	OPENSSL_ia32cap_P(%rip),%rax
2030	movq	8(%rax),%rax
2031	testl	$32,%eax
2032	jnz	.Lavx2_select_w5
2033	movdqa	.LOne(%rip),%xmm0
2034	movd	%edx,%xmm1
2035
2036	pxor	%xmm2,%xmm2
2037	pxor	%xmm3,%xmm3
2038	pxor	%xmm4,%xmm4
2039	pxor	%xmm5,%xmm5
2040	pxor	%xmm6,%xmm6
2041	pxor	%xmm7,%xmm7
2042
2043	movdqa	%xmm0,%xmm8
2044	pshufd	$0,%xmm1,%xmm1
2045
2046	movq	$16,%rax
2047.Lselect_loop_sse_w5:
2048
2049	movdqa	%xmm8,%xmm15
2050	paddd	%xmm0,%xmm8
2051	pcmpeqd	%xmm1,%xmm15
2052
2053	movdqa	0(%rsi),%xmm9
2054	movdqa	16(%rsi),%xmm10
2055	movdqa	32(%rsi),%xmm11
2056	movdqa	48(%rsi),%xmm12
2057	movdqa	64(%rsi),%xmm13
2058	movdqa	80(%rsi),%xmm14
2059	leaq	96(%rsi),%rsi
2060
2061	pand	%xmm15,%xmm9
2062	pand	%xmm15,%xmm10
2063	por	%xmm9,%xmm2
2064	pand	%xmm15,%xmm11
2065	por	%xmm10,%xmm3
2066	pand	%xmm15,%xmm12
2067	por	%xmm11,%xmm4
2068	pand	%xmm15,%xmm13
2069	por	%xmm12,%xmm5
2070	pand	%xmm15,%xmm14
2071	por	%xmm13,%xmm6
2072	por	%xmm14,%xmm7
2073
2074	decq	%rax
2075	jnz	.Lselect_loop_sse_w5
2076
2077	movdqu	%xmm2,0(%rdi)
2078	movdqu	%xmm3,16(%rdi)
2079	movdqu	%xmm4,32(%rdi)
2080	movdqu	%xmm5,48(%rdi)
2081	movdqu	%xmm6,64(%rdi)
2082	movdqu	%xmm7,80(%rdi)
2083	ret
2084.cfi_endproc
2085.LSEH_end_ecp_nistz256_select_w5:
2086.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
2087
2088
2089
2090.globl	ecp_nistz256_select_w7
2091.hidden ecp_nistz256_select_w7
2092.type	ecp_nistz256_select_w7,@function
2093.align	32
2094ecp_nistz256_select_w7:
2095.cfi_startproc
2096_CET_ENDBR
2097	leaq	OPENSSL_ia32cap_P(%rip),%rax
2098	movq	8(%rax),%rax
2099	testl	$32,%eax
2100	jnz	.Lavx2_select_w7
2101	movdqa	.LOne(%rip),%xmm8
2102	movd	%edx,%xmm1
2103
2104	pxor	%xmm2,%xmm2
2105	pxor	%xmm3,%xmm3
2106	pxor	%xmm4,%xmm4
2107	pxor	%xmm5,%xmm5
2108
2109	movdqa	%xmm8,%xmm0
2110	pshufd	$0,%xmm1,%xmm1
2111	movq	$64,%rax
2112
2113.Lselect_loop_sse_w7:
2114	movdqa	%xmm8,%xmm15
2115	paddd	%xmm0,%xmm8
2116	movdqa	0(%rsi),%xmm9
2117	movdqa	16(%rsi),%xmm10
2118	pcmpeqd	%xmm1,%xmm15
2119	movdqa	32(%rsi),%xmm11
2120	movdqa	48(%rsi),%xmm12
2121	leaq	64(%rsi),%rsi
2122
2123	pand	%xmm15,%xmm9
2124	pand	%xmm15,%xmm10
2125	por	%xmm9,%xmm2
2126	pand	%xmm15,%xmm11
2127	por	%xmm10,%xmm3
2128	pand	%xmm15,%xmm12
2129	por	%xmm11,%xmm4
2130	prefetcht0	255(%rsi)
2131	por	%xmm12,%xmm5
2132
2133	decq	%rax
2134	jnz	.Lselect_loop_sse_w7
2135
2136	movdqu	%xmm2,0(%rdi)
2137	movdqu	%xmm3,16(%rdi)
2138	movdqu	%xmm4,32(%rdi)
2139	movdqu	%xmm5,48(%rdi)
2140	ret
2141.cfi_endproc
2142.LSEH_end_ecp_nistz256_select_w7:
2143.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
2144
2145
2146.type	ecp_nistz256_avx2_select_w5,@function
2147.align	32
2148ecp_nistz256_avx2_select_w5:
2149.cfi_startproc
2150.Lavx2_select_w5:
2151	vzeroupper
2152	vmovdqa	.LTwo(%rip),%ymm0
2153
2154	vpxor	%ymm2,%ymm2,%ymm2
2155	vpxor	%ymm3,%ymm3,%ymm3
2156	vpxor	%ymm4,%ymm4,%ymm4
2157
2158	vmovdqa	.LOne(%rip),%ymm5
2159	vmovdqa	.LTwo(%rip),%ymm10
2160
2161	vmovd	%edx,%xmm1
2162	vpermd	%ymm1,%ymm2,%ymm1
2163
2164	movq	$8,%rax
2165.Lselect_loop_avx2_w5:
2166
2167	vmovdqa	0(%rsi),%ymm6
2168	vmovdqa	32(%rsi),%ymm7
2169	vmovdqa	64(%rsi),%ymm8
2170
2171	vmovdqa	96(%rsi),%ymm11
2172	vmovdqa	128(%rsi),%ymm12
2173	vmovdqa	160(%rsi),%ymm13
2174
2175	vpcmpeqd	%ymm1,%ymm5,%ymm9
2176	vpcmpeqd	%ymm1,%ymm10,%ymm14
2177
2178	vpaddd	%ymm0,%ymm5,%ymm5
2179	vpaddd	%ymm0,%ymm10,%ymm10
2180	leaq	192(%rsi),%rsi
2181
2182	vpand	%ymm9,%ymm6,%ymm6
2183	vpand	%ymm9,%ymm7,%ymm7
2184	vpand	%ymm9,%ymm8,%ymm8
2185	vpand	%ymm14,%ymm11,%ymm11
2186	vpand	%ymm14,%ymm12,%ymm12
2187	vpand	%ymm14,%ymm13,%ymm13
2188
2189	vpxor	%ymm6,%ymm2,%ymm2
2190	vpxor	%ymm7,%ymm3,%ymm3
2191	vpxor	%ymm8,%ymm4,%ymm4
2192	vpxor	%ymm11,%ymm2,%ymm2
2193	vpxor	%ymm12,%ymm3,%ymm3
2194	vpxor	%ymm13,%ymm4,%ymm4
2195
2196	decq	%rax
2197	jnz	.Lselect_loop_avx2_w5
2198
2199	vmovdqu	%ymm2,0(%rdi)
2200	vmovdqu	%ymm3,32(%rdi)
2201	vmovdqu	%ymm4,64(%rdi)
2202	vzeroupper
2203	ret
2204.cfi_endproc
2205.LSEH_end_ecp_nistz256_avx2_select_w5:
2206.size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
2207
2208
2209
2210.type	ecp_nistz256_avx2_select_w7,@function
2211.align	32
2212ecp_nistz256_avx2_select_w7:
2213.cfi_startproc
2214.Lavx2_select_w7:
2215_CET_ENDBR
2216	vzeroupper
2217	vmovdqa	.LThree(%rip),%ymm0
2218
2219	vpxor	%ymm2,%ymm2,%ymm2
2220	vpxor	%ymm3,%ymm3,%ymm3
2221
2222	vmovdqa	.LOne(%rip),%ymm4
2223	vmovdqa	.LTwo(%rip),%ymm8
2224	vmovdqa	.LThree(%rip),%ymm12
2225
2226	vmovd	%edx,%xmm1
2227	vpermd	%ymm1,%ymm2,%ymm1
2228
2229
2230	movq	$21,%rax
2231.Lselect_loop_avx2_w7:
2232
2233	vmovdqa	0(%rsi),%ymm5
2234	vmovdqa	32(%rsi),%ymm6
2235
2236	vmovdqa	64(%rsi),%ymm9
2237	vmovdqa	96(%rsi),%ymm10
2238
2239	vmovdqa	128(%rsi),%ymm13
2240	vmovdqa	160(%rsi),%ymm14
2241
2242	vpcmpeqd	%ymm1,%ymm4,%ymm7
2243	vpcmpeqd	%ymm1,%ymm8,%ymm11
2244	vpcmpeqd	%ymm1,%ymm12,%ymm15
2245
2246	vpaddd	%ymm0,%ymm4,%ymm4
2247	vpaddd	%ymm0,%ymm8,%ymm8
2248	vpaddd	%ymm0,%ymm12,%ymm12
2249	leaq	192(%rsi),%rsi
2250
2251	vpand	%ymm7,%ymm5,%ymm5
2252	vpand	%ymm7,%ymm6,%ymm6
2253	vpand	%ymm11,%ymm9,%ymm9
2254	vpand	%ymm11,%ymm10,%ymm10
2255	vpand	%ymm15,%ymm13,%ymm13
2256	vpand	%ymm15,%ymm14,%ymm14
2257
2258	vpxor	%ymm5,%ymm2,%ymm2
2259	vpxor	%ymm6,%ymm3,%ymm3
2260	vpxor	%ymm9,%ymm2,%ymm2
2261	vpxor	%ymm10,%ymm3,%ymm3
2262	vpxor	%ymm13,%ymm2,%ymm2
2263	vpxor	%ymm14,%ymm3,%ymm3
2264
2265	decq	%rax
2266	jnz	.Lselect_loop_avx2_w7
2267
2268
2269	vmovdqa	0(%rsi),%ymm5
2270	vmovdqa	32(%rsi),%ymm6
2271
2272	vpcmpeqd	%ymm1,%ymm4,%ymm7
2273
2274	vpand	%ymm7,%ymm5,%ymm5
2275	vpand	%ymm7,%ymm6,%ymm6
2276
2277	vpxor	%ymm5,%ymm2,%ymm2
2278	vpxor	%ymm6,%ymm3,%ymm3
2279
2280	vmovdqu	%ymm2,0(%rdi)
2281	vmovdqu	%ymm3,32(%rdi)
2282	vzeroupper
2283	ret
2284.cfi_endproc
2285.LSEH_end_ecp_nistz256_avx2_select_w7:
2286.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
2287.type	__ecp_nistz256_add_toq,@function
2288.align	32
2289__ecp_nistz256_add_toq:
2290.cfi_startproc
2291	xorq	%r11,%r11
2292	addq	0(%rbx),%r12
2293	adcq	8(%rbx),%r13
2294	movq	%r12,%rax
2295	adcq	16(%rbx),%r8
2296	adcq	24(%rbx),%r9
2297	movq	%r13,%rbp
2298	adcq	$0,%r11
2299
2300	subq	$-1,%r12
2301	movq	%r8,%rcx
2302	sbbq	%r14,%r13
2303	sbbq	$0,%r8
2304	movq	%r9,%r10
2305	sbbq	%r15,%r9
2306	sbbq	$0,%r11
2307
2308	cmovcq	%rax,%r12
2309	cmovcq	%rbp,%r13
2310	movq	%r12,0(%rdi)
2311	cmovcq	%rcx,%r8
2312	movq	%r13,8(%rdi)
2313	cmovcq	%r10,%r9
2314	movq	%r8,16(%rdi)
2315	movq	%r9,24(%rdi)
2316
2317	ret
2318.cfi_endproc
2319.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
2320
2321.type	__ecp_nistz256_sub_fromq,@function
2322.align	32
2323__ecp_nistz256_sub_fromq:
2324.cfi_startproc
2325	subq	0(%rbx),%r12
2326	sbbq	8(%rbx),%r13
2327	movq	%r12,%rax
2328	sbbq	16(%rbx),%r8
2329	sbbq	24(%rbx),%r9
2330	movq	%r13,%rbp
2331	sbbq	%r11,%r11
2332
2333	addq	$-1,%r12
2334	movq	%r8,%rcx
2335	adcq	%r14,%r13
2336	adcq	$0,%r8
2337	movq	%r9,%r10
2338	adcq	%r15,%r9
2339	testq	%r11,%r11
2340
2341	cmovzq	%rax,%r12
2342	cmovzq	%rbp,%r13
2343	movq	%r12,0(%rdi)
2344	cmovzq	%rcx,%r8
2345	movq	%r13,8(%rdi)
2346	cmovzq	%r10,%r9
2347	movq	%r8,16(%rdi)
2348	movq	%r9,24(%rdi)
2349
2350	ret
2351.cfi_endproc
2352.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
2353
2354.type	__ecp_nistz256_subq,@function
2355.align	32
2356__ecp_nistz256_subq:
2357.cfi_startproc
2358	subq	%r12,%rax
2359	sbbq	%r13,%rbp
2360	movq	%rax,%r12
2361	sbbq	%r8,%rcx
2362	sbbq	%r9,%r10
2363	movq	%rbp,%r13
2364	sbbq	%r11,%r11
2365
2366	addq	$-1,%rax
2367	movq	%rcx,%r8
2368	adcq	%r14,%rbp
2369	adcq	$0,%rcx
2370	movq	%r10,%r9
2371	adcq	%r15,%r10
2372	testq	%r11,%r11
2373
2374	cmovnzq	%rax,%r12
2375	cmovnzq	%rbp,%r13
2376	cmovnzq	%rcx,%r8
2377	cmovnzq	%r10,%r9
2378
2379	ret
2380.cfi_endproc
2381.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq
2382
2383.type	__ecp_nistz256_mul_by_2q,@function
2384.align	32
2385__ecp_nistz256_mul_by_2q:
2386.cfi_startproc
2387	xorq	%r11,%r11
2388	addq	%r12,%r12
2389	adcq	%r13,%r13
2390	movq	%r12,%rax
2391	adcq	%r8,%r8
2392	adcq	%r9,%r9
2393	movq	%r13,%rbp
2394	adcq	$0,%r11
2395
2396	subq	$-1,%r12
2397	movq	%r8,%rcx
2398	sbbq	%r14,%r13
2399	sbbq	$0,%r8
2400	movq	%r9,%r10
2401	sbbq	%r15,%r9
2402	sbbq	$0,%r11
2403
2404	cmovcq	%rax,%r12
2405	cmovcq	%rbp,%r13
2406	movq	%r12,0(%rdi)
2407	cmovcq	%rcx,%r8
2408	movq	%r13,8(%rdi)
2409	cmovcq	%r10,%r9
2410	movq	%r8,16(%rdi)
2411	movq	%r9,24(%rdi)
2412
2413	ret
2414.cfi_endproc
2415.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
2416.globl	ecp_nistz256_point_double
2417.hidden ecp_nistz256_point_double
2418.type	ecp_nistz256_point_double,@function
2419.align	32
2420ecp_nistz256_point_double:
2421.cfi_startproc
2422_CET_ENDBR
2423	leaq	OPENSSL_ia32cap_P(%rip),%rcx
2424	movq	8(%rcx),%rcx
2425	andl	$0x80100,%ecx
2426	cmpl	$0x80100,%ecx
2427	je	.Lpoint_doublex
2428	pushq	%rbp
2429.cfi_adjust_cfa_offset	8
2430.cfi_offset	%rbp,-16
2431	pushq	%rbx
2432.cfi_adjust_cfa_offset	8
2433.cfi_offset	%rbx,-24
2434	pushq	%r12
2435.cfi_adjust_cfa_offset	8
2436.cfi_offset	%r12,-32
2437	pushq	%r13
2438.cfi_adjust_cfa_offset	8
2439.cfi_offset	%r13,-40
2440	pushq	%r14
2441.cfi_adjust_cfa_offset	8
2442.cfi_offset	%r14,-48
2443	pushq	%r15
2444.cfi_adjust_cfa_offset	8
2445.cfi_offset	%r15,-56
2446	subq	$160+8,%rsp
2447.cfi_adjust_cfa_offset	32*5+8
2448.Lpoint_doubleq_body:
2449
2450.Lpoint_double_shortcutq:
2451	movdqu	0(%rsi),%xmm0
2452	movq	%rsi,%rbx
2453	movdqu	16(%rsi),%xmm1
2454	movq	32+0(%rsi),%r12
2455	movq	32+8(%rsi),%r13
2456	movq	32+16(%rsi),%r8
2457	movq	32+24(%rsi),%r9
2458	movq	.Lpoly+8(%rip),%r14
2459	movq	.Lpoly+24(%rip),%r15
2460	movdqa	%xmm0,96(%rsp)
2461	movdqa	%xmm1,96+16(%rsp)
2462	leaq	32(%rdi),%r10
2463	leaq	64(%rdi),%r11
2464.byte	102,72,15,110,199
2465.byte	102,73,15,110,202
2466.byte	102,73,15,110,211
2467
2468	leaq	0(%rsp),%rdi
2469	call	__ecp_nistz256_mul_by_2q
2470
2471	movq	64+0(%rsi),%rax
2472	movq	64+8(%rsi),%r14
2473	movq	64+16(%rsi),%r15
2474	movq	64+24(%rsi),%r8
2475	leaq	64-0(%rsi),%rsi
2476	leaq	64(%rsp),%rdi
2477	call	__ecp_nistz256_sqr_montq
2478
2479	movq	0+0(%rsp),%rax
2480	movq	8+0(%rsp),%r14
2481	leaq	0+0(%rsp),%rsi
2482	movq	16+0(%rsp),%r15
2483	movq	24+0(%rsp),%r8
2484	leaq	0(%rsp),%rdi
2485	call	__ecp_nistz256_sqr_montq
2486
2487	movq	32(%rbx),%rax
2488	movq	64+0(%rbx),%r9
2489	movq	64+8(%rbx),%r10
2490	movq	64+16(%rbx),%r11
2491	movq	64+24(%rbx),%r12
2492	leaq	64-0(%rbx),%rsi
2493	leaq	32(%rbx),%rbx
2494.byte	102,72,15,126,215
2495	call	__ecp_nistz256_mul_montq
2496	call	__ecp_nistz256_mul_by_2q
2497
2498	movq	96+0(%rsp),%r12
2499	movq	96+8(%rsp),%r13
2500	leaq	64(%rsp),%rbx
2501	movq	96+16(%rsp),%r8
2502	movq	96+24(%rsp),%r9
2503	leaq	32(%rsp),%rdi
2504	call	__ecp_nistz256_add_toq
2505
2506	movq	96+0(%rsp),%r12
2507	movq	96+8(%rsp),%r13
2508	leaq	64(%rsp),%rbx
2509	movq	96+16(%rsp),%r8
2510	movq	96+24(%rsp),%r9
2511	leaq	64(%rsp),%rdi
2512	call	__ecp_nistz256_sub_fromq
2513
2514	movq	0+0(%rsp),%rax
2515	movq	8+0(%rsp),%r14
2516	leaq	0+0(%rsp),%rsi
2517	movq	16+0(%rsp),%r15
2518	movq	24+0(%rsp),%r8
2519.byte	102,72,15,126,207
2520	call	__ecp_nistz256_sqr_montq
2521	xorq	%r9,%r9
2522	movq	%r12,%rax
2523	addq	$-1,%r12
2524	movq	%r13,%r10
2525	adcq	%rsi,%r13
2526	movq	%r14,%rcx
2527	adcq	$0,%r14
2528	movq	%r15,%r8
2529	adcq	%rbp,%r15
2530	adcq	$0,%r9
2531	xorq	%rsi,%rsi
2532	testq	$1,%rax
2533
2534	cmovzq	%rax,%r12
2535	cmovzq	%r10,%r13
2536	cmovzq	%rcx,%r14
2537	cmovzq	%r8,%r15
2538	cmovzq	%rsi,%r9
2539
2540	movq	%r13,%rax
2541	shrq	$1,%r12
2542	shlq	$63,%rax
2543	movq	%r14,%r10
2544	shrq	$1,%r13
2545	orq	%rax,%r12
2546	shlq	$63,%r10
2547	movq	%r15,%rcx
2548	shrq	$1,%r14
2549	orq	%r10,%r13
2550	shlq	$63,%rcx
2551	movq	%r12,0(%rdi)
2552	shrq	$1,%r15
2553	movq	%r13,8(%rdi)
2554	shlq	$63,%r9
2555	orq	%rcx,%r14
2556	orq	%r9,%r15
2557	movq	%r14,16(%rdi)
2558	movq	%r15,24(%rdi)
2559	movq	64(%rsp),%rax
2560	leaq	64(%rsp),%rbx
2561	movq	0+32(%rsp),%r9
2562	movq	8+32(%rsp),%r10
2563	leaq	0+32(%rsp),%rsi
2564	movq	16+32(%rsp),%r11
2565	movq	24+32(%rsp),%r12
2566	leaq	32(%rsp),%rdi
2567	call	__ecp_nistz256_mul_montq
2568
2569	leaq	128(%rsp),%rdi
2570	call	__ecp_nistz256_mul_by_2q
2571
2572	leaq	32(%rsp),%rbx
2573	leaq	32(%rsp),%rdi
2574	call	__ecp_nistz256_add_toq
2575
2576	movq	96(%rsp),%rax
2577	leaq	96(%rsp),%rbx
2578	movq	0+0(%rsp),%r9
2579	movq	8+0(%rsp),%r10
2580	leaq	0+0(%rsp),%rsi
2581	movq	16+0(%rsp),%r11
2582	movq	24+0(%rsp),%r12
2583	leaq	0(%rsp),%rdi
2584	call	__ecp_nistz256_mul_montq
2585
2586	leaq	128(%rsp),%rdi
2587	call	__ecp_nistz256_mul_by_2q
2588
2589	movq	0+32(%rsp),%rax
2590	movq	8+32(%rsp),%r14
2591	leaq	0+32(%rsp),%rsi
2592	movq	16+32(%rsp),%r15
2593	movq	24+32(%rsp),%r8
2594.byte	102,72,15,126,199
2595	call	__ecp_nistz256_sqr_montq
2596
2597	leaq	128(%rsp),%rbx
2598	movq	%r14,%r8
2599	movq	%r15,%r9
2600	movq	%rsi,%r14
2601	movq	%rbp,%r15
2602	call	__ecp_nistz256_sub_fromq
2603
2604	movq	0+0(%rsp),%rax
2605	movq	0+8(%rsp),%rbp
2606	movq	0+16(%rsp),%rcx
2607	movq	0+24(%rsp),%r10
2608	leaq	0(%rsp),%rdi
2609	call	__ecp_nistz256_subq
2610
2611	movq	32(%rsp),%rax
2612	leaq	32(%rsp),%rbx
2613	movq	%r12,%r14
2614	xorl	%ecx,%ecx
2615	movq	%r12,0+0(%rsp)
2616	movq	%r13,%r10
2617	movq	%r13,0+8(%rsp)
2618	cmovzq	%r8,%r11
2619	movq	%r8,0+16(%rsp)
2620	leaq	0-0(%rsp),%rsi
2621	cmovzq	%r9,%r12
2622	movq	%r9,0+24(%rsp)
2623	movq	%r14,%r9
2624	leaq	0(%rsp),%rdi
2625	call	__ecp_nistz256_mul_montq
2626
2627.byte	102,72,15,126,203
2628.byte	102,72,15,126,207
2629	call	__ecp_nistz256_sub_fromq
2630
2631	leaq	160+56(%rsp),%rsi
2632.cfi_def_cfa	%rsi,8
2633	movq	-48(%rsi),%r15
2634.cfi_restore	%r15
2635	movq	-40(%rsi),%r14
2636.cfi_restore	%r14
2637	movq	-32(%rsi),%r13
2638.cfi_restore	%r13
2639	movq	-24(%rsi),%r12
2640.cfi_restore	%r12
2641	movq	-16(%rsi),%rbx
2642.cfi_restore	%rbx
2643	movq	-8(%rsi),%rbp
2644.cfi_restore	%rbp
2645	leaq	(%rsi),%rsp
2646.cfi_def_cfa_register	%rsp
2647.Lpoint_doubleq_epilogue:
2648	ret
2649.cfi_endproc
2650.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
2651.globl	ecp_nistz256_point_add
2652.hidden ecp_nistz256_point_add
2653.type	ecp_nistz256_point_add,@function
2654.align	32
2655ecp_nistz256_point_add:
2656.cfi_startproc
2657_CET_ENDBR
2658	leaq	OPENSSL_ia32cap_P(%rip),%rcx
2659	movq	8(%rcx),%rcx
2660	andl	$0x80100,%ecx
2661	cmpl	$0x80100,%ecx
2662	je	.Lpoint_addx
2663	pushq	%rbp
2664.cfi_adjust_cfa_offset	8
2665.cfi_offset	%rbp,-16
2666	pushq	%rbx
2667.cfi_adjust_cfa_offset	8
2668.cfi_offset	%rbx,-24
2669	pushq	%r12
2670.cfi_adjust_cfa_offset	8
2671.cfi_offset	%r12,-32
2672	pushq	%r13
2673.cfi_adjust_cfa_offset	8
2674.cfi_offset	%r13,-40
2675	pushq	%r14
2676.cfi_adjust_cfa_offset	8
2677.cfi_offset	%r14,-48
2678	pushq	%r15
2679.cfi_adjust_cfa_offset	8
2680.cfi_offset	%r15,-56
2681	subq	$576+8,%rsp
2682.cfi_adjust_cfa_offset	32*18+8
2683.Lpoint_addq_body:
2684
2685	movdqu	0(%rsi),%xmm0
2686	movdqu	16(%rsi),%xmm1
2687	movdqu	32(%rsi),%xmm2
2688	movdqu	48(%rsi),%xmm3
2689	movdqu	64(%rsi),%xmm4
2690	movdqu	80(%rsi),%xmm5
2691	movq	%rsi,%rbx
2692	movq	%rdx,%rsi
2693	movdqa	%xmm0,384(%rsp)
2694	movdqa	%xmm1,384+16(%rsp)
2695	movdqa	%xmm2,416(%rsp)
2696	movdqa	%xmm3,416+16(%rsp)
2697	movdqa	%xmm4,448(%rsp)
2698	movdqa	%xmm5,448+16(%rsp)
2699	por	%xmm4,%xmm5
2700
2701	movdqu	0(%rsi),%xmm0
2702	pshufd	$0xb1,%xmm5,%xmm3
2703	movdqu	16(%rsi),%xmm1
2704	movdqu	32(%rsi),%xmm2
2705	por	%xmm3,%xmm5
2706	movdqu	48(%rsi),%xmm3
2707	movq	64+0(%rsi),%rax
2708	movq	64+8(%rsi),%r14
2709	movq	64+16(%rsi),%r15
2710	movq	64+24(%rsi),%r8
2711	movdqa	%xmm0,480(%rsp)
2712	pshufd	$0x1e,%xmm5,%xmm4
2713	movdqa	%xmm1,480+16(%rsp)
2714	movdqu	64(%rsi),%xmm0
2715	movdqu	80(%rsi),%xmm1
2716	movdqa	%xmm2,512(%rsp)
2717	movdqa	%xmm3,512+16(%rsp)
2718	por	%xmm4,%xmm5
2719	pxor	%xmm4,%xmm4
2720	por	%xmm0,%xmm1
2721.byte	102,72,15,110,199
2722
2723	leaq	64-0(%rsi),%rsi
2724	movq	%rax,544+0(%rsp)
2725	movq	%r14,544+8(%rsp)
2726	movq	%r15,544+16(%rsp)
2727	movq	%r8,544+24(%rsp)
2728	leaq	96(%rsp),%rdi
2729	call	__ecp_nistz256_sqr_montq
2730
2731	pcmpeqd	%xmm4,%xmm5
2732	pshufd	$0xb1,%xmm1,%xmm4
2733	por	%xmm1,%xmm4
2734	pshufd	$0,%xmm5,%xmm5
2735	pshufd	$0x1e,%xmm4,%xmm3
2736	por	%xmm3,%xmm4
2737	pxor	%xmm3,%xmm3
2738	pcmpeqd	%xmm3,%xmm4
2739	pshufd	$0,%xmm4,%xmm4
2740	movq	64+0(%rbx),%rax
2741	movq	64+8(%rbx),%r14
2742	movq	64+16(%rbx),%r15
2743	movq	64+24(%rbx),%r8
2744.byte	102,72,15,110,203
2745
2746	leaq	64-0(%rbx),%rsi
2747	leaq	32(%rsp),%rdi
2748	call	__ecp_nistz256_sqr_montq
2749
2750	movq	544(%rsp),%rax
2751	leaq	544(%rsp),%rbx
2752	movq	0+96(%rsp),%r9
2753	movq	8+96(%rsp),%r10
2754	leaq	0+96(%rsp),%rsi
2755	movq	16+96(%rsp),%r11
2756	movq	24+96(%rsp),%r12
2757	leaq	224(%rsp),%rdi
2758	call	__ecp_nistz256_mul_montq
2759
2760	movq	448(%rsp),%rax
2761	leaq	448(%rsp),%rbx
2762	movq	0+32(%rsp),%r9
2763	movq	8+32(%rsp),%r10
2764	leaq	0+32(%rsp),%rsi
2765	movq	16+32(%rsp),%r11
2766	movq	24+32(%rsp),%r12
2767	leaq	256(%rsp),%rdi
2768	call	__ecp_nistz256_mul_montq
2769
2770	movq	416(%rsp),%rax
2771	leaq	416(%rsp),%rbx
2772	movq	0+224(%rsp),%r9
2773	movq	8+224(%rsp),%r10
2774	leaq	0+224(%rsp),%rsi
2775	movq	16+224(%rsp),%r11
2776	movq	24+224(%rsp),%r12
2777	leaq	224(%rsp),%rdi
2778	call	__ecp_nistz256_mul_montq
2779
2780	movq	512(%rsp),%rax
2781	leaq	512(%rsp),%rbx
2782	movq	0+256(%rsp),%r9
2783	movq	8+256(%rsp),%r10
2784	leaq	0+256(%rsp),%rsi
2785	movq	16+256(%rsp),%r11
2786	movq	24+256(%rsp),%r12
2787	leaq	256(%rsp),%rdi
2788	call	__ecp_nistz256_mul_montq
2789
2790	leaq	224(%rsp),%rbx
2791	leaq	64(%rsp),%rdi
2792	call	__ecp_nistz256_sub_fromq
2793
2794	orq	%r13,%r12
2795	movdqa	%xmm4,%xmm2
2796	orq	%r8,%r12
2797	orq	%r9,%r12
2798	por	%xmm5,%xmm2
2799.byte	102,73,15,110,220
2800
2801	movq	384(%rsp),%rax
2802	leaq	384(%rsp),%rbx
2803	movq	0+96(%rsp),%r9
2804	movq	8+96(%rsp),%r10
2805	leaq	0+96(%rsp),%rsi
2806	movq	16+96(%rsp),%r11
2807	movq	24+96(%rsp),%r12
2808	leaq	160(%rsp),%rdi
2809	call	__ecp_nistz256_mul_montq
2810
2811	movq	480(%rsp),%rax
2812	leaq	480(%rsp),%rbx
2813	movq	0+32(%rsp),%r9
2814	movq	8+32(%rsp),%r10
2815	leaq	0+32(%rsp),%rsi
2816	movq	16+32(%rsp),%r11
2817	movq	24+32(%rsp),%r12
2818	leaq	192(%rsp),%rdi
2819	call	__ecp_nistz256_mul_montq
2820
2821	leaq	160(%rsp),%rbx
2822	leaq	0(%rsp),%rdi
2823	call	__ecp_nistz256_sub_fromq
2824
2825	orq	%r13,%r12
2826	orq	%r8,%r12
2827	orq	%r9,%r12
2828
2829.byte	102,73,15,126,208
2830.byte	102,73,15,126,217
2831	orq	%r8,%r12
2832.byte	0x3e
2833	jnz	.Ladd_proceedq
2834
2835
2836
2837	testq	%r9,%r9
2838	jz	.Ladd_doubleq
2839
2840
2841
2842
2843
2844
2845.byte	102,72,15,126,199
2846	pxor	%xmm0,%xmm0
2847	movdqu	%xmm0,0(%rdi)
2848	movdqu	%xmm0,16(%rdi)
2849	movdqu	%xmm0,32(%rdi)
2850	movdqu	%xmm0,48(%rdi)
2851	movdqu	%xmm0,64(%rdi)
2852	movdqu	%xmm0,80(%rdi)
2853	jmp	.Ladd_doneq
2854
2855.align	32
2856.Ladd_doubleq:
2857.byte	102,72,15,126,206
2858.byte	102,72,15,126,199
2859	addq	$416,%rsp
2860.cfi_adjust_cfa_offset	-416
2861	jmp	.Lpoint_double_shortcutq
2862.cfi_adjust_cfa_offset	416
2863
2864.align	32
2865.Ladd_proceedq:
2866	movq	0+64(%rsp),%rax
2867	movq	8+64(%rsp),%r14
2868	leaq	0+64(%rsp),%rsi
2869	movq	16+64(%rsp),%r15
2870	movq	24+64(%rsp),%r8
2871	leaq	96(%rsp),%rdi
2872	call	__ecp_nistz256_sqr_montq
2873
2874	movq	448(%rsp),%rax
2875	leaq	448(%rsp),%rbx
2876	movq	0+0(%rsp),%r9
2877	movq	8+0(%rsp),%r10
2878	leaq	0+0(%rsp),%rsi
2879	movq	16+0(%rsp),%r11
2880	movq	24+0(%rsp),%r12
2881	leaq	352(%rsp),%rdi
2882	call	__ecp_nistz256_mul_montq
2883
2884	movq	0+0(%rsp),%rax
2885	movq	8+0(%rsp),%r14
2886	leaq	0+0(%rsp),%rsi
2887	movq	16+0(%rsp),%r15
2888	movq	24+0(%rsp),%r8
2889	leaq	32(%rsp),%rdi
2890	call	__ecp_nistz256_sqr_montq
2891
2892	movq	544(%rsp),%rax
2893	leaq	544(%rsp),%rbx
2894	movq	0+352(%rsp),%r9
2895	movq	8+352(%rsp),%r10
2896	leaq	0+352(%rsp),%rsi
2897	movq	16+352(%rsp),%r11
2898	movq	24+352(%rsp),%r12
2899	leaq	352(%rsp),%rdi
2900	call	__ecp_nistz256_mul_montq
2901
2902	movq	0(%rsp),%rax
2903	leaq	0(%rsp),%rbx
2904	movq	0+32(%rsp),%r9
2905	movq	8+32(%rsp),%r10
2906	leaq	0+32(%rsp),%rsi
2907	movq	16+32(%rsp),%r11
2908	movq	24+32(%rsp),%r12
2909	leaq	128(%rsp),%rdi
2910	call	__ecp_nistz256_mul_montq
2911
2912	movq	160(%rsp),%rax
2913	leaq	160(%rsp),%rbx
2914	movq	0+32(%rsp),%r9
2915	movq	8+32(%rsp),%r10
2916	leaq	0+32(%rsp),%rsi
2917	movq	16+32(%rsp),%r11
2918	movq	24+32(%rsp),%r12
2919	leaq	192(%rsp),%rdi
2920	call	__ecp_nistz256_mul_montq
2921
2922
2923
2924
2925	xorq	%r11,%r11
2926	addq	%r12,%r12
2927	leaq	96(%rsp),%rsi
2928	adcq	%r13,%r13
2929	movq	%r12,%rax
2930	adcq	%r8,%r8
2931	adcq	%r9,%r9
2932	movq	%r13,%rbp
2933	adcq	$0,%r11
2934
2935	subq	$-1,%r12
2936	movq	%r8,%rcx
2937	sbbq	%r14,%r13
2938	sbbq	$0,%r8
2939	movq	%r9,%r10
2940	sbbq	%r15,%r9
2941	sbbq	$0,%r11
2942
2943	cmovcq	%rax,%r12
2944	movq	0(%rsi),%rax
2945	cmovcq	%rbp,%r13
2946	movq	8(%rsi),%rbp
2947	cmovcq	%rcx,%r8
2948	movq	16(%rsi),%rcx
2949	cmovcq	%r10,%r9
2950	movq	24(%rsi),%r10
2951
2952	call	__ecp_nistz256_subq
2953
2954	leaq	128(%rsp),%rbx
2955	leaq	288(%rsp),%rdi
2956	call	__ecp_nistz256_sub_fromq
2957
2958	movq	192+0(%rsp),%rax
2959	movq	192+8(%rsp),%rbp
2960	movq	192+16(%rsp),%rcx
2961	movq	192+24(%rsp),%r10
2962	leaq	320(%rsp),%rdi
2963
2964	call	__ecp_nistz256_subq
2965
2966	movq	%r12,0(%rdi)
2967	movq	%r13,8(%rdi)
2968	movq	%r8,16(%rdi)
2969	movq	%r9,24(%rdi)
2970	movq	128(%rsp),%rax
2971	leaq	128(%rsp),%rbx
2972	movq	0+224(%rsp),%r9
2973	movq	8+224(%rsp),%r10
2974	leaq	0+224(%rsp),%rsi
2975	movq	16+224(%rsp),%r11
2976	movq	24+224(%rsp),%r12
2977	leaq	256(%rsp),%rdi
2978	call	__ecp_nistz256_mul_montq
2979
2980	movq	320(%rsp),%rax
2981	leaq	320(%rsp),%rbx
2982	movq	0+64(%rsp),%r9
2983	movq	8+64(%rsp),%r10
2984	leaq	0+64(%rsp),%rsi
2985	movq	16+64(%rsp),%r11
2986	movq	24+64(%rsp),%r12
2987	leaq	320(%rsp),%rdi
2988	call	__ecp_nistz256_mul_montq
2989
2990	leaq	256(%rsp),%rbx
2991	leaq	320(%rsp),%rdi
2992	call	__ecp_nistz256_sub_fromq
2993
2994.byte	102,72,15,126,199
2995
2996	movdqa	%xmm5,%xmm0
2997	movdqa	%xmm5,%xmm1
2998	pandn	352(%rsp),%xmm0
2999	movdqa	%xmm5,%xmm2
3000	pandn	352+16(%rsp),%xmm1
3001	movdqa	%xmm5,%xmm3
3002	pand	544(%rsp),%xmm2
3003	pand	544+16(%rsp),%xmm3
3004	por	%xmm0,%xmm2
3005	por	%xmm1,%xmm3
3006
3007	movdqa	%xmm4,%xmm0
3008	movdqa	%xmm4,%xmm1
3009	pandn	%xmm2,%xmm0
3010	movdqa	%xmm4,%xmm2
3011	pandn	%xmm3,%xmm1
3012	movdqa	%xmm4,%xmm3
3013	pand	448(%rsp),%xmm2
3014	pand	448+16(%rsp),%xmm3
3015	por	%xmm0,%xmm2
3016	por	%xmm1,%xmm3
3017	movdqu	%xmm2,64(%rdi)
3018	movdqu	%xmm3,80(%rdi)
3019
3020	movdqa	%xmm5,%xmm0
3021	movdqa	%xmm5,%xmm1
3022	pandn	288(%rsp),%xmm0
3023	movdqa	%xmm5,%xmm2
3024	pandn	288+16(%rsp),%xmm1
3025	movdqa	%xmm5,%xmm3
3026	pand	480(%rsp),%xmm2
3027	pand	480+16(%rsp),%xmm3
3028	por	%xmm0,%xmm2
3029	por	%xmm1,%xmm3
3030
3031	movdqa	%xmm4,%xmm0
3032	movdqa	%xmm4,%xmm1
3033	pandn	%xmm2,%xmm0
3034	movdqa	%xmm4,%xmm2
3035	pandn	%xmm3,%xmm1
3036	movdqa	%xmm4,%xmm3
3037	pand	384(%rsp),%xmm2
3038	pand	384+16(%rsp),%xmm3
3039	por	%xmm0,%xmm2
3040	por	%xmm1,%xmm3
3041	movdqu	%xmm2,0(%rdi)
3042	movdqu	%xmm3,16(%rdi)
3043
3044	movdqa	%xmm5,%xmm0
3045	movdqa	%xmm5,%xmm1
3046	pandn	320(%rsp),%xmm0
3047	movdqa	%xmm5,%xmm2
3048	pandn	320+16(%rsp),%xmm1
3049	movdqa	%xmm5,%xmm3
3050	pand	512(%rsp),%xmm2
3051	pand	512+16(%rsp),%xmm3
3052	por	%xmm0,%xmm2
3053	por	%xmm1,%xmm3
3054
3055	movdqa	%xmm4,%xmm0
3056	movdqa	%xmm4,%xmm1
3057	pandn	%xmm2,%xmm0
3058	movdqa	%xmm4,%xmm2
3059	pandn	%xmm3,%xmm1
3060	movdqa	%xmm4,%xmm3
3061	pand	416(%rsp),%xmm2
3062	pand	416+16(%rsp),%xmm3
3063	por	%xmm0,%xmm2
3064	por	%xmm1,%xmm3
3065	movdqu	%xmm2,32(%rdi)
3066	movdqu	%xmm3,48(%rdi)
3067
3068.Ladd_doneq:
3069	leaq	576+56(%rsp),%rsi
3070.cfi_def_cfa	%rsi,8
3071	movq	-48(%rsi),%r15
3072.cfi_restore	%r15
3073	movq	-40(%rsi),%r14
3074.cfi_restore	%r14
3075	movq	-32(%rsi),%r13
3076.cfi_restore	%r13
3077	movq	-24(%rsi),%r12
3078.cfi_restore	%r12
3079	movq	-16(%rsi),%rbx
3080.cfi_restore	%rbx
3081	movq	-8(%rsi),%rbp
3082.cfi_restore	%rbp
3083	leaq	(%rsi),%rsp
3084.cfi_def_cfa_register	%rsp
3085.Lpoint_addq_epilogue:
3086	ret
3087.cfi_endproc
3088.size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
3089.globl	ecp_nistz256_point_add_affine
3090.hidden ecp_nistz256_point_add_affine
3091.type	ecp_nistz256_point_add_affine,@function
3092.align	32
3093ecp_nistz256_point_add_affine:
3094.cfi_startproc
3095_CET_ENDBR
3096	leaq	OPENSSL_ia32cap_P(%rip),%rcx
3097	movq	8(%rcx),%rcx
3098	andl	$0x80100,%ecx
3099	cmpl	$0x80100,%ecx
3100	je	.Lpoint_add_affinex
3101	pushq	%rbp
3102.cfi_adjust_cfa_offset	8
3103.cfi_offset	%rbp,-16
3104	pushq	%rbx
3105.cfi_adjust_cfa_offset	8
3106.cfi_offset	%rbx,-24
3107	pushq	%r12
3108.cfi_adjust_cfa_offset	8
3109.cfi_offset	%r12,-32
3110	pushq	%r13
3111.cfi_adjust_cfa_offset	8
3112.cfi_offset	%r13,-40
3113	pushq	%r14
3114.cfi_adjust_cfa_offset	8
3115.cfi_offset	%r14,-48
3116	pushq	%r15
3117.cfi_adjust_cfa_offset	8
3118.cfi_offset	%r15,-56
3119	subq	$480+8,%rsp
3120.cfi_adjust_cfa_offset	32*15+8
3121.Ladd_affineq_body:
3122
3123	movdqu	0(%rsi),%xmm0
3124	movq	%rdx,%rbx
3125	movdqu	16(%rsi),%xmm1
3126	movdqu	32(%rsi),%xmm2
3127	movdqu	48(%rsi),%xmm3
3128	movdqu	64(%rsi),%xmm4
3129	movdqu	80(%rsi),%xmm5
3130	movq	64+0(%rsi),%rax
3131	movq	64+8(%rsi),%r14
3132	movq	64+16(%rsi),%r15
3133	movq	64+24(%rsi),%r8
3134	movdqa	%xmm0,320(%rsp)
3135	movdqa	%xmm1,320+16(%rsp)
3136	movdqa	%xmm2,352(%rsp)
3137	movdqa	%xmm3,352+16(%rsp)
3138	movdqa	%xmm4,384(%rsp)
3139	movdqa	%xmm5,384+16(%rsp)
3140	por	%xmm4,%xmm5
3141
3142	movdqu	0(%rbx),%xmm0
3143	pshufd	$0xb1,%xmm5,%xmm3
3144	movdqu	16(%rbx),%xmm1
3145	movdqu	32(%rbx),%xmm2
3146	por	%xmm3,%xmm5
3147	movdqu	48(%rbx),%xmm3
3148	movdqa	%xmm0,416(%rsp)
3149	pshufd	$0x1e,%xmm5,%xmm4
3150	movdqa	%xmm1,416+16(%rsp)
3151	por	%xmm0,%xmm1
3152.byte	102,72,15,110,199
3153	movdqa	%xmm2,448(%rsp)
3154	movdqa	%xmm3,448+16(%rsp)
3155	por	%xmm2,%xmm3
3156	por	%xmm4,%xmm5
3157	pxor	%xmm4,%xmm4
3158	por	%xmm1,%xmm3
3159
3160	leaq	64-0(%rsi),%rsi
3161	leaq	32(%rsp),%rdi
3162	call	__ecp_nistz256_sqr_montq
3163
3164	pcmpeqd	%xmm4,%xmm5
3165	pshufd	$0xb1,%xmm3,%xmm4
3166	movq	0(%rbx),%rax
3167
3168	movq	%r12,%r9
3169	por	%xmm3,%xmm4
3170	pshufd	$0,%xmm5,%xmm5
3171	pshufd	$0x1e,%xmm4,%xmm3
3172	movq	%r13,%r10
3173	por	%xmm3,%xmm4
3174	pxor	%xmm3,%xmm3
3175	movq	%r14,%r11
3176	pcmpeqd	%xmm3,%xmm4
3177	pshufd	$0,%xmm4,%xmm4
3178
3179	leaq	32-0(%rsp),%rsi
3180	movq	%r15,%r12
3181	leaq	0(%rsp),%rdi
3182	call	__ecp_nistz256_mul_montq
3183
3184	leaq	320(%rsp),%rbx
3185	leaq	64(%rsp),%rdi
3186	call	__ecp_nistz256_sub_fromq
3187
3188	movq	384(%rsp),%rax
3189	leaq	384(%rsp),%rbx
3190	movq	0+32(%rsp),%r9
3191	movq	8+32(%rsp),%r10
3192	leaq	0+32(%rsp),%rsi
3193	movq	16+32(%rsp),%r11
3194	movq	24+32(%rsp),%r12
3195	leaq	32(%rsp),%rdi
3196	call	__ecp_nistz256_mul_montq
3197
3198	movq	384(%rsp),%rax
3199	leaq	384(%rsp),%rbx
3200	movq	0+64(%rsp),%r9
3201	movq	8+64(%rsp),%r10
3202	leaq	0+64(%rsp),%rsi
3203	movq	16+64(%rsp),%r11
3204	movq	24+64(%rsp),%r12
3205	leaq	288(%rsp),%rdi
3206	call	__ecp_nistz256_mul_montq
3207
3208	movq	448(%rsp),%rax
3209	leaq	448(%rsp),%rbx
3210	movq	0+32(%rsp),%r9
3211	movq	8+32(%rsp),%r10
3212	leaq	0+32(%rsp),%rsi
3213	movq	16+32(%rsp),%r11
3214	movq	24+32(%rsp),%r12
3215	leaq	32(%rsp),%rdi
3216	call	__ecp_nistz256_mul_montq
3217
3218	leaq	352(%rsp),%rbx
3219	leaq	96(%rsp),%rdi
3220	call	__ecp_nistz256_sub_fromq
3221
3222	movq	0+64(%rsp),%rax
3223	movq	8+64(%rsp),%r14
3224	leaq	0+64(%rsp),%rsi
3225	movq	16+64(%rsp),%r15
3226	movq	24+64(%rsp),%r8
3227	leaq	128(%rsp),%rdi
3228	call	__ecp_nistz256_sqr_montq
3229
3230	movq	0+96(%rsp),%rax
3231	movq	8+96(%rsp),%r14
3232	leaq	0+96(%rsp),%rsi
3233	movq	16+96(%rsp),%r15
3234	movq	24+96(%rsp),%r8
3235	leaq	192(%rsp),%rdi
3236	call	__ecp_nistz256_sqr_montq
3237
3238	movq	128(%rsp),%rax
3239	leaq	128(%rsp),%rbx
3240	movq	0+64(%rsp),%r9
3241	movq	8+64(%rsp),%r10
3242	leaq	0+64(%rsp),%rsi
3243	movq	16+64(%rsp),%r11
3244	movq	24+64(%rsp),%r12
3245	leaq	160(%rsp),%rdi
3246	call	__ecp_nistz256_mul_montq
3247
3248	movq	320(%rsp),%rax
3249	leaq	320(%rsp),%rbx
3250	movq	0+128(%rsp),%r9
3251	movq	8+128(%rsp),%r10
3252	leaq	0+128(%rsp),%rsi
3253	movq	16+128(%rsp),%r11
3254	movq	24+128(%rsp),%r12
3255	leaq	0(%rsp),%rdi
3256	call	__ecp_nistz256_mul_montq
3257
3258
3259
3260
3261	xorq	%r11,%r11
3262	addq	%r12,%r12
3263	leaq	192(%rsp),%rsi
3264	adcq	%r13,%r13
3265	movq	%r12,%rax
3266	adcq	%r8,%r8
3267	adcq	%r9,%r9
3268	movq	%r13,%rbp
3269	adcq	$0,%r11
3270
3271	subq	$-1,%r12
3272	movq	%r8,%rcx
3273	sbbq	%r14,%r13
3274	sbbq	$0,%r8
3275	movq	%r9,%r10
3276	sbbq	%r15,%r9
3277	sbbq	$0,%r11
3278
3279	cmovcq	%rax,%r12
3280	movq	0(%rsi),%rax
3281	cmovcq	%rbp,%r13
3282	movq	8(%rsi),%rbp
3283	cmovcq	%rcx,%r8
3284	movq	16(%rsi),%rcx
3285	cmovcq	%r10,%r9
3286	movq	24(%rsi),%r10
3287
3288	call	__ecp_nistz256_subq
3289
3290	leaq	160(%rsp),%rbx
3291	leaq	224(%rsp),%rdi
3292	call	__ecp_nistz256_sub_fromq
3293
3294	movq	0+0(%rsp),%rax
3295	movq	0+8(%rsp),%rbp
3296	movq	0+16(%rsp),%rcx
3297	movq	0+24(%rsp),%r10
3298	leaq	64(%rsp),%rdi
3299
3300	call	__ecp_nistz256_subq
3301
3302	movq	%r12,0(%rdi)
3303	movq	%r13,8(%rdi)
3304	movq	%r8,16(%rdi)
3305	movq	%r9,24(%rdi)
3306	movq	352(%rsp),%rax
3307	leaq	352(%rsp),%rbx
3308	movq	0+160(%rsp),%r9
3309	movq	8+160(%rsp),%r10
3310	leaq	0+160(%rsp),%rsi
3311	movq	16+160(%rsp),%r11
3312	movq	24+160(%rsp),%r12
3313	leaq	32(%rsp),%rdi
3314	call	__ecp_nistz256_mul_montq
3315
3316	movq	96(%rsp),%rax
3317	leaq	96(%rsp),%rbx
3318	movq	0+64(%rsp),%r9
3319	movq	8+64(%rsp),%r10
3320	leaq	0+64(%rsp),%rsi
3321	movq	16+64(%rsp),%r11
3322	movq	24+64(%rsp),%r12
3323	leaq	64(%rsp),%rdi
3324	call	__ecp_nistz256_mul_montq
3325
3326	leaq	32(%rsp),%rbx
3327	leaq	256(%rsp),%rdi
3328	call	__ecp_nistz256_sub_fromq
3329
3330.byte	102,72,15,126,199
3331
3332	movdqa	%xmm5,%xmm0
3333	movdqa	%xmm5,%xmm1
3334	pandn	288(%rsp),%xmm0
3335	movdqa	%xmm5,%xmm2
3336	pandn	288+16(%rsp),%xmm1
3337	movdqa	%xmm5,%xmm3
3338	pand	.LONE_mont(%rip),%xmm2
3339	pand	.LONE_mont+16(%rip),%xmm3
3340	por	%xmm0,%xmm2
3341	por	%xmm1,%xmm3
3342
3343	movdqa	%xmm4,%xmm0
3344	movdqa	%xmm4,%xmm1
3345	pandn	%xmm2,%xmm0
3346	movdqa	%xmm4,%xmm2
3347	pandn	%xmm3,%xmm1
3348	movdqa	%xmm4,%xmm3
3349	pand	384(%rsp),%xmm2
3350	pand	384+16(%rsp),%xmm3
3351	por	%xmm0,%xmm2
3352	por	%xmm1,%xmm3
3353	movdqu	%xmm2,64(%rdi)
3354	movdqu	%xmm3,80(%rdi)
3355
3356	movdqa	%xmm5,%xmm0
3357	movdqa	%xmm5,%xmm1
3358	pandn	224(%rsp),%xmm0
3359	movdqa	%xmm5,%xmm2
3360	pandn	224+16(%rsp),%xmm1
3361	movdqa	%xmm5,%xmm3
3362	pand	416(%rsp),%xmm2
3363	pand	416+16(%rsp),%xmm3
3364	por	%xmm0,%xmm2
3365	por	%xmm1,%xmm3
3366
3367	movdqa	%xmm4,%xmm0
3368	movdqa	%xmm4,%xmm1
3369	pandn	%xmm2,%xmm0
3370	movdqa	%xmm4,%xmm2
3371	pandn	%xmm3,%xmm1
3372	movdqa	%xmm4,%xmm3
3373	pand	320(%rsp),%xmm2
3374	pand	320+16(%rsp),%xmm3
3375	por	%xmm0,%xmm2
3376	por	%xmm1,%xmm3
3377	movdqu	%xmm2,0(%rdi)
3378	movdqu	%xmm3,16(%rdi)
3379
3380	movdqa	%xmm5,%xmm0
3381	movdqa	%xmm5,%xmm1
3382	pandn	256(%rsp),%xmm0
3383	movdqa	%xmm5,%xmm2
3384	pandn	256+16(%rsp),%xmm1
3385	movdqa	%xmm5,%xmm3
3386	pand	448(%rsp),%xmm2
3387	pand	448+16(%rsp),%xmm3
3388	por	%xmm0,%xmm2
3389	por	%xmm1,%xmm3
3390
3391	movdqa	%xmm4,%xmm0
3392	movdqa	%xmm4,%xmm1
3393	pandn	%xmm2,%xmm0
3394	movdqa	%xmm4,%xmm2
3395	pandn	%xmm3,%xmm1
3396	movdqa	%xmm4,%xmm3
3397	pand	352(%rsp),%xmm2
3398	pand	352+16(%rsp),%xmm3
3399	por	%xmm0,%xmm2
3400	por	%xmm1,%xmm3
3401	movdqu	%xmm2,32(%rdi)
3402	movdqu	%xmm3,48(%rdi)
3403
3404	leaq	480+56(%rsp),%rsi
3405.cfi_def_cfa	%rsi,8
3406	movq	-48(%rsi),%r15
3407.cfi_restore	%r15
3408	movq	-40(%rsi),%r14
3409.cfi_restore	%r14
3410	movq	-32(%rsi),%r13
3411.cfi_restore	%r13
3412	movq	-24(%rsi),%r12
3413.cfi_restore	%r12
3414	movq	-16(%rsi),%rbx
3415.cfi_restore	%rbx
3416	movq	-8(%rsi),%rbp
3417.cfi_restore	%rbp
3418	leaq	(%rsi),%rsp
3419.cfi_def_cfa_register	%rsp
3420.Ladd_affineq_epilogue:
3421	ret
3422.cfi_endproc
3423.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
3424.type	__ecp_nistz256_add_tox,@function
3425.align	32
3426__ecp_nistz256_add_tox:
3427.cfi_startproc
3428	xorq	%r11,%r11
3429	adcq	0(%rbx),%r12
3430	adcq	8(%rbx),%r13
3431	movq	%r12,%rax
3432	adcq	16(%rbx),%r8
3433	adcq	24(%rbx),%r9
3434	movq	%r13,%rbp
3435	adcq	$0,%r11
3436
3437	xorq	%r10,%r10
3438	sbbq	$-1,%r12
3439	movq	%r8,%rcx
3440	sbbq	%r14,%r13
3441	sbbq	$0,%r8
3442	movq	%r9,%r10
3443	sbbq	%r15,%r9
3444	sbbq	$0,%r11
3445
3446	cmovcq	%rax,%r12
3447	cmovcq	%rbp,%r13
3448	movq	%r12,0(%rdi)
3449	cmovcq	%rcx,%r8
3450	movq	%r13,8(%rdi)
3451	cmovcq	%r10,%r9
3452	movq	%r8,16(%rdi)
3453	movq	%r9,24(%rdi)
3454
3455	ret
3456.cfi_endproc
3457.size	__ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
3458
3459.type	__ecp_nistz256_sub_fromx,@function
3460.align	32
3461__ecp_nistz256_sub_fromx:
3462.cfi_startproc
3463	xorq	%r11,%r11
3464	sbbq	0(%rbx),%r12
3465	sbbq	8(%rbx),%r13
3466	movq	%r12,%rax
3467	sbbq	16(%rbx),%r8
3468	sbbq	24(%rbx),%r9
3469	movq	%r13,%rbp
3470	sbbq	$0,%r11
3471
3472	xorq	%r10,%r10
3473	adcq	$-1,%r12
3474	movq	%r8,%rcx
3475	adcq	%r14,%r13
3476	adcq	$0,%r8
3477	movq	%r9,%r10
3478	adcq	%r15,%r9
3479
3480	btq	$0,%r11
3481	cmovncq	%rax,%r12
3482	cmovncq	%rbp,%r13
3483	movq	%r12,0(%rdi)
3484	cmovncq	%rcx,%r8
3485	movq	%r13,8(%rdi)
3486	cmovncq	%r10,%r9
3487	movq	%r8,16(%rdi)
3488	movq	%r9,24(%rdi)
3489
3490	ret
3491.cfi_endproc
3492.size	__ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
3493
3494.type	__ecp_nistz256_subx,@function
3495.align	32
3496__ecp_nistz256_subx:
3497.cfi_startproc
3498	xorq	%r11,%r11
3499	sbbq	%r12,%rax
3500	sbbq	%r13,%rbp
3501	movq	%rax,%r12
3502	sbbq	%r8,%rcx
3503	sbbq	%r9,%r10
3504	movq	%rbp,%r13
3505	sbbq	$0,%r11
3506
3507	xorq	%r9,%r9
3508	adcq	$-1,%rax
3509	movq	%rcx,%r8
3510	adcq	%r14,%rbp
3511	adcq	$0,%rcx
3512	movq	%r10,%r9
3513	adcq	%r15,%r10
3514
3515	btq	$0,%r11
3516	cmovcq	%rax,%r12
3517	cmovcq	%rbp,%r13
3518	cmovcq	%rcx,%r8
3519	cmovcq	%r10,%r9
3520
3521	ret
3522.cfi_endproc
3523.size	__ecp_nistz256_subx,.-__ecp_nistz256_subx
3524
3525.type	__ecp_nistz256_mul_by_2x,@function
3526.align	32
3527__ecp_nistz256_mul_by_2x:
3528.cfi_startproc
3529	xorq	%r11,%r11
3530	adcq	%r12,%r12
3531	adcq	%r13,%r13
3532	movq	%r12,%rax
3533	adcq	%r8,%r8
3534	adcq	%r9,%r9
3535	movq	%r13,%rbp
3536	adcq	$0,%r11
3537
3538	xorq	%r10,%r10
3539	sbbq	$-1,%r12
3540	movq	%r8,%rcx
3541	sbbq	%r14,%r13
3542	sbbq	$0,%r8
3543	movq	%r9,%r10
3544	sbbq	%r15,%r9
3545	sbbq	$0,%r11
3546
3547	cmovcq	%rax,%r12
3548	cmovcq	%rbp,%r13
3549	movq	%r12,0(%rdi)
3550	cmovcq	%rcx,%r8
3551	movq	%r13,8(%rdi)
3552	cmovcq	%r10,%r9
3553	movq	%r8,16(%rdi)
3554	movq	%r9,24(%rdi)
3555
3556	ret
3557.cfi_endproc
3558.size	__ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
3559.type	ecp_nistz256_point_doublex,@function
3560.align	32
3561ecp_nistz256_point_doublex:
3562.cfi_startproc
3563.Lpoint_doublex:
3564	pushq	%rbp
3565.cfi_adjust_cfa_offset	8
3566.cfi_offset	%rbp,-16
3567	pushq	%rbx
3568.cfi_adjust_cfa_offset	8
3569.cfi_offset	%rbx,-24
3570	pushq	%r12
3571.cfi_adjust_cfa_offset	8
3572.cfi_offset	%r12,-32
3573	pushq	%r13
3574.cfi_adjust_cfa_offset	8
3575.cfi_offset	%r13,-40
3576	pushq	%r14
3577.cfi_adjust_cfa_offset	8
3578.cfi_offset	%r14,-48
3579	pushq	%r15
3580.cfi_adjust_cfa_offset	8
3581.cfi_offset	%r15,-56
3582	subq	$160+8,%rsp
3583.cfi_adjust_cfa_offset	32*5+8
3584.Lpoint_doublex_body:
3585
3586.Lpoint_double_shortcutx:
3587	movdqu	0(%rsi),%xmm0
3588	movq	%rsi,%rbx
3589	movdqu	16(%rsi),%xmm1
3590	movq	32+0(%rsi),%r12
3591	movq	32+8(%rsi),%r13
3592	movq	32+16(%rsi),%r8
3593	movq	32+24(%rsi),%r9
3594	movq	.Lpoly+8(%rip),%r14
3595	movq	.Lpoly+24(%rip),%r15
3596	movdqa	%xmm0,96(%rsp)
3597	movdqa	%xmm1,96+16(%rsp)
3598	leaq	32(%rdi),%r10
3599	leaq	64(%rdi),%r11
3600.byte	102,72,15,110,199
3601.byte	102,73,15,110,202
3602.byte	102,73,15,110,211
3603
3604	leaq	0(%rsp),%rdi
3605	call	__ecp_nistz256_mul_by_2x
3606
3607	movq	64+0(%rsi),%rdx
3608	movq	64+8(%rsi),%r14
3609	movq	64+16(%rsi),%r15
3610	movq	64+24(%rsi),%r8
3611	leaq	64-128(%rsi),%rsi
3612	leaq	64(%rsp),%rdi
3613	call	__ecp_nistz256_sqr_montx
3614
3615	movq	0+0(%rsp),%rdx
3616	movq	8+0(%rsp),%r14
3617	leaq	-128+0(%rsp),%rsi
3618	movq	16+0(%rsp),%r15
3619	movq	24+0(%rsp),%r8
3620	leaq	0(%rsp),%rdi
3621	call	__ecp_nistz256_sqr_montx
3622
3623	movq	32(%rbx),%rdx
3624	movq	64+0(%rbx),%r9
3625	movq	64+8(%rbx),%r10
3626	movq	64+16(%rbx),%r11
3627	movq	64+24(%rbx),%r12
3628	leaq	64-128(%rbx),%rsi
3629	leaq	32(%rbx),%rbx
3630.byte	102,72,15,126,215
3631	call	__ecp_nistz256_mul_montx
3632	call	__ecp_nistz256_mul_by_2x
3633
3634	movq	96+0(%rsp),%r12
3635	movq	96+8(%rsp),%r13
3636	leaq	64(%rsp),%rbx
3637	movq	96+16(%rsp),%r8
3638	movq	96+24(%rsp),%r9
3639	leaq	32(%rsp),%rdi
3640	call	__ecp_nistz256_add_tox
3641
3642	movq	96+0(%rsp),%r12
3643	movq	96+8(%rsp),%r13
3644	leaq	64(%rsp),%rbx
3645	movq	96+16(%rsp),%r8
3646	movq	96+24(%rsp),%r9
3647	leaq	64(%rsp),%rdi
3648	call	__ecp_nistz256_sub_fromx
3649
3650	movq	0+0(%rsp),%rdx
3651	movq	8+0(%rsp),%r14
3652	leaq	-128+0(%rsp),%rsi
3653	movq	16+0(%rsp),%r15
3654	movq	24+0(%rsp),%r8
3655.byte	102,72,15,126,207
3656	call	__ecp_nistz256_sqr_montx
3657	xorq	%r9,%r9
3658	movq	%r12,%rax
3659	addq	$-1,%r12
3660	movq	%r13,%r10
3661	adcq	%rsi,%r13
3662	movq	%r14,%rcx
3663	adcq	$0,%r14
3664	movq	%r15,%r8
3665	adcq	%rbp,%r15
3666	adcq	$0,%r9
3667	xorq	%rsi,%rsi
3668	testq	$1,%rax
3669
3670	cmovzq	%rax,%r12
3671	cmovzq	%r10,%r13
3672	cmovzq	%rcx,%r14
3673	cmovzq	%r8,%r15
3674	cmovzq	%rsi,%r9
3675
3676	movq	%r13,%rax
3677	shrq	$1,%r12
3678	shlq	$63,%rax
3679	movq	%r14,%r10
3680	shrq	$1,%r13
3681	orq	%rax,%r12
3682	shlq	$63,%r10
3683	movq	%r15,%rcx
3684	shrq	$1,%r14
3685	orq	%r10,%r13
3686	shlq	$63,%rcx
3687	movq	%r12,0(%rdi)
3688	shrq	$1,%r15
3689	movq	%r13,8(%rdi)
3690	shlq	$63,%r9
3691	orq	%rcx,%r14
3692	orq	%r9,%r15
3693	movq	%r14,16(%rdi)
3694	movq	%r15,24(%rdi)
3695	movq	64(%rsp),%rdx
3696	leaq	64(%rsp),%rbx
3697	movq	0+32(%rsp),%r9
3698	movq	8+32(%rsp),%r10
3699	leaq	-128+32(%rsp),%rsi
3700	movq	16+32(%rsp),%r11
3701	movq	24+32(%rsp),%r12
3702	leaq	32(%rsp),%rdi
3703	call	__ecp_nistz256_mul_montx
3704
3705	leaq	128(%rsp),%rdi
3706	call	__ecp_nistz256_mul_by_2x
3707
3708	leaq	32(%rsp),%rbx
3709	leaq	32(%rsp),%rdi
3710	call	__ecp_nistz256_add_tox
3711
3712	movq	96(%rsp),%rdx
3713	leaq	96(%rsp),%rbx
3714	movq	0+0(%rsp),%r9
3715	movq	8+0(%rsp),%r10
3716	leaq	-128+0(%rsp),%rsi
3717	movq	16+0(%rsp),%r11
3718	movq	24+0(%rsp),%r12
3719	leaq	0(%rsp),%rdi
3720	call	__ecp_nistz256_mul_montx
3721
3722	leaq	128(%rsp),%rdi
3723	call	__ecp_nistz256_mul_by_2x
3724
3725	movq	0+32(%rsp),%rdx
3726	movq	8+32(%rsp),%r14
3727	leaq	-128+32(%rsp),%rsi
3728	movq	16+32(%rsp),%r15
3729	movq	24+32(%rsp),%r8
3730.byte	102,72,15,126,199
3731	call	__ecp_nistz256_sqr_montx
3732
3733	leaq	128(%rsp),%rbx
3734	movq	%r14,%r8
3735	movq	%r15,%r9
3736	movq	%rsi,%r14
3737	movq	%rbp,%r15
3738	call	__ecp_nistz256_sub_fromx
3739
3740	movq	0+0(%rsp),%rax
3741	movq	0+8(%rsp),%rbp
3742	movq	0+16(%rsp),%rcx
3743	movq	0+24(%rsp),%r10
3744	leaq	0(%rsp),%rdi
3745	call	__ecp_nistz256_subx
3746
3747	movq	32(%rsp),%rdx
3748	leaq	32(%rsp),%rbx
3749	movq	%r12,%r14
3750	xorl	%ecx,%ecx
3751	movq	%r12,0+0(%rsp)
3752	movq	%r13,%r10
3753	movq	%r13,0+8(%rsp)
3754	cmovzq	%r8,%r11
3755	movq	%r8,0+16(%rsp)
3756	leaq	0-128(%rsp),%rsi
3757	cmovzq	%r9,%r12
3758	movq	%r9,0+24(%rsp)
3759	movq	%r14,%r9
3760	leaq	0(%rsp),%rdi
3761	call	__ecp_nistz256_mul_montx
3762
3763.byte	102,72,15,126,203
3764.byte	102,72,15,126,207
3765	call	__ecp_nistz256_sub_fromx
3766
3767	leaq	160+56(%rsp),%rsi
3768.cfi_def_cfa	%rsi,8
3769	movq	-48(%rsi),%r15
3770.cfi_restore	%r15
3771	movq	-40(%rsi),%r14
3772.cfi_restore	%r14
3773	movq	-32(%rsi),%r13
3774.cfi_restore	%r13
3775	movq	-24(%rsi),%r12
3776.cfi_restore	%r12
3777	movq	-16(%rsi),%rbx
3778.cfi_restore	%rbx
3779	movq	-8(%rsi),%rbp
3780.cfi_restore	%rbp
3781	leaq	(%rsi),%rsp
3782.cfi_def_cfa_register	%rsp
3783.Lpoint_doublex_epilogue:
3784	ret
3785.cfi_endproc
3786.size	ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
3787.type	ecp_nistz256_point_addx,@function
3788.align	32
3789ecp_nistz256_point_addx:
3790.cfi_startproc
3791.Lpoint_addx:
3792	pushq	%rbp
3793.cfi_adjust_cfa_offset	8
3794.cfi_offset	%rbp,-16
3795	pushq	%rbx
3796.cfi_adjust_cfa_offset	8
3797.cfi_offset	%rbx,-24
3798	pushq	%r12
3799.cfi_adjust_cfa_offset	8
3800.cfi_offset	%r12,-32
3801	pushq	%r13
3802.cfi_adjust_cfa_offset	8
3803.cfi_offset	%r13,-40
3804	pushq	%r14
3805.cfi_adjust_cfa_offset	8
3806.cfi_offset	%r14,-48
3807	pushq	%r15
3808.cfi_adjust_cfa_offset	8
3809.cfi_offset	%r15,-56
3810	subq	$576+8,%rsp
3811.cfi_adjust_cfa_offset	32*18+8
3812.Lpoint_addx_body:
3813
3814	movdqu	0(%rsi),%xmm0
3815	movdqu	16(%rsi),%xmm1
3816	movdqu	32(%rsi),%xmm2
3817	movdqu	48(%rsi),%xmm3
3818	movdqu	64(%rsi),%xmm4
3819	movdqu	80(%rsi),%xmm5
3820	movq	%rsi,%rbx
3821	movq	%rdx,%rsi
3822	movdqa	%xmm0,384(%rsp)
3823	movdqa	%xmm1,384+16(%rsp)
3824	movdqa	%xmm2,416(%rsp)
3825	movdqa	%xmm3,416+16(%rsp)
3826	movdqa	%xmm4,448(%rsp)
3827	movdqa	%xmm5,448+16(%rsp)
3828	por	%xmm4,%xmm5
3829
3830	movdqu	0(%rsi),%xmm0
3831	pshufd	$0xb1,%xmm5,%xmm3
3832	movdqu	16(%rsi),%xmm1
3833	movdqu	32(%rsi),%xmm2
3834	por	%xmm3,%xmm5
3835	movdqu	48(%rsi),%xmm3
3836	movq	64+0(%rsi),%rdx
3837	movq	64+8(%rsi),%r14
3838	movq	64+16(%rsi),%r15
3839	movq	64+24(%rsi),%r8
3840	movdqa	%xmm0,480(%rsp)
3841	pshufd	$0x1e,%xmm5,%xmm4
3842	movdqa	%xmm1,480+16(%rsp)
3843	movdqu	64(%rsi),%xmm0
3844	movdqu	80(%rsi),%xmm1
3845	movdqa	%xmm2,512(%rsp)
3846	movdqa	%xmm3,512+16(%rsp)
3847	por	%xmm4,%xmm5
3848	pxor	%xmm4,%xmm4
3849	por	%xmm0,%xmm1
3850.byte	102,72,15,110,199
3851
3852	leaq	64-128(%rsi),%rsi
3853	movq	%rdx,544+0(%rsp)
3854	movq	%r14,544+8(%rsp)
3855	movq	%r15,544+16(%rsp)
3856	movq	%r8,544+24(%rsp)
3857	leaq	96(%rsp),%rdi
3858	call	__ecp_nistz256_sqr_montx
3859
3860	pcmpeqd	%xmm4,%xmm5
3861	pshufd	$0xb1,%xmm1,%xmm4
3862	por	%xmm1,%xmm4
3863	pshufd	$0,%xmm5,%xmm5
3864	pshufd	$0x1e,%xmm4,%xmm3
3865	por	%xmm3,%xmm4
3866	pxor	%xmm3,%xmm3
3867	pcmpeqd	%xmm3,%xmm4
3868	pshufd	$0,%xmm4,%xmm4
3869	movq	64+0(%rbx),%rdx
3870	movq	64+8(%rbx),%r14
3871	movq	64+16(%rbx),%r15
3872	movq	64+24(%rbx),%r8
3873.byte	102,72,15,110,203
3874
3875	leaq	64-128(%rbx),%rsi
3876	leaq	32(%rsp),%rdi
3877	call	__ecp_nistz256_sqr_montx
3878
3879	movq	544(%rsp),%rdx
3880	leaq	544(%rsp),%rbx
3881	movq	0+96(%rsp),%r9
3882	movq	8+96(%rsp),%r10
3883	leaq	-128+96(%rsp),%rsi
3884	movq	16+96(%rsp),%r11
3885	movq	24+96(%rsp),%r12
3886	leaq	224(%rsp),%rdi
3887	call	__ecp_nistz256_mul_montx
3888
3889	movq	448(%rsp),%rdx
3890	leaq	448(%rsp),%rbx
3891	movq	0+32(%rsp),%r9
3892	movq	8+32(%rsp),%r10
3893	leaq	-128+32(%rsp),%rsi
3894	movq	16+32(%rsp),%r11
3895	movq	24+32(%rsp),%r12
3896	leaq	256(%rsp),%rdi
3897	call	__ecp_nistz256_mul_montx
3898
3899	movq	416(%rsp),%rdx
3900	leaq	416(%rsp),%rbx
3901	movq	0+224(%rsp),%r9
3902	movq	8+224(%rsp),%r10
3903	leaq	-128+224(%rsp),%rsi
3904	movq	16+224(%rsp),%r11
3905	movq	24+224(%rsp),%r12
3906	leaq	224(%rsp),%rdi
3907	call	__ecp_nistz256_mul_montx
3908
3909	movq	512(%rsp),%rdx
3910	leaq	512(%rsp),%rbx
3911	movq	0+256(%rsp),%r9
3912	movq	8+256(%rsp),%r10
3913	leaq	-128+256(%rsp),%rsi
3914	movq	16+256(%rsp),%r11
3915	movq	24+256(%rsp),%r12
3916	leaq	256(%rsp),%rdi
3917	call	__ecp_nistz256_mul_montx
3918
3919	leaq	224(%rsp),%rbx
3920	leaq	64(%rsp),%rdi
3921	call	__ecp_nistz256_sub_fromx
3922
3923	orq	%r13,%r12
3924	movdqa	%xmm4,%xmm2
3925	orq	%r8,%r12
3926	orq	%r9,%r12
3927	por	%xmm5,%xmm2
3928.byte	102,73,15,110,220
3929
3930	movq	384(%rsp),%rdx
3931	leaq	384(%rsp),%rbx
3932	movq	0+96(%rsp),%r9
3933	movq	8+96(%rsp),%r10
3934	leaq	-128+96(%rsp),%rsi
3935	movq	16+96(%rsp),%r11
3936	movq	24+96(%rsp),%r12
3937	leaq	160(%rsp),%rdi
3938	call	__ecp_nistz256_mul_montx
3939
3940	movq	480(%rsp),%rdx
3941	leaq	480(%rsp),%rbx
3942	movq	0+32(%rsp),%r9
3943	movq	8+32(%rsp),%r10
3944	leaq	-128+32(%rsp),%rsi
3945	movq	16+32(%rsp),%r11
3946	movq	24+32(%rsp),%r12
3947	leaq	192(%rsp),%rdi
3948	call	__ecp_nistz256_mul_montx
3949
3950	leaq	160(%rsp),%rbx
3951	leaq	0(%rsp),%rdi
3952	call	__ecp_nistz256_sub_fromx
3953
3954	orq	%r13,%r12
3955	orq	%r8,%r12
3956	orq	%r9,%r12
3957
3958.byte	102,73,15,126,208
3959.byte	102,73,15,126,217
3960	orq	%r8,%r12
3961.byte	0x3e
3962	jnz	.Ladd_proceedx
3963
3964
3965
3966	testq	%r9,%r9
3967	jz	.Ladd_doublex
3968
3969
3970
3971
3972
3973
3974.byte	102,72,15,126,199
3975	pxor	%xmm0,%xmm0
3976	movdqu	%xmm0,0(%rdi)
3977	movdqu	%xmm0,16(%rdi)
3978	movdqu	%xmm0,32(%rdi)
3979	movdqu	%xmm0,48(%rdi)
3980	movdqu	%xmm0,64(%rdi)
3981	movdqu	%xmm0,80(%rdi)
3982	jmp	.Ladd_donex
3983
3984.align	32
3985.Ladd_doublex:
3986.byte	102,72,15,126,206
3987.byte	102,72,15,126,199
3988	addq	$416,%rsp
3989.cfi_adjust_cfa_offset	-416
3990	jmp	.Lpoint_double_shortcutx
3991.cfi_adjust_cfa_offset	416
3992
3993.align	32
3994.Ladd_proceedx:
3995	movq	0+64(%rsp),%rdx
3996	movq	8+64(%rsp),%r14
3997	leaq	-128+64(%rsp),%rsi
3998	movq	16+64(%rsp),%r15
3999	movq	24+64(%rsp),%r8
4000	leaq	96(%rsp),%rdi
4001	call	__ecp_nistz256_sqr_montx
4002
4003	movq	448(%rsp),%rdx
4004	leaq	448(%rsp),%rbx
4005	movq	0+0(%rsp),%r9
4006	movq	8+0(%rsp),%r10
4007	leaq	-128+0(%rsp),%rsi
4008	movq	16+0(%rsp),%r11
4009	movq	24+0(%rsp),%r12
4010	leaq	352(%rsp),%rdi
4011	call	__ecp_nistz256_mul_montx
4012
4013	movq	0+0(%rsp),%rdx
4014	movq	8+0(%rsp),%r14
4015	leaq	-128+0(%rsp),%rsi
4016	movq	16+0(%rsp),%r15
4017	movq	24+0(%rsp),%r8
4018	leaq	32(%rsp),%rdi
4019	call	__ecp_nistz256_sqr_montx
4020
4021	movq	544(%rsp),%rdx
4022	leaq	544(%rsp),%rbx
4023	movq	0+352(%rsp),%r9
4024	movq	8+352(%rsp),%r10
4025	leaq	-128+352(%rsp),%rsi
4026	movq	16+352(%rsp),%r11
4027	movq	24+352(%rsp),%r12
4028	leaq	352(%rsp),%rdi
4029	call	__ecp_nistz256_mul_montx
4030
4031	movq	0(%rsp),%rdx
4032	leaq	0(%rsp),%rbx
4033	movq	0+32(%rsp),%r9
4034	movq	8+32(%rsp),%r10
4035	leaq	-128+32(%rsp),%rsi
4036	movq	16+32(%rsp),%r11
4037	movq	24+32(%rsp),%r12
4038	leaq	128(%rsp),%rdi
4039	call	__ecp_nistz256_mul_montx
4040
4041	movq	160(%rsp),%rdx
4042	leaq	160(%rsp),%rbx
4043	movq	0+32(%rsp),%r9
4044	movq	8+32(%rsp),%r10
4045	leaq	-128+32(%rsp),%rsi
4046	movq	16+32(%rsp),%r11
4047	movq	24+32(%rsp),%r12
4048	leaq	192(%rsp),%rdi
4049	call	__ecp_nistz256_mul_montx
4050
4051
4052
4053
4054	xorq	%r11,%r11
4055	addq	%r12,%r12
4056	leaq	96(%rsp),%rsi
4057	adcq	%r13,%r13
4058	movq	%r12,%rax
4059	adcq	%r8,%r8
4060	adcq	%r9,%r9
4061	movq	%r13,%rbp
4062	adcq	$0,%r11
4063
4064	subq	$-1,%r12
4065	movq	%r8,%rcx
4066	sbbq	%r14,%r13
4067	sbbq	$0,%r8
4068	movq	%r9,%r10
4069	sbbq	%r15,%r9
4070	sbbq	$0,%r11
4071
4072	cmovcq	%rax,%r12
4073	movq	0(%rsi),%rax
4074	cmovcq	%rbp,%r13
4075	movq	8(%rsi),%rbp
4076	cmovcq	%rcx,%r8
4077	movq	16(%rsi),%rcx
4078	cmovcq	%r10,%r9
4079	movq	24(%rsi),%r10
4080
4081	call	__ecp_nistz256_subx
4082
4083	leaq	128(%rsp),%rbx
4084	leaq	288(%rsp),%rdi
4085	call	__ecp_nistz256_sub_fromx
4086
4087	movq	192+0(%rsp),%rax
4088	movq	192+8(%rsp),%rbp
4089	movq	192+16(%rsp),%rcx
4090	movq	192+24(%rsp),%r10
4091	leaq	320(%rsp),%rdi
4092
4093	call	__ecp_nistz256_subx
4094
4095	movq	%r12,0(%rdi)
4096	movq	%r13,8(%rdi)
4097	movq	%r8,16(%rdi)
4098	movq	%r9,24(%rdi)
4099	movq	128(%rsp),%rdx
4100	leaq	128(%rsp),%rbx
4101	movq	0+224(%rsp),%r9
4102	movq	8+224(%rsp),%r10
4103	leaq	-128+224(%rsp),%rsi
4104	movq	16+224(%rsp),%r11
4105	movq	24+224(%rsp),%r12
4106	leaq	256(%rsp),%rdi
4107	call	__ecp_nistz256_mul_montx
4108
4109	movq	320(%rsp),%rdx
4110	leaq	320(%rsp),%rbx
4111	movq	0+64(%rsp),%r9
4112	movq	8+64(%rsp),%r10
4113	leaq	-128+64(%rsp),%rsi
4114	movq	16+64(%rsp),%r11
4115	movq	24+64(%rsp),%r12
4116	leaq	320(%rsp),%rdi
4117	call	__ecp_nistz256_mul_montx
4118
4119	leaq	256(%rsp),%rbx
4120	leaq	320(%rsp),%rdi
4121	call	__ecp_nistz256_sub_fromx
4122
4123.byte	102,72,15,126,199
4124
4125	movdqa	%xmm5,%xmm0
4126	movdqa	%xmm5,%xmm1
4127	pandn	352(%rsp),%xmm0
4128	movdqa	%xmm5,%xmm2
4129	pandn	352+16(%rsp),%xmm1
4130	movdqa	%xmm5,%xmm3
4131	pand	544(%rsp),%xmm2
4132	pand	544+16(%rsp),%xmm3
4133	por	%xmm0,%xmm2
4134	por	%xmm1,%xmm3
4135
4136	movdqa	%xmm4,%xmm0
4137	movdqa	%xmm4,%xmm1
4138	pandn	%xmm2,%xmm0
4139	movdqa	%xmm4,%xmm2
4140	pandn	%xmm3,%xmm1
4141	movdqa	%xmm4,%xmm3
4142	pand	448(%rsp),%xmm2
4143	pand	448+16(%rsp),%xmm3
4144	por	%xmm0,%xmm2
4145	por	%xmm1,%xmm3
4146	movdqu	%xmm2,64(%rdi)
4147	movdqu	%xmm3,80(%rdi)
4148
4149	movdqa	%xmm5,%xmm0
4150	movdqa	%xmm5,%xmm1
4151	pandn	288(%rsp),%xmm0
4152	movdqa	%xmm5,%xmm2
4153	pandn	288+16(%rsp),%xmm1
4154	movdqa	%xmm5,%xmm3
4155	pand	480(%rsp),%xmm2
4156	pand	480+16(%rsp),%xmm3
4157	por	%xmm0,%xmm2
4158	por	%xmm1,%xmm3
4159
4160	movdqa	%xmm4,%xmm0
4161	movdqa	%xmm4,%xmm1
4162	pandn	%xmm2,%xmm0
4163	movdqa	%xmm4,%xmm2
4164	pandn	%xmm3,%xmm1
4165	movdqa	%xmm4,%xmm3
4166	pand	384(%rsp),%xmm2
4167	pand	384+16(%rsp),%xmm3
4168	por	%xmm0,%xmm2
4169	por	%xmm1,%xmm3
4170	movdqu	%xmm2,0(%rdi)
4171	movdqu	%xmm3,16(%rdi)
4172
4173	movdqa	%xmm5,%xmm0
4174	movdqa	%xmm5,%xmm1
4175	pandn	320(%rsp),%xmm0
4176	movdqa	%xmm5,%xmm2
4177	pandn	320+16(%rsp),%xmm1
4178	movdqa	%xmm5,%xmm3
4179	pand	512(%rsp),%xmm2
4180	pand	512+16(%rsp),%xmm3
4181	por	%xmm0,%xmm2
4182	por	%xmm1,%xmm3
4183
4184	movdqa	%xmm4,%xmm0
4185	movdqa	%xmm4,%xmm1
4186	pandn	%xmm2,%xmm0
4187	movdqa	%xmm4,%xmm2
4188	pandn	%xmm3,%xmm1
4189	movdqa	%xmm4,%xmm3
4190	pand	416(%rsp),%xmm2
4191	pand	416+16(%rsp),%xmm3
4192	por	%xmm0,%xmm2
4193	por	%xmm1,%xmm3
4194	movdqu	%xmm2,32(%rdi)
4195	movdqu	%xmm3,48(%rdi)
4196
4197.Ladd_donex:
4198	leaq	576+56(%rsp),%rsi
4199.cfi_def_cfa	%rsi,8
4200	movq	-48(%rsi),%r15
4201.cfi_restore	%r15
4202	movq	-40(%rsi),%r14
4203.cfi_restore	%r14
4204	movq	-32(%rsi),%r13
4205.cfi_restore	%r13
4206	movq	-24(%rsi),%r12
4207.cfi_restore	%r12
4208	movq	-16(%rsi),%rbx
4209.cfi_restore	%rbx
4210	movq	-8(%rsi),%rbp
4211.cfi_restore	%rbp
4212	leaq	(%rsi),%rsp
4213.cfi_def_cfa_register	%rsp
4214.Lpoint_addx_epilogue:
4215	ret
4216.cfi_endproc
4217.size	ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
4218.type	ecp_nistz256_point_add_affinex,@function
4219.align	32
4220ecp_nistz256_point_add_affinex:
4221.cfi_startproc
4222.Lpoint_add_affinex:
4223	pushq	%rbp
4224.cfi_adjust_cfa_offset	8
4225.cfi_offset	%rbp,-16
4226	pushq	%rbx
4227.cfi_adjust_cfa_offset	8
4228.cfi_offset	%rbx,-24
4229	pushq	%r12
4230.cfi_adjust_cfa_offset	8
4231.cfi_offset	%r12,-32
4232	pushq	%r13
4233.cfi_adjust_cfa_offset	8
4234.cfi_offset	%r13,-40
4235	pushq	%r14
4236.cfi_adjust_cfa_offset	8
4237.cfi_offset	%r14,-48
4238	pushq	%r15
4239.cfi_adjust_cfa_offset	8
4240.cfi_offset	%r15,-56
4241	subq	$480+8,%rsp
4242.cfi_adjust_cfa_offset	32*15+8
4243.Ladd_affinex_body:
4244
4245	movdqu	0(%rsi),%xmm0
4246	movq	%rdx,%rbx
4247	movdqu	16(%rsi),%xmm1
4248	movdqu	32(%rsi),%xmm2
4249	movdqu	48(%rsi),%xmm3
4250	movdqu	64(%rsi),%xmm4
4251	movdqu	80(%rsi),%xmm5
4252	movq	64+0(%rsi),%rdx
4253	movq	64+8(%rsi),%r14
4254	movq	64+16(%rsi),%r15
4255	movq	64+24(%rsi),%r8
4256	movdqa	%xmm0,320(%rsp)
4257	movdqa	%xmm1,320+16(%rsp)
4258	movdqa	%xmm2,352(%rsp)
4259	movdqa	%xmm3,352+16(%rsp)
4260	movdqa	%xmm4,384(%rsp)
4261	movdqa	%xmm5,384+16(%rsp)
4262	por	%xmm4,%xmm5
4263
4264	movdqu	0(%rbx),%xmm0
4265	pshufd	$0xb1,%xmm5,%xmm3
4266	movdqu	16(%rbx),%xmm1
4267	movdqu	32(%rbx),%xmm2
4268	por	%xmm3,%xmm5
4269	movdqu	48(%rbx),%xmm3
4270	movdqa	%xmm0,416(%rsp)
4271	pshufd	$0x1e,%xmm5,%xmm4
4272	movdqa	%xmm1,416+16(%rsp)
4273	por	%xmm0,%xmm1
4274.byte	102,72,15,110,199
4275	movdqa	%xmm2,448(%rsp)
4276	movdqa	%xmm3,448+16(%rsp)
4277	por	%xmm2,%xmm3
4278	por	%xmm4,%xmm5
4279	pxor	%xmm4,%xmm4
4280	por	%xmm1,%xmm3
4281
4282	leaq	64-128(%rsi),%rsi
4283	leaq	32(%rsp),%rdi
4284	call	__ecp_nistz256_sqr_montx
4285
4286	pcmpeqd	%xmm4,%xmm5
4287	pshufd	$0xb1,%xmm3,%xmm4
4288	movq	0(%rbx),%rdx
4289
4290	movq	%r12,%r9
4291	por	%xmm3,%xmm4
4292	pshufd	$0,%xmm5,%xmm5
4293	pshufd	$0x1e,%xmm4,%xmm3
4294	movq	%r13,%r10
4295	por	%xmm3,%xmm4
4296	pxor	%xmm3,%xmm3
4297	movq	%r14,%r11
4298	pcmpeqd	%xmm3,%xmm4
4299	pshufd	$0,%xmm4,%xmm4
4300
4301	leaq	32-128(%rsp),%rsi
4302	movq	%r15,%r12
4303	leaq	0(%rsp),%rdi
4304	call	__ecp_nistz256_mul_montx
4305
4306	leaq	320(%rsp),%rbx
4307	leaq	64(%rsp),%rdi
4308	call	__ecp_nistz256_sub_fromx
4309
4310	movq	384(%rsp),%rdx
4311	leaq	384(%rsp),%rbx
4312	movq	0+32(%rsp),%r9
4313	movq	8+32(%rsp),%r10
4314	leaq	-128+32(%rsp),%rsi
4315	movq	16+32(%rsp),%r11
4316	movq	24+32(%rsp),%r12
4317	leaq	32(%rsp),%rdi
4318	call	__ecp_nistz256_mul_montx
4319
4320	movq	384(%rsp),%rdx
4321	leaq	384(%rsp),%rbx
4322	movq	0+64(%rsp),%r9
4323	movq	8+64(%rsp),%r10
4324	leaq	-128+64(%rsp),%rsi
4325	movq	16+64(%rsp),%r11
4326	movq	24+64(%rsp),%r12
4327	leaq	288(%rsp),%rdi
4328	call	__ecp_nistz256_mul_montx
4329
4330	movq	448(%rsp),%rdx
4331	leaq	448(%rsp),%rbx
4332	movq	0+32(%rsp),%r9
4333	movq	8+32(%rsp),%r10
4334	leaq	-128+32(%rsp),%rsi
4335	movq	16+32(%rsp),%r11
4336	movq	24+32(%rsp),%r12
4337	leaq	32(%rsp),%rdi
4338	call	__ecp_nistz256_mul_montx
4339
4340	leaq	352(%rsp),%rbx
4341	leaq	96(%rsp),%rdi
4342	call	__ecp_nistz256_sub_fromx
4343
4344	movq	0+64(%rsp),%rdx
4345	movq	8+64(%rsp),%r14
4346	leaq	-128+64(%rsp),%rsi
4347	movq	16+64(%rsp),%r15
4348	movq	24+64(%rsp),%r8
4349	leaq	128(%rsp),%rdi
4350	call	__ecp_nistz256_sqr_montx
4351
4352	movq	0+96(%rsp),%rdx
4353	movq	8+96(%rsp),%r14
4354	leaq	-128+96(%rsp),%rsi
4355	movq	16+96(%rsp),%r15
4356	movq	24+96(%rsp),%r8
4357	leaq	192(%rsp),%rdi
4358	call	__ecp_nistz256_sqr_montx
4359
4360	movq	128(%rsp),%rdx
4361	leaq	128(%rsp),%rbx
4362	movq	0+64(%rsp),%r9
4363	movq	8+64(%rsp),%r10
4364	leaq	-128+64(%rsp),%rsi
4365	movq	16+64(%rsp),%r11
4366	movq	24+64(%rsp),%r12
4367	leaq	160(%rsp),%rdi
4368	call	__ecp_nistz256_mul_montx
4369
4370	movq	320(%rsp),%rdx
4371	leaq	320(%rsp),%rbx
4372	movq	0+128(%rsp),%r9
4373	movq	8+128(%rsp),%r10
4374	leaq	-128+128(%rsp),%rsi
4375	movq	16+128(%rsp),%r11
4376	movq	24+128(%rsp),%r12
4377	leaq	0(%rsp),%rdi
4378	call	__ecp_nistz256_mul_montx
4379
4380
4381
4382
4383	xorq	%r11,%r11
4384	addq	%r12,%r12
4385	leaq	192(%rsp),%rsi
4386	adcq	%r13,%r13
4387	movq	%r12,%rax
4388	adcq	%r8,%r8
4389	adcq	%r9,%r9
4390	movq	%r13,%rbp
4391	adcq	$0,%r11
4392
4393	subq	$-1,%r12
4394	movq	%r8,%rcx
4395	sbbq	%r14,%r13
4396	sbbq	$0,%r8
4397	movq	%r9,%r10
4398	sbbq	%r15,%r9
4399	sbbq	$0,%r11
4400
4401	cmovcq	%rax,%r12
4402	movq	0(%rsi),%rax
4403	cmovcq	%rbp,%r13
4404	movq	8(%rsi),%rbp
4405	cmovcq	%rcx,%r8
4406	movq	16(%rsi),%rcx
4407	cmovcq	%r10,%r9
4408	movq	24(%rsi),%r10
4409
4410	call	__ecp_nistz256_subx
4411
4412	leaq	160(%rsp),%rbx
4413	leaq	224(%rsp),%rdi
4414	call	__ecp_nistz256_sub_fromx
4415
4416	movq	0+0(%rsp),%rax
4417	movq	0+8(%rsp),%rbp
4418	movq	0+16(%rsp),%rcx
4419	movq	0+24(%rsp),%r10
4420	leaq	64(%rsp),%rdi
4421
4422	call	__ecp_nistz256_subx
4423
4424	movq	%r12,0(%rdi)
4425	movq	%r13,8(%rdi)
4426	movq	%r8,16(%rdi)
4427	movq	%r9,24(%rdi)
4428	movq	352(%rsp),%rdx
4429	leaq	352(%rsp),%rbx
4430	movq	0+160(%rsp),%r9
4431	movq	8+160(%rsp),%r10
4432	leaq	-128+160(%rsp),%rsi
4433	movq	16+160(%rsp),%r11
4434	movq	24+160(%rsp),%r12
4435	leaq	32(%rsp),%rdi
4436	call	__ecp_nistz256_mul_montx
4437
4438	movq	96(%rsp),%rdx
4439	leaq	96(%rsp),%rbx
4440	movq	0+64(%rsp),%r9
4441	movq	8+64(%rsp),%r10
4442	leaq	-128+64(%rsp),%rsi
4443	movq	16+64(%rsp),%r11
4444	movq	24+64(%rsp),%r12
4445	leaq	64(%rsp),%rdi
4446	call	__ecp_nistz256_mul_montx
4447
4448	leaq	32(%rsp),%rbx
4449	leaq	256(%rsp),%rdi
4450	call	__ecp_nistz256_sub_fromx
4451
4452.byte	102,72,15,126,199
4453
4454	movdqa	%xmm5,%xmm0
4455	movdqa	%xmm5,%xmm1
4456	pandn	288(%rsp),%xmm0
4457	movdqa	%xmm5,%xmm2
4458	pandn	288+16(%rsp),%xmm1
4459	movdqa	%xmm5,%xmm3
4460	pand	.LONE_mont(%rip),%xmm2
4461	pand	.LONE_mont+16(%rip),%xmm3
4462	por	%xmm0,%xmm2
4463	por	%xmm1,%xmm3
4464
4465	movdqa	%xmm4,%xmm0
4466	movdqa	%xmm4,%xmm1
4467	pandn	%xmm2,%xmm0
4468	movdqa	%xmm4,%xmm2
4469	pandn	%xmm3,%xmm1
4470	movdqa	%xmm4,%xmm3
4471	pand	384(%rsp),%xmm2
4472	pand	384+16(%rsp),%xmm3
4473	por	%xmm0,%xmm2
4474	por	%xmm1,%xmm3
4475	movdqu	%xmm2,64(%rdi)
4476	movdqu	%xmm3,80(%rdi)
4477
4478	movdqa	%xmm5,%xmm0
4479	movdqa	%xmm5,%xmm1
4480	pandn	224(%rsp),%xmm0
4481	movdqa	%xmm5,%xmm2
4482	pandn	224+16(%rsp),%xmm1
4483	movdqa	%xmm5,%xmm3
4484	pand	416(%rsp),%xmm2
4485	pand	416+16(%rsp),%xmm3
4486	por	%xmm0,%xmm2
4487	por	%xmm1,%xmm3
4488
4489	movdqa	%xmm4,%xmm0
4490	movdqa	%xmm4,%xmm1
4491	pandn	%xmm2,%xmm0
4492	movdqa	%xmm4,%xmm2
4493	pandn	%xmm3,%xmm1
4494	movdqa	%xmm4,%xmm3
4495	pand	320(%rsp),%xmm2
4496	pand	320+16(%rsp),%xmm3
4497	por	%xmm0,%xmm2
4498	por	%xmm1,%xmm3
4499	movdqu	%xmm2,0(%rdi)
4500	movdqu	%xmm3,16(%rdi)
4501
4502	movdqa	%xmm5,%xmm0
4503	movdqa	%xmm5,%xmm1
4504	pandn	256(%rsp),%xmm0
4505	movdqa	%xmm5,%xmm2
4506	pandn	256+16(%rsp),%xmm1
4507	movdqa	%xmm5,%xmm3
4508	pand	448(%rsp),%xmm2
4509	pand	448+16(%rsp),%xmm3
4510	por	%xmm0,%xmm2
4511	por	%xmm1,%xmm3
4512
4513	movdqa	%xmm4,%xmm0
4514	movdqa	%xmm4,%xmm1
4515	pandn	%xmm2,%xmm0
4516	movdqa	%xmm4,%xmm2
4517	pandn	%xmm3,%xmm1
4518	movdqa	%xmm4,%xmm3
4519	pand	352(%rsp),%xmm2
4520	pand	352+16(%rsp),%xmm3
4521	por	%xmm0,%xmm2
4522	por	%xmm1,%xmm3
4523	movdqu	%xmm2,32(%rdi)
4524	movdqu	%xmm3,48(%rdi)
4525
4526	leaq	480+56(%rsp),%rsi
4527.cfi_def_cfa	%rsi,8
4528	movq	-48(%rsi),%r15
4529.cfi_restore	%r15
4530	movq	-40(%rsi),%r14
4531.cfi_restore	%r14
4532	movq	-32(%rsi),%r13
4533.cfi_restore	%r13
4534	movq	-24(%rsi),%r12
4535.cfi_restore	%r12
4536	movq	-16(%rsi),%rbx
4537.cfi_restore	%rbx
4538	movq	-8(%rsi),%rbp
4539.cfi_restore	%rbp
4540	leaq	(%rsi),%rsp
4541.cfi_def_cfa_register	%rsp
4542.Ladd_affinex_epilogue:
4543	ret
4544.cfi_endproc
4545.size	ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
4546#endif
4547