// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

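// Constant pool: .Lpoly is the P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1
// and .LONE_mont is 1 in Montgomery form (2^256 mod p), both stored as four
// little-endian 64-bit limbs. .LOne/.LTwo/.LThree are dword broadcasts used by
// the SIMD table-select loops below. .Lord is the P-256 group order n, and
// .LordK is -n^-1 mod 2^64, the per-limb constant for Montgomery reduction
// modulo n.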

.section	.rodata
.align	64
.Lpoly:
.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

.LOne:
.long	1,1,1,1,1,1,1,1
.LTwo:
.long	2,2,2,2,2,2,2,2
.LThree:
.long	3,3,3,3,3,3,3,3
.LONE_mont:
.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe


.Lord:
.quad	0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad	0xccd1c8aaee00bc4f
.text



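// ecp_nistz256_neg(res, a): res (%rdi) = -a (%rsi) mod p, four 64-bit limbs.
// Constant time: subtract a from zero, then conditionally keep the raw zero
// result (when the subtraction did not borrow, i.e. a == 0) or p - a.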
.globl	ecp_nistz256_neg
.hidden ecp_nistz256_neg
.type	ecp_nistz256_neg,@function
.align	32
ecp_nistz256_neg:
.cfi_startproc
_CET_ENDBR
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-16
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-24
.Lneg_body:

	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r13,%r13

	subq	0(%rsi),%r8
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r8,%rax
	sbbq	24(%rsi),%r11
	leaq	.Lpoly(%rip),%rsi
	movq	%r9,%rdx
	sbbq	$0,%r13

	addq	0(%rsi),%r8
	movq	%r10,%rcx
	adcq	8(%rsi),%r9
	adcq	16(%rsi),%r10
	movq	%r11,%r12
	adcq	24(%rsi),%r11
	testq	%r13,%r13

	cmovzq	%rax,%r8
	cmovzq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovzq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovzq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	movq	0(%rsp),%r13
.cfi_restore	%r13
	movq	8(%rsp),%r12
.cfi_restore	%r12
	leaq	16(%rsp),%rsp
.cfi_adjust_cfa_offset	-16
.Lneg_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_neg,.-ecp_nistz256_neg






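// ecp_nistz256_ord_mul_mont(res, a, b): Montgomery multiplication modulo the
// group order: res (%rdi) = a (%rsi) * b (%rdx) * 2^-256 mod .Lord. The probe
// below dispatches to the mulx/adcx/adox variant when OPENSSL_ia32cap_P
// signals both BMI2 (bit 8) and ADX (bit 19) in the CPUID(7).EBX word.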
.globl	ecp_nistz256_ord_mul_mont
.hidden ecp_nistz256_ord_mul_mont
.type	ecp_nistz256_ord_mul_mont,@function
.align	32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_mul_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mul_body:

	movq	0(%rdx),%rax
	movq	%rdx,%rbx
	leaq	.Lord(%rip),%r14
	movq	.LordK(%rip),%r15


	movq	%rax,%rcx
	mulq	0(%rsi)
	movq	%rax,%r8
	movq	%rcx,%rax
	movq	%rdx,%r9

	mulq	8(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	16(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r8,%r13
	imulq	%r15,%r8

	movq	%rdx,%r11
	mulq	24(%rsi)
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	0(%r14)
	movq	%r8,%rbp
	addq	%rax,%r13
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%r8

	mulq	8(%r14)
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	%rdx,%r10
	movq	%rbp,%rdx
	adcq	$0,%r8

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	8(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r8,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r9,%rcx
	imulq	%r15,%r9

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	xorq	%r8,%r8
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8


	mulq	0(%r14)
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	%r9,%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%r9

	mulq	8(%r14)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	%rdx,%r11
	movq	%rbp,%rdx
	adcq	$0,%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r12
	movq	16(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r9,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r10,%rcx
	imulq	%r15,%r10

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	xorq	%r9,%r9
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9


	mulq	0(%r14)
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	%r10,%rax
	adcq	%rdx,%rcx

	subq	%r10,%r12
	sbbq	$0,%r10

	mulq	8(%r14)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	%rdx,%r12
	movq	%rbp,%rdx
	adcq	$0,%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r13
	movq	24(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r10,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9


	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r11,%rcx
	imulq	%r15,%r11

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r8
	adcq	$0,%rdx
	xorq	%r10,%r10
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10


	mulq	0(%r14)
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	%r11,%rax
	adcq	%rdx,%rcx

	subq	%r11,%r13
	sbbq	$0,%r11

	mulq	8(%r14)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	%rdx,%r13
	movq	%rbp,%rdx
	adcq	$0,%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	sbbq	%rdx,%rbp

	addq	%r11,%r8
	adcq	%rbp,%r9
	adcq	$0,%r10


	movq	%r12,%rsi
	subq	0(%r14),%r12
	movq	%r13,%r11
	sbbq	8(%r14),%r13
	movq	%r8,%rcx
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rsi,%r12
	cmovcq	%r11,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mul_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont







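// ecp_nistz256_ord_sqr_mont(res, a, rep): rep (%rdx) back-to-back Montgomery
// squarings of a (%rsi) modulo .Lord, result to res (%rdi). Dispatches to the
// ADX/BMI2 variant via the same capability check as ord_mul_mont.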
.globl	ecp_nistz256_ord_sqr_mont
.hidden ecp_nistz256_ord_sqr_mont
.type	ecp_nistz256_ord_sqr_mont,@function
.align	32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_sqr_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqr_body:

	movq	0(%rsi),%r8
	movq	8(%rsi),%rax
	movq	16(%rsi),%r14
	movq	24(%rsi),%r15
	leaq	.Lord(%rip),%rsi
	movq	%rdx,%rbx
	jmp	.Loop_ord_sqr

.align	32
.Loop_ord_sqr:

	movq	%rax,%rbp
	mulq	%r8
	movq	%rax,%r9
.byte	102,72,15,110,205
	movq	%r14,%rax
	movq	%rdx,%r10

	mulq	%r8
	addq	%rax,%r10
	movq	%r15,%rax
.byte	102,73,15,110,214
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r8
	addq	%rax,%r11
	movq	%r15,%rax
.byte	102,73,15,110,223
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	%r14
	movq	%rax,%r13
	movq	%r14,%rax
	movq	%rdx,%r14


	mulq	%rbp
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r15

	mulq	%rbp
	addq	%rax,%r12
	adcq	$0,%rdx

	addq	%r15,%r12
	adcq	%rdx,%r13
	adcq	$0,%r14


	xorq	%r15,%r15
	movq	%r8,%rax
	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15


	mulq	%rax
	movq	%rax,%r8
.byte	102,72,15,126,200
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r9
	adcq	%rax,%r10
.byte	102,72,15,126,208
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r11
	adcq	%rax,%r12
.byte	102,72,15,126,216
	adcq	$0,%rdx
	movq	%rdx,%rbp

	movq	%r8,%rcx
	imulq	32(%rsi),%r8

	mulq	%rax
	addq	%rbp,%r13
	adcq	%rax,%r14
	movq	0(%rsi),%rax
	adcq	%rdx,%r15


	mulq	%r8
	movq	%r8,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%rbp

	mulq	%r8
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%r8,%rax
	adcq	%rdx,%r10
	movq	%r8,%rdx
	adcq	$0,%rbp

	movq	%r9,%rcx
	imulq	32(%rsi),%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	0(%rsi),%rax
	sbbq	%rdx,%r8

	addq	%rbp,%r11
	adcq	$0,%r8


	mulq	%r9
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%rbp

	mulq	%r9
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%r9,%rax
	adcq	%rdx,%r11
	movq	%r9,%rdx
	adcq	$0,%rbp

	movq	%r10,%rcx
	imulq	32(%rsi),%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	movq	0(%rsi),%rax
	sbbq	%rdx,%r9

	addq	%rbp,%r8
	adcq	$0,%r9


	mulq	%r10
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r10,%r8
	sbbq	$0,%rbp

	mulq	%r10
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%r10,%rax
	adcq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%rbp

	movq	%r11,%rcx
	imulq	32(%rsi),%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r9
	movq	0(%rsi),%rax
	sbbq	%rdx,%r10

	addq	%rbp,%r9
	adcq	$0,%r10


	mulq	%r11
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r11,%r9
	sbbq	$0,%rbp

	mulq	%r11
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	movq	%r11,%rdx
	adcq	$0,%rbp

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r10
	sbbq	%rdx,%r11

	addq	%rbp,%r10
	adcq	$0,%r11


	xorq	%rdx,%rdx
	addq	%r12,%r8
	adcq	%r13,%r9
	movq	%r8,%r12
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%rax
	adcq	$0,%rdx


	subq	0(%rsi),%r8
	movq	%r10,%r14
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r15
	sbbq	24(%rsi),%r11
	sbbq	$0,%rdx

	cmovcq	%r12,%r8
	cmovncq	%r9,%rax
	cmovncq	%r10,%r14
	cmovncq	%r11,%r15

	decq	%rbx
	jnz	.Loop_ord_sqr

	movq	%r8,0(%rdi)
	movq	%rax,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r14,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r15,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqr_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont

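// ADX/BMI2 variant of ecp_nistz256_ord_mul_mont, reached via the capability
// check above; same contract, using mulx with the adcx/adox dual carry chains.
// %rsi and the .Lord pointer are biased by -128 so the displacements below
// stay within signed-byte range.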
.type	ecp_nistz256_ord_mul_montx,@function
.align	32
ecp_nistz256_ord_mul_montx:
.cfi_startproc
.Lecp_nistz256_ord_mul_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mulx_body:

	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi
	leaq	.Lord-128(%rip),%r14
	movq	.LordK(%rip),%r15


	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	mulxq	%r11,%rbp,%r11
	addq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	mulxq	%r15,%rdx,%rax
	adcq	%rbp,%r10
	adcq	%rcx,%r11
	adcq	$0,%r12


	xorq	%r13,%r13
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	24+128(%r14),%rcx,%rbp
	movq	8(%rbx),%rdx
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12
	adcxq	%r8,%r12
	adoxq	%r8,%r13
	adcq	$0,%r13


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%r14),%rcx,%rbp
	movq	16(%rbx),%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcxq	%r9,%r13
	adoxq	%r9,%r8
	adcq	$0,%r8


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%r14),%rcx,%rbp
	movq	24(%rbx),%rdx
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8
	adcxq	%r10,%r8
	adoxq	%r10,%r9
	adcq	$0,%r9


	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10


	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%r14),%rcx,%rbp
	leaq	128(%r14),%r14
	movq	%r12,%rbx
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	movq	%r13,%rdx
	adcxq	%r11,%r9
	adoxq	%r11,%r10
	adcq	$0,%r10



	movq	%r8,%rcx
	subq	0(%r14),%r12
	sbbq	8(%r14),%r13
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mulx_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx

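// ADX/BMI2 variant of ecp_nistz256_ord_sqr_mont; same contract as above.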
.type	ecp_nistz256_ord_sqr_montx,@function
.align	32
ecp_nistz256_ord_sqr_montx:
.cfi_startproc
.Lecp_nistz256_ord_sqr_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqrx_body:

	movq	%rdx,%rbx
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	.Lord(%rip),%rsi
	jmp	.Loop_ord_sqrx

.align	32
.Loop_ord_sqrx:
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	movq	%rdx,%rax
.byte	102,73,15,110,206
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	addq	%rcx,%r10
.byte	102,73,15,110,215
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13

	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13

	mulxq	%r8,%rcx,%r14
	movq	%rax,%rdx
.byte	102,73,15,110,216
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14


	mulxq	%rdx,%r8,%rbp
.byte	102,72,15,126,202
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
.byte	102,72,15,126,210
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
	mulxq	%rdx,%rcx,%rbp
.byte	0x67
.byte	102,72,15,126,218
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	adoxq	%rbp,%r13
	mulxq	%rdx,%rcx,%rax
	adoxq	%rcx,%r14
	adoxq	%rax,%r15


	movq	%r8,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	xorq	%rax,%rax
	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	adcxq	%rax,%r8


	movq	%r9,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	adoxq	%rax,%r9


	movq	%r10,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	adcxq	%rax,%r10


	movq	%r11,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	adoxq	%rax,%r11


	addq	%r8,%r12
	adcq	%r13,%r9
	movq	%r12,%rdx
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%r14
	adcq	$0,%rax


	subq	0(%rsi),%r12
	movq	%r10,%r15
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r8
	sbbq	24(%rsi),%r11
	sbbq	$0,%rax

	cmovncq	%r12,%rdx
	cmovncq	%r9,%r14
	cmovncq	%r10,%r15
	cmovncq	%r11,%r8

	decq	%rbx
	jnz	.Loop_ord_sqrx

	movq	%rdx,0(%rdi)
	movq	%r14,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r15,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r8,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqrx_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx






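// ecp_nistz256_mul_mont(res, a, b): res (%rdi) = a (%rsi) * b (%rdx) * 2^-256
// mod p, i.e. Montgomery multiplication modulo the field prime .Lpoly.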
.globl	ecp_nistz256_mul_mont
.hidden ecp_nistz256_mul_mont
.type	ecp_nistz256_mul_mont,@function
.align	32
ecp_nistz256_mul_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
.Lmul_mont:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lmul_body:
	cmpl	$0x80100,%ecx
	je	.Lmul_montx
	movq	%rdx,%rbx
	movq	0(%rdx),%rax
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12

	call	__ecp_nistz256_mul_montq
	jmp	.Lmul_mont_done

.align	32
.Lmul_montx:
	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_mul_montx
.Lmul_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lmul_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont

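// Inner multiply plus Montgomery reduction for the mulq path. The reduction
// exploits the special form of p: each step folds the low limb back in with
// one shift pair and a single multiply by .Lpoly+24 instead of a full
// four-limb multiply.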
.type	__ecp_nistz256_mul_montq,@function
.align	32
__ecp_nistz256_mul_montq:
.cfi_startproc


	movq	%rax,%rbp
	mulq	%r9
	movq	.Lpoly+8(%rip),%r14
	movq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%r9

	mulq	%r10
	movq	.Lpoly+24(%rip),%r15
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%r11
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r12
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	xorq	%r13,%r13
	movq	%rdx,%r12










	movq	%r8,%rbp
	shlq	$32,%r8
	mulq	%r15
	shrq	$32,%rbp
	addq	%r8,%r9
	adcq	%rbp,%r10
	adcq	%rax,%r11
	movq	8(%rbx),%rax
	adcq	%rdx,%r12
	adcq	$0,%r13
	xorq	%r8,%r8



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8



	movq	%r9,%rbp
	shlq	$32,%r9
	mulq	%r15
	shrq	$32,%rbp
	addq	%r9,%r10
	adcq	%rbp,%r11
	adcq	%rax,%r12
	movq	16(%rbx),%rax
	adcq	%rdx,%r13
	adcq	$0,%r8
	xorq	%r9,%r9



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9



	movq	%r10,%rbp
	shlq	$32,%r10
	mulq	%r15
	shrq	$32,%rbp
	addq	%r10,%r11
	adcq	%rbp,%r12
	adcq	%rax,%r13
	movq	24(%rbx),%rax
	adcq	%rdx,%r8
	adcq	$0,%r9
	xorq	%r10,%r10



	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10



	movq	%r11,%rbp
	shlq	$32,%r11
	mulq	%r15
	shrq	$32,%rbp
	addq	%r11,%r12
	adcq	%rbp,%r13
	movq	%r12,%rcx
	adcq	%rax,%r8
	adcq	%rdx,%r9
	movq	%r13,%rbp
	adcq	$0,%r10



	subq	$-1,%r12
	movq	%r8,%rbx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rdx
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rcx,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rbx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rdx,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq








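// ecp_nistz256_sqr_mont(res, a): res (%rdi) = a (%rsi)^2 * 2^-256 mod p.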
.globl	ecp_nistz256_sqr_mont
.hidden ecp_nistz256_sqr_mont
.type	ecp_nistz256_sqr_mont,@function
.align	32
ecp_nistz256_sqr_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsqr_body:
	cmpl	$0x80100,%ecx
	je	.Lsqr_montx
	movq	0(%rsi),%rax
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8

	call	__ecp_nistz256_sqr_montq
	jmp	.Lsqr_mont_done

.align	32
.Lsqr_montx:
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_sqr_montx
.Lsqr_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lsqr_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont

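// Inner squaring for the mulq path: compute the off-diagonal products once,
// double them, add the diagonal squares, then run the same p-specific
// Montgomery reduction as __ecp_nistz256_mul_montq.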
.type	__ecp_nistz256_sqr_montq,@function
.align	32
__ecp_nistz256_sqr_montq:
.cfi_startproc
	movq	%rax,%r13
	mulq	%r14
	movq	%rax,%r9
	movq	%r15,%rax
	movq	%rdx,%r10

	mulq	%r13
	addq	%rax,%r10
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r13
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12


	mulq	%r14
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%r14
	addq	%rax,%r12
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbp,%r12
	movq	%rdx,%r13
	adcq	$0,%r13


	mulq	%r15
	xorq	%r15,%r15
	addq	%rax,%r13
	movq	0(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	mulq	%rax
	movq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r9
	adcq	%rax,%r10
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r11
	adcq	%rax,%r12
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r13
	adcq	%rax,%r14
	movq	%r8,%rax
	adcq	%rdx,%r15

	movq	.Lpoly+8(%rip),%rsi
	movq	.Lpoly+24(%rip),%rbp




	movq	%r8,%rcx
	shlq	$32,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r8,%r9
	adcq	%rcx,%r10
	adcq	%rax,%r11
	movq	%r9,%rax
	adcq	$0,%rdx



	movq	%r9,%rcx
	shlq	$32,%r9
	movq	%rdx,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r9,%r10
	adcq	%rcx,%r11
	adcq	%rax,%r8
	movq	%r10,%rax
	adcq	$0,%rdx



	movq	%r10,%rcx
	shlq	$32,%r10
	movq	%rdx,%r9
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r10,%r11
	adcq	%rcx,%r8
	adcq	%rax,%r9
	movq	%r11,%rax
	adcq	$0,%rdx



	movq	%r11,%rcx
	shlq	$32,%r11
	movq	%rdx,%r10
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r11,%r8
	adcq	%rcx,%r9
	adcq	%rax,%r10
	adcq	$0,%rdx
	xorq	%r11,%r11



	addq	%r8,%r12
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%rdx,%r15
	movq	%r13,%r9
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%rcx
	sbbq	%rbp,%r15
	sbbq	$0,%r11

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%rcx,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
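// ADX/BMI2 inner multiply; callers pre-bias %rsi by -128 so the 0..24+128
// displacements below address the original limbs.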
.type	__ecp_nistz256_mul_montx,@function
.align	32
__ecp_nistz256_mul_montx:
.cfi_startproc


	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	movq	$32,%r14
	xorq	%r13,%r13
	mulxq	%r11,%rbp,%r11
	movq	.Lpoly+24(%rip),%r15
	adcq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	adcq	%rbp,%r10
	shlxq	%r14,%r8,%rbp
	adcq	%rcx,%r11
	shrxq	%r14,%r8,%rcx
	adcq	$0,%r12



	addq	%rbp,%r9
	adcq	%rcx,%r10

	mulxq	%r15,%rcx,%rbp
	movq	8(%rbx),%rdx
	adcq	%rcx,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13
	xorq	%r8,%r8



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	adcxq	%rcx,%r12
	shlxq	%r14,%r9,%rcx
	adoxq	%rbp,%r13
	shrxq	%r14,%r9,%rbp

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8



	addq	%rcx,%r10
	adcq	%rbp,%r11

	mulxq	%r15,%rcx,%rbp
	movq	16(%rbx),%rdx
	adcq	%rcx,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8
	xorq	%r9,%r9



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	adcxq	%rcx,%r13
	shlxq	%r14,%r10,%rcx
	adoxq	%rbp,%r8
	shrxq	%r14,%r10,%rbp

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9



	addq	%rcx,%r11
	adcq	%rbp,%r12

	mulxq	%r15,%rcx,%rbp
	movq	24(%rbx),%rdx
	adcq	%rcx,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9
	xorq	%r10,%r10



	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	adcxq	%rcx,%r8
	shlxq	%r14,%r11,%rcx
	adoxq	%rbp,%r9
	shrxq	%r14,%r11,%rbp

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10



	addq	%rcx,%r12
	adcq	%rbp,%r13

	mulxq	%r15,%rcx,%rbp
	movq	%r12,%rbx
	movq	.Lpoly+8(%rip),%r14
	adcq	%rcx,%r8
	movq	%r13,%rdx
	adcq	%rbp,%r9
	adcq	$0,%r10



	xorl	%eax,%eax
	movq	%r8,%rcx
	sbbq	$-1,%r12
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rbp
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rbp,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx

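// ADX/BMI2 inner squaring; same -128 bias convention and the same p-specific
// reduction, driven by shlx/shrx instead of shift-and-mask.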
.type	__ecp_nistz256_sqr_montx,@function
.align	32
__ecp_nistz256_sqr_montx:
.cfi_startproc
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	xorl	%eax,%eax
	adcq	%rcx,%r10
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13


	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13


	mulxq	%r8,%rcx,%r14
	movq	0+128(%rsi),%rdx
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14

	mulxq	%rdx,%r8,%rbp
	movq	8+128(%rsi),%rdx
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
	movq	16+128(%rsi),%rdx
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
.byte	0x67
	mulxq	%rdx,%rcx,%rbp
	movq	24+128(%rsi),%rdx
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	movq	$32,%rsi
	adoxq	%rbp,%r13
.byte	0x67,0x67
	mulxq	%rdx,%rcx,%rax
	movq	.Lpoly+24(%rip),%rdx
	adoxq	%rcx,%r14
	shlxq	%rsi,%r8,%rcx
	adoxq	%rax,%r15
	shrxq	%rsi,%r8,%rax
	movq	%rdx,%rbp


	addq	%rcx,%r9
	adcq	%rax,%r10

	mulxq	%r8,%rcx,%r8
	adcq	%rcx,%r11
	shlxq	%rsi,%r9,%rcx
	adcq	$0,%r8
	shrxq	%rsi,%r9,%rax


	addq	%rcx,%r10
	adcq	%rax,%r11

	mulxq	%r9,%rcx,%r9
	adcq	%rcx,%r8
	shlxq	%rsi,%r10,%rcx
	adcq	$0,%r9
	shrxq	%rsi,%r10,%rax


	addq	%rcx,%r11
	adcq	%rax,%r8

	mulxq	%r10,%rcx,%r10
	adcq	%rcx,%r9
	shlxq	%rsi,%r11,%rcx
	adcq	$0,%r10
	shrxq	%rsi,%r11,%rax


	addq	%rcx,%r8
	adcq	%rax,%r9

	mulxq	%r11,%rcx,%r11
	adcq	%rcx,%r10
	adcq	$0,%r11

	xorq	%rdx,%rdx
	addq	%r8,%r12
	movq	.Lpoly+8(%rip),%rsi
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%r11,%r15
	movq	%r13,%r9
	adcq	$0,%rdx

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%r11
	sbbq	%rbp,%r15
	sbbq	$0,%rdx

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%r11,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx


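// ecp_nistz256_select_w5(val, in_t, index): constant-time copy of one entry
// from a table of 16 Jacobian points (96 bytes each) at %rsi into %rdi,
// selected by %edx. Every entry is read and masked with pcmpeqd, so the
// memory access pattern is independent of the index; an index of 0 matches
// nothing and yields an all-zero output.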
.globl	ecp_nistz256_select_w5
.hidden ecp_nistz256_select_w5
.type	ecp_nistz256_select_w5,@function
.align	32
ecp_nistz256_select_w5:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w5
	movdqa	.LOne(%rip),%xmm0
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7

	movdqa	%xmm0,%xmm8
	pshufd	$0,%xmm1,%xmm1

	movq	$16,%rax
.Lselect_loop_sse_w5:

	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	pcmpeqd	%xmm1,%xmm15

	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	movdqa	64(%rsi),%xmm13
	movdqa	80(%rsi),%xmm14
	leaq	96(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	pand	%xmm15,%xmm13
	por	%xmm12,%xmm5
	pand	%xmm15,%xmm14
	por	%xmm13,%xmm6
	por	%xmm14,%xmm7

	decq	%rax
	jnz	.Lselect_loop_sse_w5

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	movdqu	%xmm6,64(%rdi)
	movdqu	%xmm7,80(%rdi)
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w5:
.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5



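// ecp_nistz256_select_w7(val, in_t, index): like select_w5, but over a table
// of 64 affine points (64 bytes each), again scanning the whole table in
// constant time.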
.globl	ecp_nistz256_select_w7
.hidden ecp_nistz256_select_w7
.type	ecp_nistz256_select_w7,@function
.align	32
ecp_nistz256_select_w7:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w7
	movdqa	.LOne(%rip),%xmm8
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5

	movdqa	%xmm8,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	$64,%rax

.Lselect_loop_sse_w7:
	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	pcmpeqd	%xmm1,%xmm15
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	leaq	64(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	prefetcht0	255(%rsi)
	por	%xmm12,%xmm5

	decq	%rax
	jnz	.Lselect_loop_sse_w7

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w7:
.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7

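// AVX2 variant of select_w5, reached when the testl $32 probe above finds the
// AVX2 bit in OPENSSL_ia32cap_P; it scans two table entries per iteration.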
.type	ecp_nistz256_avx2_select_w5,@function
.align	32
ecp_nistz256_avx2_select_w5:
.cfi_startproc
.Lavx2_select_w5:
	vzeroupper
	vmovdqa	.LTwo(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3
	vpxor	%ymm4,%ymm4,%ymm4

	vmovdqa	.LOne(%rip),%ymm5
	vmovdqa	.LTwo(%rip),%ymm10

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$8,%rax
.Lselect_loop_avx2_w5:

	vmovdqa	0(%rsi),%ymm6
	vmovdqa	32(%rsi),%ymm7
	vmovdqa	64(%rsi),%ymm8

	vmovdqa	96(%rsi),%ymm11
	vmovdqa	128(%rsi),%ymm12
	vmovdqa	160(%rsi),%ymm13

	vpcmpeqd	%ymm1,%ymm5,%ymm9
	vpcmpeqd	%ymm1,%ymm10,%ymm14

	vpaddd	%ymm0,%ymm5,%ymm5
	vpaddd	%ymm0,%ymm10,%ymm10
	leaq	192(%rsi),%rsi

	vpand	%ymm9,%ymm6,%ymm6
	vpand	%ymm9,%ymm7,%ymm7
	vpand	%ymm9,%ymm8,%ymm8
	vpand	%ymm14,%ymm11,%ymm11
	vpand	%ymm14,%ymm12,%ymm12
	vpand	%ymm14,%ymm13,%ymm13

	vpxor	%ymm6,%ymm2,%ymm2
	vpxor	%ymm7,%ymm3,%ymm3
	vpxor	%ymm8,%ymm4,%ymm4
	vpxor	%ymm11,%ymm2,%ymm2
	vpxor	%ymm12,%ymm3,%ymm3
	vpxor	%ymm13,%ymm4,%ymm4

	decq	%rax
	jnz	.Lselect_loop_avx2_w5

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vmovdqu	%ymm4,64(%rdi)
	vzeroupper
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w5:
.size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5



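// AVX2 variant of select_w7: 21 loop iterations covering three entries each,
// plus one trailing entry after the loop, for all 64 affine table slots.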
.globl	ecp_nistz256_avx2_select_w7
.hidden ecp_nistz256_avx2_select_w7
.type	ecp_nistz256_avx2_select_w7,@function
.align	32
ecp_nistz256_avx2_select_w7:
.cfi_startproc
.Lavx2_select_w7:
_CET_ENDBR
	vzeroupper
	vmovdqa	.LThree(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3

	vmovdqa	.LOne(%rip),%ymm4
	vmovdqa	.LTwo(%rip),%ymm8
	vmovdqa	.LThree(%rip),%ymm12

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1


	movq	$21,%rax
.Lselect_loop_avx2_w7:

	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vmovdqa	64(%rsi),%ymm9
	vmovdqa	96(%rsi),%ymm10

	vmovdqa	128(%rsi),%ymm13
	vmovdqa	160(%rsi),%ymm14

	vpcmpeqd	%ymm1,%ymm4,%ymm7
	vpcmpeqd	%ymm1,%ymm8,%ymm11
	vpcmpeqd	%ymm1,%ymm12,%ymm15

	vpaddd	%ymm0,%ymm4,%ymm4
	vpaddd	%ymm0,%ymm8,%ymm8
	vpaddd	%ymm0,%ymm12,%ymm12
	leaq	192(%rsi),%rsi

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm11,%ymm9,%ymm9
	vpand	%ymm11,%ymm10,%ymm10
	vpand	%ymm15,%ymm13,%ymm13
	vpand	%ymm15,%ymm14,%ymm14

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm13,%ymm2,%ymm2
	vpxor	%ymm14,%ymm3,%ymm3

	decq	%rax
	jnz	.Lselect_loop_avx2_w7


	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vpcmpeqd	%ymm1,%ymm4,%ymm7

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vzeroupper
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w7:
.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
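// __ecp_nistz256_add_toq: (%r12,%r13,%r8,%r9) += (%rbx) mod p; the result is
// stored to (%rdi) and left in the same registers. %r14/%r15 are expected to
// hold .Lpoly+8 and .Lpoly+24.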
.type	__ecp_nistz256_add_toq,@function
.align	32
__ecp_nistz256_add_toq:
.cfi_startproc
	xorq	%r11,%r11
	addq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq

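// __ecp_nistz256_sub_fromq: (%r12,%r13,%r8,%r9) -= (%rbx) mod p, with a
// conditional add-back of p on borrow; result stored to (%rdi).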
.type	__ecp_nistz256_sub_fromq,@function
.align	32
__ecp_nistz256_sub_fromq:
.cfi_startproc
	subq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	%r11,%r11

	addq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9
	testq	%r11,%r11

	cmovzq	%rax,%r12
	cmovzq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovzq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovzq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq

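// __ecp_nistz256_subq: register-only subtraction (%rax,%rbp,%rcx,%r10) minus
// (%r12,%r13,%r8,%r9) mod p; the result is left in %r12,%r13,%r8,%r9 with no
// store to memory.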
.type	__ecp_nistz256_subq,@function
.align	32
__ecp_nistz256_subq:
.cfi_startproc
	subq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	%r11,%r11

	addq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10
	testq	%r11,%r11

	cmovnzq	%rax,%r12
	cmovnzq	%rbp,%r13
	cmovnzq	%rcx,%r8
	cmovnzq	%r10,%r9

	ret
.cfi_endproc
.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq

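// __ecp_nistz256_mul_by_2q: (%r12,%r13,%r8,%r9) = 2 * (%r12,%r13,%r8,%r9)
// mod p, stored to (%rdi).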
.type	__ecp_nistz256_mul_by_2q,@function
.align	32
__ecp_nistz256_mul_by_2q:
.cfi_startproc
	xorq	%r11,%r11
	addq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
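// ecp_nistz256_point_double(r, a): r (%rdi) = 2*a (%rsi) for Jacobian points
// (X, Y, Z of 32 bytes each). Also the target of the shortcut jump taken by
// point_add when both inputs are the same point.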
.globl	ecp_nistz256_point_double
.hidden ecp_nistz256_point_double
.type	ecp_nistz256_point_double,@function
.align	32
ecp_nistz256_point_double:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_doublex
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$160+8,%rsp
.cfi_adjust_cfa_offset	32*5+8
.Lpoint_doubleq_body:

.Lpoint_double_shortcutq:
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199
.byte	102,73,15,110,202
.byte	102,73,15,110,211

	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-0(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	32(%rbx),%rax
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-0(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215
	call	__ecp_nistz256_mul_montq
	call	__ecp_nistz256_mul_by_2q

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207
	call	__ecp_nistz256_sqr_montq
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	movq	64(%rsp),%rax
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	0+32(%rsp),%rax
	movq	8+32(%rsp),%r14
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199
	call	__ecp_nistz256_sqr_montq

	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subq

	movq	32(%rsp),%rax
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-0(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

.byte	102,72,15,126,203
.byte	102,72,15,126,207
	call	__ecp_nistz256_sub_fromq

	leaq	160+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_doubleq_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
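// ecp_nistz256_point_add(r, a, b): r (%rdi) = a (%rsi) + b (%rdx) for
// Jacobian points. It detects a == b and branches to the doubling shortcut,
// and handles points at infinity via the masked (pandn/pand) merges at the
// end of the routine.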
2653.globl	ecp_nistz256_point_add
2654.hidden ecp_nistz256_point_add
2655.type	ecp_nistz256_point_add,@function
2656.align	32
2657ecp_nistz256_point_add:
2658.cfi_startproc
2659_CET_ENDBR
2660	leaq	OPENSSL_ia32cap_P(%rip),%rcx
2661	movq	8(%rcx),%rcx
2662	andl	$0x80100,%ecx
2663	cmpl	$0x80100,%ecx
2664	je	.Lpoint_addx
2665	pushq	%rbp
2666.cfi_adjust_cfa_offset	8
2667.cfi_offset	%rbp,-16
2668	pushq	%rbx
2669.cfi_adjust_cfa_offset	8
2670.cfi_offset	%rbx,-24
2671	pushq	%r12
2672.cfi_adjust_cfa_offset	8
2673.cfi_offset	%r12,-32
2674	pushq	%r13
2675.cfi_adjust_cfa_offset	8
2676.cfi_offset	%r13,-40
2677	pushq	%r14
2678.cfi_adjust_cfa_offset	8
2679.cfi_offset	%r14,-48
2680	pushq	%r15
2681.cfi_adjust_cfa_offset	8
2682.cfi_offset	%r15,-56
2683	subq	$576+8,%rsp
2684.cfi_adjust_cfa_offset	32*18+8
2685.Lpoint_addq_body:
2686
2687	movdqu	0(%rsi),%xmm0
2688	movdqu	16(%rsi),%xmm1
2689	movdqu	32(%rsi),%xmm2
2690	movdqu	48(%rsi),%xmm3
2691	movdqu	64(%rsi),%xmm4
2692	movdqu	80(%rsi),%xmm5
2693	movq	%rsi,%rbx
2694	movq	%rdx,%rsi
2695	movdqa	%xmm0,384(%rsp)
2696	movdqa	%xmm1,384+16(%rsp)
2697	movdqa	%xmm2,416(%rsp)
2698	movdqa	%xmm3,416+16(%rsp)
2699	movdqa	%xmm4,448(%rsp)
2700	movdqa	%xmm5,448+16(%rsp)
2701	por	%xmm4,%xmm5
2702
2703	movdqu	0(%rsi),%xmm0
2704	pshufd	$0xb1,%xmm5,%xmm3
2705	movdqu	16(%rsi),%xmm1
2706	movdqu	32(%rsi),%xmm2
2707	por	%xmm3,%xmm5
2708	movdqu	48(%rsi),%xmm3
2709	movq	64+0(%rsi),%rax
2710	movq	64+8(%rsi),%r14
2711	movq	64+16(%rsi),%r15
2712	movq	64+24(%rsi),%r8
2713	movdqa	%xmm0,480(%rsp)
2714	pshufd	$0x1e,%xmm5,%xmm4
2715	movdqa	%xmm1,480+16(%rsp)
2716	movdqu	64(%rsi),%xmm0
2717	movdqu	80(%rsi),%xmm1
2718	movdqa	%xmm2,512(%rsp)
2719	movdqa	%xmm3,512+16(%rsp)
2720	por	%xmm4,%xmm5
2721	pxor	%xmm4,%xmm4
2722	por	%xmm0,%xmm1
2723.byte	102,72,15,110,199
2724
2725	leaq	64-0(%rsi),%rsi
2726	movq	%rax,544+0(%rsp)
2727	movq	%r14,544+8(%rsp)
2728	movq	%r15,544+16(%rsp)
2729	movq	%r8,544+24(%rsp)
2730	leaq	96(%rsp),%rdi
2731	call	__ecp_nistz256_sqr_montq
2732
2733	pcmpeqd	%xmm4,%xmm5
2734	pshufd	$0xb1,%xmm1,%xmm4
2735	por	%xmm1,%xmm4
2736	pshufd	$0,%xmm5,%xmm5
2737	pshufd	$0x1e,%xmm4,%xmm3
2738	por	%xmm3,%xmm4
2739	pxor	%xmm3,%xmm3
2740	pcmpeqd	%xmm3,%xmm4
2741	pshufd	$0,%xmm4,%xmm4
2742	movq	64+0(%rbx),%rax
2743	movq	64+8(%rbx),%r14
2744	movq	64+16(%rbx),%r15
2745	movq	64+24(%rbx),%r8
2746.byte	102,72,15,110,203
2747
2748	leaq	64-0(%rbx),%rsi
2749	leaq	32(%rsp),%rdi
2750	call	__ecp_nistz256_sqr_montq
2751
2752	movq	544(%rsp),%rax
2753	leaq	544(%rsp),%rbx
2754	movq	0+96(%rsp),%r9
2755	movq	8+96(%rsp),%r10
2756	leaq	0+96(%rsp),%rsi
2757	movq	16+96(%rsp),%r11
2758	movq	24+96(%rsp),%r12
2759	leaq	224(%rsp),%rdi
2760	call	__ecp_nistz256_mul_montq
2761
2762	movq	448(%rsp),%rax
2763	leaq	448(%rsp),%rbx
2764	movq	0+32(%rsp),%r9
2765	movq	8+32(%rsp),%r10
2766	leaq	0+32(%rsp),%rsi
2767	movq	16+32(%rsp),%r11
2768	movq	24+32(%rsp),%r12
2769	leaq	256(%rsp),%rdi
2770	call	__ecp_nistz256_mul_montq
2771
2772	movq	416(%rsp),%rax
2773	leaq	416(%rsp),%rbx
2774	movq	0+224(%rsp),%r9
2775	movq	8+224(%rsp),%r10
2776	leaq	0+224(%rsp),%rsi
2777	movq	16+224(%rsp),%r11
2778	movq	24+224(%rsp),%r12
2779	leaq	224(%rsp),%rdi
2780	call	__ecp_nistz256_mul_montq
2781
2782	movq	512(%rsp),%rax
2783	leaq	512(%rsp),%rbx
2784	movq	0+256(%rsp),%r9
2785	movq	8+256(%rsp),%r10
2786	leaq	0+256(%rsp),%rsi
2787	movq	16+256(%rsp),%r11
2788	movq	24+256(%rsp),%r12
2789	leaq	256(%rsp),%rdi
2790	call	__ecp_nistz256_mul_montq
2791
2792	leaq	224(%rsp),%rbx
2793	leaq	64(%rsp),%rdi
2794	call	__ecp_nistz256_sub_fromq
2795
2796	orq	%r13,%r12
2797	movdqa	%xmm4,%xmm2
2798	orq	%r8,%r12
2799	orq	%r9,%r12
2800	por	%xmm5,%xmm2
2801.byte	102,73,15,110,220
2802
2803	movq	384(%rsp),%rax
2804	leaq	384(%rsp),%rbx
2805	movq	0+96(%rsp),%r9
2806	movq	8+96(%rsp),%r10
2807	leaq	0+96(%rsp),%rsi
2808	movq	16+96(%rsp),%r11
2809	movq	24+96(%rsp),%r12
2810	leaq	160(%rsp),%rdi
2811	call	__ecp_nistz256_mul_montq
2812
2813	movq	480(%rsp),%rax
2814	leaq	480(%rsp),%rbx
2815	movq	0+32(%rsp),%r9
2816	movq	8+32(%rsp),%r10
2817	leaq	0+32(%rsp),%rsi
2818	movq	16+32(%rsp),%r11
2819	movq	24+32(%rsp),%r12
2820	leaq	192(%rsp),%rdi
2821	call	__ecp_nistz256_mul_montq
2822
2823	leaq	160(%rsp),%rbx
2824	leaq	0(%rsp),%rdi
2825	call	__ecp_nistz256_sub_fromq
2826
2827	orq	%r13,%r12
2828	orq	%r8,%r12
2829	orq	%r9,%r12
2830
2831.byte	102,73,15,126,208
2832.byte	102,73,15,126,217
2833	orq	%r8,%r12
2834.byte	0x3e
2835	jnz	.Ladd_proceedq
2836
2837
2838
2839	testq	%r9,%r9
2840	jz	.Ladd_doubleq
2841
2842
2843
2844
2845
2846
2847.byte	102,72,15,126,199
2848	pxor	%xmm0,%xmm0
2849	movdqu	%xmm0,0(%rdi)
2850	movdqu	%xmm0,16(%rdi)
2851	movdqu	%xmm0,32(%rdi)
2852	movdqu	%xmm0,48(%rdi)
2853	movdqu	%xmm0,64(%rdi)
2854	movdqu	%xmm0,80(%rdi)
2855	jmp	.Ladd_doneq
2856
2857.align	32
2858.Ladd_doubleq:
2859.byte	102,72,15,126,206
2860.byte	102,72,15,126,199
2861	addq	$416,%rsp
2862.cfi_adjust_cfa_offset	-416
2863	jmp	.Lpoint_double_shortcutq
2864.cfi_adjust_cfa_offset	416
2865
2866.align	32
2867.Ladd_proceedq:
2868	movq	0+64(%rsp),%rax
2869	movq	8+64(%rsp),%r14
2870	leaq	0+64(%rsp),%rsi
2871	movq	16+64(%rsp),%r15
2872	movq	24+64(%rsp),%r8
2873	leaq	96(%rsp),%rdi
2874	call	__ecp_nistz256_sqr_montq
2875
2876	movq	448(%rsp),%rax
2877	leaq	448(%rsp),%rbx
2878	movq	0+0(%rsp),%r9
2879	movq	8+0(%rsp),%r10
2880	leaq	0+0(%rsp),%rsi
2881	movq	16+0(%rsp),%r11
2882	movq	24+0(%rsp),%r12
2883	leaq	352(%rsp),%rdi
2884	call	__ecp_nistz256_mul_montq
2885
2886	movq	0+0(%rsp),%rax
2887	movq	8+0(%rsp),%r14
2888	leaq	0+0(%rsp),%rsi
2889	movq	16+0(%rsp),%r15
2890	movq	24+0(%rsp),%r8
2891	leaq	32(%rsp),%rdi
2892	call	__ecp_nistz256_sqr_montq
2893
2894	movq	544(%rsp),%rax
2895	leaq	544(%rsp),%rbx
2896	movq	0+352(%rsp),%r9
2897	movq	8+352(%rsp),%r10
2898	leaq	0+352(%rsp),%rsi
2899	movq	16+352(%rsp),%r11
2900	movq	24+352(%rsp),%r12
2901	leaq	352(%rsp),%rdi
2902	call	__ecp_nistz256_mul_montq
2903
2904	movq	0(%rsp),%rax
2905	leaq	0(%rsp),%rbx
2906	movq	0+32(%rsp),%r9
2907	movq	8+32(%rsp),%r10
2908	leaq	0+32(%rsp),%rsi
2909	movq	16+32(%rsp),%r11
2910	movq	24+32(%rsp),%r12
2911	leaq	128(%rsp),%rdi
2912	call	__ecp_nistz256_mul_montq
2913
2914	movq	160(%rsp),%rax
2915	leaq	160(%rsp),%rbx
2916	movq	0+32(%rsp),%r9
2917	movq	8+32(%rsp),%r10
2918	leaq	0+32(%rsp),%rsi
2919	movq	16+32(%rsp),%r11
2920	movq	24+32(%rsp),%r12
2921	leaq	192(%rsp),%rdi
2922	call	__ecp_nistz256_mul_montq
2923
2924
2925
2926
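// res_x = R^2 - H^3 - 2*U1*H^2.  The doubling of U1*H^2 is inlined:
// shift left one bit with the carry caught in %r11, subtract p, and let
// cmovc keep the unreduced value when the subtraction borrowed -- a
// branch-free reduction.  %rsi is aimed at R^2 (96) for the subtraction
// that follows.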
2927	xorq	%r11,%r11
2928	addq	%r12,%r12
2929	leaq	96(%rsp),%rsi
2930	adcq	%r13,%r13
2931	movq	%r12,%rax
2932	adcq	%r8,%r8
2933	adcq	%r9,%r9
2934	movq	%r13,%rbp
2935	adcq	$0,%r11
2936
2937	subq	$-1,%r12
2938	movq	%r8,%rcx
2939	sbbq	%r14,%r13
2940	sbbq	$0,%r8
2941	movq	%r9,%r10
2942	sbbq	%r15,%r9
2943	sbbq	$0,%r11
2944
2945	cmovcq	%rax,%r12
2946	movq	0(%rsi),%rax
2947	cmovcq	%rbp,%r13
2948	movq	8(%rsi),%rbp
2949	cmovcq	%rcx,%r8
2950	movq	16(%rsi),%rcx
2951	cmovcq	%r10,%r9
2952	movq	24(%rsi),%r10
2953
2954	call	__ecp_nistz256_subq
2955
2956	leaq	128(%rsp),%rbx
2957	leaq	288(%rsp),%rdi
2958	call	__ecp_nistz256_sub_fromq
2959
2960	movq	192+0(%rsp),%rax
2961	movq	192+8(%rsp),%rbp
2962	movq	192+16(%rsp),%rcx
2963	movq	192+24(%rsp),%r10
2964	leaq	320(%rsp),%rdi
2965
2966	call	__ecp_nistz256_subq
2967
2968	movq	%r12,0(%rdi)
2969	movq	%r13,8(%rdi)
2970	movq	%r8,16(%rdi)
2971	movq	%r9,24(%rdi)
2972	movq	128(%rsp),%rax
2973	leaq	128(%rsp),%rbx
2974	movq	0+224(%rsp),%r9
2975	movq	8+224(%rsp),%r10
2976	leaq	0+224(%rsp),%rsi
2977	movq	16+224(%rsp),%r11
2978	movq	24+224(%rsp),%r12
2979	leaq	256(%rsp),%rdi
2980	call	__ecp_nistz256_mul_montq
2981
2982	movq	320(%rsp),%rax
2983	leaq	320(%rsp),%rbx
2984	movq	0+64(%rsp),%r9
2985	movq	8+64(%rsp),%r10
2986	leaq	0+64(%rsp),%rsi
2987	movq	16+64(%rsp),%r11
2988	movq	24+64(%rsp),%r12
2989	leaq	320(%rsp),%rdi
2990	call	__ecp_nistz256_mul_montq
2991
2992	leaq	256(%rsp),%rbx
2993	leaq	320(%rsp),%rdi
2994	call	__ecp_nistz256_sub_fromq
2995
2996.byte	102,72,15,126,199
2997
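// Constant-time assembly of the result.  %xmm5 is the all-ones mask
// "in1 was at infinity", %xmm4 the mask "in2 was at infinity"; each
// 16-byte half of each coordinate is built branch-free with
// pandn/pand/por as
//   out = in2infty ? in1 : (in1infty ? in2 : computed)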
2998	movdqa	%xmm5,%xmm0
2999	movdqa	%xmm5,%xmm1
3000	pandn	352(%rsp),%xmm0
3001	movdqa	%xmm5,%xmm2
3002	pandn	352+16(%rsp),%xmm1
3003	movdqa	%xmm5,%xmm3
3004	pand	544(%rsp),%xmm2
3005	pand	544+16(%rsp),%xmm3
3006	por	%xmm0,%xmm2
3007	por	%xmm1,%xmm3
3008
3009	movdqa	%xmm4,%xmm0
3010	movdqa	%xmm4,%xmm1
3011	pandn	%xmm2,%xmm0
3012	movdqa	%xmm4,%xmm2
3013	pandn	%xmm3,%xmm1
3014	movdqa	%xmm4,%xmm3
3015	pand	448(%rsp),%xmm2
3016	pand	448+16(%rsp),%xmm3
3017	por	%xmm0,%xmm2
3018	por	%xmm1,%xmm3
3019	movdqu	%xmm2,64(%rdi)
3020	movdqu	%xmm3,80(%rdi)
3021
3022	movdqa	%xmm5,%xmm0
3023	movdqa	%xmm5,%xmm1
3024	pandn	288(%rsp),%xmm0
3025	movdqa	%xmm5,%xmm2
3026	pandn	288+16(%rsp),%xmm1
3027	movdqa	%xmm5,%xmm3
3028	pand	480(%rsp),%xmm2
3029	pand	480+16(%rsp),%xmm3
3030	por	%xmm0,%xmm2
3031	por	%xmm1,%xmm3
3032
3033	movdqa	%xmm4,%xmm0
3034	movdqa	%xmm4,%xmm1
3035	pandn	%xmm2,%xmm0
3036	movdqa	%xmm4,%xmm2
3037	pandn	%xmm3,%xmm1
3038	movdqa	%xmm4,%xmm3
3039	pand	384(%rsp),%xmm2
3040	pand	384+16(%rsp),%xmm3
3041	por	%xmm0,%xmm2
3042	por	%xmm1,%xmm3
3043	movdqu	%xmm2,0(%rdi)
3044	movdqu	%xmm3,16(%rdi)
3045
3046	movdqa	%xmm5,%xmm0
3047	movdqa	%xmm5,%xmm1
3048	pandn	320(%rsp),%xmm0
3049	movdqa	%xmm5,%xmm2
3050	pandn	320+16(%rsp),%xmm1
3051	movdqa	%xmm5,%xmm3
3052	pand	512(%rsp),%xmm2
3053	pand	512+16(%rsp),%xmm3
3054	por	%xmm0,%xmm2
3055	por	%xmm1,%xmm3
3056
3057	movdqa	%xmm4,%xmm0
3058	movdqa	%xmm4,%xmm1
3059	pandn	%xmm2,%xmm0
3060	movdqa	%xmm4,%xmm2
3061	pandn	%xmm3,%xmm1
3062	movdqa	%xmm4,%xmm3
3063	pand	416(%rsp),%xmm2
3064	pand	416+16(%rsp),%xmm3
3065	por	%xmm0,%xmm2
3066	por	%xmm1,%xmm3
3067	movdqu	%xmm2,32(%rdi)
3068	movdqu	%xmm3,48(%rdi)
3069
3070.Ladd_doneq:
3071	leaq	576+56(%rsp),%rsi
3072.cfi_def_cfa	%rsi,8
3073	movq	-48(%rsi),%r15
3074.cfi_restore	%r15
3075	movq	-40(%rsi),%r14
3076.cfi_restore	%r14
3077	movq	-32(%rsi),%r13
3078.cfi_restore	%r13
3079	movq	-24(%rsi),%r12
3080.cfi_restore	%r12
3081	movq	-16(%rsi),%rbx
3082.cfi_restore	%rbx
3083	movq	-8(%rsi),%rbp
3084.cfi_restore	%rbp
3085	leaq	(%rsi),%rsp
3086.cfi_def_cfa_register	%rsp
3087.Lpoint_addq_epilogue:
3088	ret
3089.cfi_endproc
3090.size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
3091.globl	ecp_nistz256_point_add_affine
3092.hidden ecp_nistz256_point_add_affine
3093.type	ecp_nistz256_point_add_affine,@function
3094.align	32
3095ecp_nistz256_point_add_affine:
3096.cfi_startproc
3097_CET_ENDBR
3098	leaq	OPENSSL_ia32cap_P(%rip),%rcx
3099	movq	8(%rcx),%rcx
3100	andl	$0x80100,%ecx
3101	cmpl	$0x80100,%ecx
3102	je	.Lpoint_add_affinex
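// Capability dispatch: the dword at OPENSSL_ia32cap_P+8 holds
// CPUID.7:EBX, where 0x100 is BMI2 and 0x80000 is ADX.  With both
// present, jump to the mulx/adcx/adox implementation.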
3103	pushq	%rbp
3104.cfi_adjust_cfa_offset	8
3105.cfi_offset	%rbp,-16
3106	pushq	%rbx
3107.cfi_adjust_cfa_offset	8
3108.cfi_offset	%rbx,-24
3109	pushq	%r12
3110.cfi_adjust_cfa_offset	8
3111.cfi_offset	%r12,-32
3112	pushq	%r13
3113.cfi_adjust_cfa_offset	8
3114.cfi_offset	%r13,-40
3115	pushq	%r14
3116.cfi_adjust_cfa_offset	8
3117.cfi_offset	%r14,-48
3118	pushq	%r15
3119.cfi_adjust_cfa_offset	8
3120.cfi_offset	%r15,-56
3121	subq	$480+8,%rsp
3122.cfi_adjust_cfa_offset	32*15+8
3123.Ladd_affineq_body:
3124
3125	movdqu	0(%rsi),%xmm0
3126	movq	%rdx,%rbx
3127	movdqu	16(%rsi),%xmm1
3128	movdqu	32(%rsi),%xmm2
3129	movdqu	48(%rsi),%xmm3
3130	movdqu	64(%rsi),%xmm4
3131	movdqu	80(%rsi),%xmm5
3132	movq	64+0(%rsi),%rax
3133	movq	64+8(%rsi),%r14
3134	movq	64+16(%rsi),%r15
3135	movq	64+24(%rsi),%r8
3136	movdqa	%xmm0,320(%rsp)
3137	movdqa	%xmm1,320+16(%rsp)
3138	movdqa	%xmm2,352(%rsp)
3139	movdqa	%xmm3,352+16(%rsp)
3140	movdqa	%xmm4,384(%rsp)
3141	movdqa	%xmm5,384+16(%rsp)
3142	por	%xmm4,%xmm5
3143
3144	movdqu	0(%rbx),%xmm0
3145	pshufd	$0xb1,%xmm5,%xmm3
3146	movdqu	16(%rbx),%xmm1
3147	movdqu	32(%rbx),%xmm2
3148	por	%xmm3,%xmm5
3149	movdqu	48(%rbx),%xmm3
3150	movdqa	%xmm0,416(%rsp)
3151	pshufd	$0x1e,%xmm5,%xmm4
3152	movdqa	%xmm1,416+16(%rsp)
3153	por	%xmm0,%xmm1
3154.byte	102,72,15,110,199
3155	movdqa	%xmm2,448(%rsp)
3156	movdqa	%xmm3,448+16(%rsp)
3157	por	%xmm2,%xmm3
3158	por	%xmm4,%xmm5
3159	pxor	%xmm4,%xmm4
3160	por	%xmm1,%xmm3
3161
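// Mask building: %xmm5 ends up as the "in1 at infinity" (Z1 == 0) mask,
// while the OR of in2's affine coordinates gathered in %xmm3 yields the
// "in2 is the encoded point at infinity (all zero)" mask in %xmm4 after
// the pcmpeqd below.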
3162	leaq	64-0(%rsi),%rsi
3163	leaq	32(%rsp),%rdi
3164	call	__ecp_nistz256_sqr_montq
3165
3166	pcmpeqd	%xmm4,%xmm5
3167	pshufd	$0xb1,%xmm3,%xmm4
3168	movq	0(%rbx),%rax
3169
3170	movq	%r12,%r9
3171	por	%xmm3,%xmm4
3172	pshufd	$0,%xmm5,%xmm5
3173	pshufd	$0x1e,%xmm4,%xmm3
3174	movq	%r13,%r10
3175	por	%xmm3,%xmm4
3176	pxor	%xmm3,%xmm3
3177	movq	%r14,%r11
3178	pcmpeqd	%xmm3,%xmm4
3179	pshufd	$0,%xmm4,%xmm4
3180
3181	leaq	32-0(%rsp),%rsi
3182	movq	%r15,%r12
3183	leaq	0(%rsp),%rdi
3184	call	__ecp_nistz256_mul_montq
3185
3186	leaq	320(%rsp),%rbx
3187	leaq	64(%rsp),%rdi
3188	call	__ecp_nistz256_sub_fromq
3189
3190	movq	384(%rsp),%rax
3191	leaq	384(%rsp),%rbx
3192	movq	0+32(%rsp),%r9
3193	movq	8+32(%rsp),%r10
3194	leaq	0+32(%rsp),%rsi
3195	movq	16+32(%rsp),%r11
3196	movq	24+32(%rsp),%r12
3197	leaq	32(%rsp),%rdi
3198	call	__ecp_nistz256_mul_montq
3199
3200	movq	384(%rsp),%rax
3201	leaq	384(%rsp),%rbx
3202	movq	0+64(%rsp),%r9
3203	movq	8+64(%rsp),%r10
3204	leaq	0+64(%rsp),%rsi
3205	movq	16+64(%rsp),%r11
3206	movq	24+64(%rsp),%r12
3207	leaq	288(%rsp),%rdi
3208	call	__ecp_nistz256_mul_montq
3209
3210	movq	448(%rsp),%rax
3211	leaq	448(%rsp),%rbx
3212	movq	0+32(%rsp),%r9
3213	movq	8+32(%rsp),%r10
3214	leaq	0+32(%rsp),%rsi
3215	movq	16+32(%rsp),%r11
3216	movq	24+32(%rsp),%r12
3217	leaq	32(%rsp),%rdi
3218	call	__ecp_nistz256_mul_montq
3219
3220	leaq	352(%rsp),%rbx
3221	leaq	96(%rsp),%rdi
3222	call	__ecp_nistz256_sub_fromq
3223
3224	movq	0+64(%rsp),%rax
3225	movq	8+64(%rsp),%r14
3226	leaq	0+64(%rsp),%rsi
3227	movq	16+64(%rsp),%r15
3228	movq	24+64(%rsp),%r8
3229	leaq	128(%rsp),%rdi
3230	call	__ecp_nistz256_sqr_montq
3231
3232	movq	0+96(%rsp),%rax
3233	movq	8+96(%rsp),%r14
3234	leaq	0+96(%rsp),%rsi
3235	movq	16+96(%rsp),%r15
3236	movq	24+96(%rsp),%r8
3237	leaq	192(%rsp),%rdi
3238	call	__ecp_nistz256_sqr_montq
3239
3240	movq	128(%rsp),%rax
3241	leaq	128(%rsp),%rbx
3242	movq	0+64(%rsp),%r9
3243	movq	8+64(%rsp),%r10
3244	leaq	0+64(%rsp),%rsi
3245	movq	16+64(%rsp),%r11
3246	movq	24+64(%rsp),%r12
3247	leaq	160(%rsp),%rdi
3248	call	__ecp_nistz256_mul_montq
3249
3250	movq	320(%rsp),%rax
3251	leaq	320(%rsp),%rbx
3252	movq	0+128(%rsp),%r9
3253	movq	8+128(%rsp),%r10
3254	leaq	0+128(%rsp),%rsi
3255	movq	16+128(%rsp),%r11
3256	movq	24+128(%rsp),%r12
3257	leaq	0(%rsp),%rdi
3258	call	__ecp_nistz256_mul_montq
3259
3260
3261
3262
3263	xorq	%r11,%r11
3264	addq	%r12,%r12
3265	leaq	192(%rsp),%rsi
3266	adcq	%r13,%r13
3267	movq	%r12,%rax
3268	adcq	%r8,%r8
3269	adcq	%r9,%r9
3270	movq	%r13,%rbp
3271	adcq	$0,%r11
3272
3273	subq	$-1,%r12
3274	movq	%r8,%rcx
3275	sbbq	%r14,%r13
3276	sbbq	$0,%r8
3277	movq	%r9,%r10
3278	sbbq	%r15,%r9
3279	sbbq	$0,%r11
3280
3281	cmovcq	%rax,%r12
3282	movq	0(%rsi),%rax
3283	cmovcq	%rbp,%r13
3284	movq	8(%rsi),%rbp
3285	cmovcq	%rcx,%r8
3286	movq	16(%rsi),%rcx
3287	cmovcq	%r10,%r9
3288	movq	24(%rsi),%r10
3289
3290	call	__ecp_nistz256_subq
3291
3292	leaq	160(%rsp),%rbx
3293	leaq	224(%rsp),%rdi
3294	call	__ecp_nistz256_sub_fromq
3295
3296	movq	0+0(%rsp),%rax
3297	movq	0+8(%rsp),%rbp
3298	movq	0+16(%rsp),%rcx
3299	movq	0+24(%rsp),%r10
3300	leaq	64(%rsp),%rdi
3301
3302	call	__ecp_nistz256_subq
3303
3304	movq	%r12,0(%rdi)
3305	movq	%r13,8(%rdi)
3306	movq	%r8,16(%rdi)
3307	movq	%r9,24(%rdi)
3308	movq	352(%rsp),%rax
3309	leaq	352(%rsp),%rbx
3310	movq	0+160(%rsp),%r9
3311	movq	8+160(%rsp),%r10
3312	leaq	0+160(%rsp),%rsi
3313	movq	16+160(%rsp),%r11
3314	movq	24+160(%rsp),%r12
3315	leaq	32(%rsp),%rdi
3316	call	__ecp_nistz256_mul_montq
3317
3318	movq	96(%rsp),%rax
3319	leaq	96(%rsp),%rbx
3320	movq	0+64(%rsp),%r9
3321	movq	8+64(%rsp),%r10
3322	leaq	0+64(%rsp),%rsi
3323	movq	16+64(%rsp),%r11
3324	movq	24+64(%rsp),%r12
3325	leaq	64(%rsp),%rdi
3326	call	__ecp_nistz256_mul_montq
3327
3328	leaq	32(%rsp),%rbx
3329	leaq	256(%rsp),%rdi
3330	call	__ecp_nistz256_sub_fromq
3331
3332.byte	102,72,15,126,199
3333
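// Select res_z.  An affine second point has implicit Z = 1, so
// .LONE_mont (one in Montgomery form) stands in where the general
// point_add would use in2_z; the masks are applied as in point_add.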
3334	movdqa	%xmm5,%xmm0
3335	movdqa	%xmm5,%xmm1
3336	pandn	288(%rsp),%xmm0
3337	movdqa	%xmm5,%xmm2
3338	pandn	288+16(%rsp),%xmm1
3339	movdqa	%xmm5,%xmm3
3340	pand	.LONE_mont(%rip),%xmm2
3341	pand	.LONE_mont+16(%rip),%xmm3
3342	por	%xmm0,%xmm2
3343	por	%xmm1,%xmm3
3344
3345	movdqa	%xmm4,%xmm0
3346	movdqa	%xmm4,%xmm1
3347	pandn	%xmm2,%xmm0
3348	movdqa	%xmm4,%xmm2
3349	pandn	%xmm3,%xmm1
3350	movdqa	%xmm4,%xmm3
3351	pand	384(%rsp),%xmm2
3352	pand	384+16(%rsp),%xmm3
3353	por	%xmm0,%xmm2
3354	por	%xmm1,%xmm3
3355	movdqu	%xmm2,64(%rdi)
3356	movdqu	%xmm3,80(%rdi)
3357
3358	movdqa	%xmm5,%xmm0
3359	movdqa	%xmm5,%xmm1
3360	pandn	224(%rsp),%xmm0
3361	movdqa	%xmm5,%xmm2
3362	pandn	224+16(%rsp),%xmm1
3363	movdqa	%xmm5,%xmm3
3364	pand	416(%rsp),%xmm2
3365	pand	416+16(%rsp),%xmm3
3366	por	%xmm0,%xmm2
3367	por	%xmm1,%xmm3
3368
3369	movdqa	%xmm4,%xmm0
3370	movdqa	%xmm4,%xmm1
3371	pandn	%xmm2,%xmm0
3372	movdqa	%xmm4,%xmm2
3373	pandn	%xmm3,%xmm1
3374	movdqa	%xmm4,%xmm3
3375	pand	320(%rsp),%xmm2
3376	pand	320+16(%rsp),%xmm3
3377	por	%xmm0,%xmm2
3378	por	%xmm1,%xmm3
3379	movdqu	%xmm2,0(%rdi)
3380	movdqu	%xmm3,16(%rdi)
3381
3382	movdqa	%xmm5,%xmm0
3383	movdqa	%xmm5,%xmm1
3384	pandn	256(%rsp),%xmm0
3385	movdqa	%xmm5,%xmm2
3386	pandn	256+16(%rsp),%xmm1
3387	movdqa	%xmm5,%xmm3
3388	pand	448(%rsp),%xmm2
3389	pand	448+16(%rsp),%xmm3
3390	por	%xmm0,%xmm2
3391	por	%xmm1,%xmm3
3392
3393	movdqa	%xmm4,%xmm0
3394	movdqa	%xmm4,%xmm1
3395	pandn	%xmm2,%xmm0
3396	movdqa	%xmm4,%xmm2
3397	pandn	%xmm3,%xmm1
3398	movdqa	%xmm4,%xmm3
3399	pand	352(%rsp),%xmm2
3400	pand	352+16(%rsp),%xmm3
3401	por	%xmm0,%xmm2
3402	por	%xmm1,%xmm3
3403	movdqu	%xmm2,32(%rdi)
3404	movdqu	%xmm3,48(%rdi)
3405
3406	leaq	480+56(%rsp),%rsi
3407.cfi_def_cfa	%rsi,8
3408	movq	-48(%rsi),%r15
3409.cfi_restore	%r15
3410	movq	-40(%rsi),%r14
3411.cfi_restore	%r14
3412	movq	-32(%rsi),%r13
3413.cfi_restore	%r13
3414	movq	-24(%rsi),%r12
3415.cfi_restore	%r12
3416	movq	-16(%rsi),%rbx
3417.cfi_restore	%rbx
3418	movq	-8(%rsi),%rbp
3419.cfi_restore	%rbp
3420	leaq	(%rsi),%rsp
3421.cfi_def_cfa_register	%rsp
3422.Ladd_affineq_epilogue:
3423	ret
3424.cfi_endproc
3425.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
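// The __ecp_nistz256_*x helpers below mirror the *q versions: operands
// arrive in %r12,%r13,%r8,%r9 with the second operand at (%rbx), the
// result is stored at (%rdi) (except __ecp_nistz256_subx, which leaves
// it in registers), and %r14/%r15 cache .Lpoly+8 and .Lpoly+24.  The
// leading xorq clears CF, so the first adcq/sbbq acts as a plain
// add/sub.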
3426.type	__ecp_nistz256_add_tox,@function
3427.align	32
3428__ecp_nistz256_add_tox:
3429.cfi_startproc
3430	xorq	%r11,%r11
3431	adcq	0(%rbx),%r12
3432	adcq	8(%rbx),%r13
3433	movq	%r12,%rax
3434	adcq	16(%rbx),%r8
3435	adcq	24(%rbx),%r9
3436	movq	%r13,%rbp
3437	adcq	$0,%r11
3438
3439	xorq	%r10,%r10
3440	sbbq	$-1,%r12
3441	movq	%r8,%rcx
3442	sbbq	%r14,%r13
3443	sbbq	$0,%r8
3444	movq	%r9,%r10
3445	sbbq	%r15,%r9
3446	sbbq	$0,%r11
3447
3448	cmovcq	%rax,%r12
3449	cmovcq	%rbp,%r13
3450	movq	%r12,0(%rdi)
3451	cmovcq	%rcx,%r8
3452	movq	%r13,8(%rdi)
3453	cmovcq	%r10,%r9
3454	movq	%r8,16(%rdi)
3455	movq	%r9,24(%rdi)
3456
3457	ret
3458.cfi_endproc
3459.size	__ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
3460
3461.type	__ecp_nistz256_sub_fromx,@function
3462.align	32
3463__ecp_nistz256_sub_fromx:
3464.cfi_startproc
3465	xorq	%r11,%r11
3466	sbbq	0(%rbx),%r12
3467	sbbq	8(%rbx),%r13
3468	movq	%r12,%rax
3469	sbbq	16(%rbx),%r8
3470	sbbq	24(%rbx),%r9
3471	movq	%r13,%rbp
3472	sbbq	$0,%r11
3473
3474	xorq	%r10,%r10
3475	adcq	$-1,%r12
3476	movq	%r8,%rcx
3477	adcq	%r14,%r13
3478	adcq	$0,%r8
3479	movq	%r9,%r10
3480	adcq	%r15,%r9
3481
3482	btq	$0,%r11
3483	cmovncq	%rax,%r12
3484	cmovncq	%rbp,%r13
3485	movq	%r12,0(%rdi)
3486	cmovncq	%rcx,%r8
3487	movq	%r13,8(%rdi)
3488	cmovncq	%r10,%r9
3489	movq	%r8,16(%rdi)
3490	movq	%r9,24(%rdi)
3491
3492	ret
3493.cfi_endproc
3494.size	__ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
3495
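// Computes a - b mod p with a in %rax,%rbp,%rcx,%r10 and b in
// %r12,%r13,%r8,%r9; the reduced result is left in %r12,%r13,%r8,%r9
// and nothing is stored.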
3496.type	__ecp_nistz256_subx,@function
3497.align	32
3498__ecp_nistz256_subx:
3499.cfi_startproc
3500	xorq	%r11,%r11
3501	sbbq	%r12,%rax
3502	sbbq	%r13,%rbp
3503	movq	%rax,%r12
3504	sbbq	%r8,%rcx
3505	sbbq	%r9,%r10
3506	movq	%rbp,%r13
3507	sbbq	$0,%r11
3508
3509	xorq	%r9,%r9
3510	adcq	$-1,%rax
3511	movq	%rcx,%r8
3512	adcq	%r14,%rbp
3513	adcq	$0,%rcx
3514	movq	%r10,%r9
3515	adcq	%r15,%r10
3516
3517	btq	$0,%r11
3518	cmovcq	%rax,%r12
3519	cmovcq	%rbp,%r13
3520	cmovcq	%rcx,%r8
3521	cmovcq	%r10,%r9
3522
3523	ret
3524.cfi_endproc
3525.size	__ecp_nistz256_subx,.-__ecp_nistz256_subx
3526
3527.type	__ecp_nistz256_mul_by_2x,@function
3528.align	32
3529__ecp_nistz256_mul_by_2x:
3530.cfi_startproc
3531	xorq	%r11,%r11
3532	adcq	%r12,%r12
3533	adcq	%r13,%r13
3534	movq	%r12,%rax
3535	adcq	%r8,%r8
3536	adcq	%r9,%r9
3537	movq	%r13,%rbp
3538	adcq	$0,%r11
3539
3540	xorq	%r10,%r10
3541	sbbq	$-1,%r12
3542	movq	%r8,%rcx
3543	sbbq	%r14,%r13
3544	sbbq	$0,%r8
3545	movq	%r9,%r10
3546	sbbq	%r15,%r9
3547	sbbq	$0,%r11
3548
3549	cmovcq	%rax,%r12
3550	cmovcq	%rbp,%r13
3551	movq	%r12,0(%rdi)
3552	cmovcq	%rcx,%r8
3553	movq	%r13,8(%rdi)
3554	cmovcq	%r10,%r9
3555	movq	%r8,16(%rdi)
3556	movq	%r9,24(%rdi)
3557
3558	ret
3559.cfi_endproc
3560.size	__ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
3561.type	ecp_nistz256_point_doublex,@function
3562.align	32
3563ecp_nistz256_point_doublex:
3564.cfi_startproc
3565.Lpoint_doublex:
3566	pushq	%rbp
3567.cfi_adjust_cfa_offset	8
3568.cfi_offset	%rbp,-16
3569	pushq	%rbx
3570.cfi_adjust_cfa_offset	8
3571.cfi_offset	%rbx,-24
3572	pushq	%r12
3573.cfi_adjust_cfa_offset	8
3574.cfi_offset	%r12,-32
3575	pushq	%r13
3576.cfi_adjust_cfa_offset	8
3577.cfi_offset	%r13,-40
3578	pushq	%r14
3579.cfi_adjust_cfa_offset	8
3580.cfi_offset	%r14,-48
3581	pushq	%r15
3582.cfi_adjust_cfa_offset	8
3583.cfi_offset	%r15,-56
3584	subq	$160+8,%rsp
3585.cfi_adjust_cfa_offset	32*5+8
3586.Lpoint_doublex_body:
3587
3588.Lpoint_double_shortcutx:
3589	movdqu	0(%rsi),%xmm0
3590	movq	%rsi,%rbx
3591	movdqu	16(%rsi),%xmm1
3592	movq	32+0(%rsi),%r12
3593	movq	32+8(%rsi),%r13
3594	movq	32+16(%rsi),%r8
3595	movq	32+24(%rsi),%r9
3596	movq	.Lpoly+8(%rip),%r14
3597	movq	.Lpoly+24(%rip),%r15
3598	movdqa	%xmm0,96(%rsp)
3599	movdqa	%xmm1,96+16(%rsp)
3600	leaq	32(%rdi),%r10
3601	leaq	64(%rdi),%r11
3602.byte	102,72,15,110,199
3603.byte	102,73,15,110,202
3604.byte	102,73,15,110,211
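// Stash the output pointers in XMM registers (the byte-encoded moves
// above: movq %rdi,%xmm0, movq %r10,%xmm1, movq %r11,%xmm2 for &out,
// &out_y, &out_z); they are recovered the same way before each store.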
3605
3606	leaq	0(%rsp),%rdi
3607	call	__ecp_nistz256_mul_by_2x
3608
3609	movq	64+0(%rsi),%rdx
3610	movq	64+8(%rsi),%r14
3611	movq	64+16(%rsi),%r15
3612	movq	64+24(%rsi),%r8
3613	leaq	64-128(%rsi),%rsi
3614	leaq	64(%rsp),%rdi
3615	call	__ecp_nistz256_sqr_montx
3616
3617	movq	0+0(%rsp),%rdx
3618	movq	8+0(%rsp),%r14
3619	leaq	-128+0(%rsp),%rsi
3620	movq	16+0(%rsp),%r15
3621	movq	24+0(%rsp),%r8
3622	leaq	0(%rsp),%rdi
3623	call	__ecp_nistz256_sqr_montx
3624
3625	movq	32(%rbx),%rdx
3626	movq	64+0(%rbx),%r9
3627	movq	64+8(%rbx),%r10
3628	movq	64+16(%rbx),%r11
3629	movq	64+24(%rbx),%r12
3630	leaq	64-128(%rbx),%rsi
3631	leaq	32(%rbx),%rbx
3632.byte	102,72,15,126,215
3633	call	__ecp_nistz256_mul_montx
3634	call	__ecp_nistz256_mul_by_2x
3635
3636	movq	96+0(%rsp),%r12
3637	movq	96+8(%rsp),%r13
3638	leaq	64(%rsp),%rbx
3639	movq	96+16(%rsp),%r8
3640	movq	96+24(%rsp),%r9
3641	leaq	32(%rsp),%rdi
3642	call	__ecp_nistz256_add_tox
3643
3644	movq	96+0(%rsp),%r12
3645	movq	96+8(%rsp),%r13
3646	leaq	64(%rsp),%rbx
3647	movq	96+16(%rsp),%r8
3648	movq	96+24(%rsp),%r9
3649	leaq	64(%rsp),%rdi
3650	call	__ecp_nistz256_sub_fromx
3651
3652	movq	0+0(%rsp),%rdx
3653	movq	8+0(%rsp),%r14
3654	leaq	-128+0(%rsp),%rsi
3655	movq	16+0(%rsp),%r15
3656	movq	24+0(%rsp),%r8
3657.byte	102,72,15,126,207
3658	call	__ecp_nistz256_sqr_montx
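// Halve (2*Y)^4 = 16*Y^4 into 8*Y^4 mod p, i.e. (v odd ? v + p : v) >> 1:
// p is added unconditionally, the cmovz chain restores the original
// when it was already even, and the shlq/shrq/orq ladder shifts the
// 256-bit value right one bit across the four limbs.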
3659	xorq	%r9,%r9
3660	movq	%r12,%rax
3661	addq	$-1,%r12
3662	movq	%r13,%r10
3663	adcq	%rsi,%r13
3664	movq	%r14,%rcx
3665	adcq	$0,%r14
3666	movq	%r15,%r8
3667	adcq	%rbp,%r15
3668	adcq	$0,%r9
3669	xorq	%rsi,%rsi
3670	testq	$1,%rax
3671
3672	cmovzq	%rax,%r12
3673	cmovzq	%r10,%r13
3674	cmovzq	%rcx,%r14
3675	cmovzq	%r8,%r15
3676	cmovzq	%rsi,%r9
3677
3678	movq	%r13,%rax
3679	shrq	$1,%r12
3680	shlq	$63,%rax
3681	movq	%r14,%r10
3682	shrq	$1,%r13
3683	orq	%rax,%r12
3684	shlq	$63,%r10
3685	movq	%r15,%rcx
3686	shrq	$1,%r14
3687	orq	%r10,%r13
3688	shlq	$63,%rcx
3689	movq	%r12,0(%rdi)
3690	shrq	$1,%r15
3691	movq	%r13,8(%rdi)
3692	shlq	$63,%r9
3693	orq	%rcx,%r14
3694	orq	%r9,%r15
3695	movq	%r14,16(%rdi)
3696	movq	%r15,24(%rdi)
3697	movq	64(%rsp),%rdx
3698	leaq	64(%rsp),%rbx
3699	movq	0+32(%rsp),%r9
3700	movq	8+32(%rsp),%r10
3701	leaq	-128+32(%rsp),%rsi
3702	movq	16+32(%rsp),%r11
3703	movq	24+32(%rsp),%r12
3704	leaq	32(%rsp),%rdi
3705	call	__ecp_nistz256_mul_montx
3706
3707	leaq	128(%rsp),%rdi
3708	call	__ecp_nistz256_mul_by_2x
3709
3710	leaq	32(%rsp),%rbx
3711	leaq	32(%rsp),%rdi
3712	call	__ecp_nistz256_add_tox
3713
3714	movq	96(%rsp),%rdx
3715	leaq	96(%rsp),%rbx
3716	movq	0+0(%rsp),%r9
3717	movq	8+0(%rsp),%r10
3718	leaq	-128+0(%rsp),%rsi
3719	movq	16+0(%rsp),%r11
3720	movq	24+0(%rsp),%r12
3721	leaq	0(%rsp),%rdi
3722	call	__ecp_nistz256_mul_montx
3723
3724	leaq	128(%rsp),%rdi
3725	call	__ecp_nistz256_mul_by_2x
3726
3727	movq	0+32(%rsp),%rdx
3728	movq	8+32(%rsp),%r14
3729	leaq	-128+32(%rsp),%rsi
3730	movq	16+32(%rsp),%r15
3731	movq	24+32(%rsp),%r8
3732.byte	102,72,15,126,199
3733	call	__ecp_nistz256_sqr_montx
3734
3735	leaq	128(%rsp),%rbx
3736	movq	%r14,%r8
3737	movq	%r15,%r9
3738	movq	%rsi,%r14
3739	movq	%rbp,%r15
3740	call	__ecp_nistz256_sub_fromx
3741
3742	movq	0+0(%rsp),%rax
3743	movq	0+8(%rsp),%rbp
3744	movq	0+16(%rsp),%rcx
3745	movq	0+24(%rsp),%r10
3746	leaq	0(%rsp),%rdi
3747	call	__ecp_nistz256_subx
3748
3749	movq	32(%rsp),%rdx
3750	leaq	32(%rsp),%rbx
3751	movq	%r12,%r14
3752	xorl	%ecx,%ecx
3753	movq	%r12,0+0(%rsp)
3754	movq	%r13,%r10
3755	movq	%r13,0+8(%rsp)
3756	cmovzq	%r8,%r11
3757	movq	%r8,0+16(%rsp)
3758	leaq	0-128(%rsp),%rsi
3759	cmovzq	%r9,%r12
3760	movq	%r9,0+24(%rsp)
3761	movq	%r14,%r9
3762	leaq	0(%rsp),%rdi
3763	call	__ecp_nistz256_mul_montx
3764
3765.byte	102,72,15,126,203
3766.byte	102,72,15,126,207
3767	call	__ecp_nistz256_sub_fromx
3768
3769	leaq	160+56(%rsp),%rsi
3770.cfi_def_cfa	%rsi,8
3771	movq	-48(%rsi),%r15
3772.cfi_restore	%r15
3773	movq	-40(%rsi),%r14
3774.cfi_restore	%r14
3775	movq	-32(%rsi),%r13
3776.cfi_restore	%r13
3777	movq	-24(%rsi),%r12
3778.cfi_restore	%r12
3779	movq	-16(%rsi),%rbx
3780.cfi_restore	%rbx
3781	movq	-8(%rsi),%rbp
3782.cfi_restore	%rbp
3783	leaq	(%rsi),%rsp
3784.cfi_def_cfa_register	%rsp
3785.Lpoint_doublex_epilogue:
3786	ret
3787.cfi_endproc
3788.size	ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
3789.type	ecp_nistz256_point_addx,@function
3790.align	32
3791ecp_nistz256_point_addx:
3792.cfi_startproc
3793.Lpoint_addx:
3794	pushq	%rbp
3795.cfi_adjust_cfa_offset	8
3796.cfi_offset	%rbp,-16
3797	pushq	%rbx
3798.cfi_adjust_cfa_offset	8
3799.cfi_offset	%rbx,-24
3800	pushq	%r12
3801.cfi_adjust_cfa_offset	8
3802.cfi_offset	%r12,-32
3803	pushq	%r13
3804.cfi_adjust_cfa_offset	8
3805.cfi_offset	%r13,-40
3806	pushq	%r14
3807.cfi_adjust_cfa_offset	8
3808.cfi_offset	%r14,-48
3809	pushq	%r15
3810.cfi_adjust_cfa_offset	8
3811.cfi_offset	%r15,-56
3812	subq	$576+8,%rsp
3813.cfi_adjust_cfa_offset	32*18+8
3814.Lpoint_addx_body:
3815
3816	movdqu	0(%rsi),%xmm0
3817	movdqu	16(%rsi),%xmm1
3818	movdqu	32(%rsi),%xmm2
3819	movdqu	48(%rsi),%xmm3
3820	movdqu	64(%rsi),%xmm4
3821	movdqu	80(%rsi),%xmm5
3822	movq	%rsi,%rbx
3823	movq	%rdx,%rsi
3824	movdqa	%xmm0,384(%rsp)
3825	movdqa	%xmm1,384+16(%rsp)
3826	movdqa	%xmm2,416(%rsp)
3827	movdqa	%xmm3,416+16(%rsp)
3828	movdqa	%xmm4,448(%rsp)
3829	movdqa	%xmm5,448+16(%rsp)
3830	por	%xmm4,%xmm5
3831
3832	movdqu	0(%rsi),%xmm0
3833	pshufd	$0xb1,%xmm5,%xmm3
3834	movdqu	16(%rsi),%xmm1
3835	movdqu	32(%rsi),%xmm2
3836	por	%xmm3,%xmm5
3837	movdqu	48(%rsi),%xmm3
3838	movq	64+0(%rsi),%rdx
3839	movq	64+8(%rsi),%r14
3840	movq	64+16(%rsi),%r15
3841	movq	64+24(%rsi),%r8
3842	movdqa	%xmm0,480(%rsp)
3843	pshufd	$0x1e,%xmm5,%xmm4
3844	movdqa	%xmm1,480+16(%rsp)
3845	movdqu	64(%rsi),%xmm0
3846	movdqu	80(%rsi),%xmm1
3847	movdqa	%xmm2,512(%rsp)
3848	movdqa	%xmm3,512+16(%rsp)
3849	por	%xmm4,%xmm5
3850	pxor	%xmm4,%xmm4
3851	por	%xmm0,%xmm1
3852.byte	102,72,15,110,199
3853
3854	leaq	64-128(%rsi),%rsi
3855	movq	%rdx,544+0(%rsp)
3856	movq	%r14,544+8(%rsp)
3857	movq	%r15,544+16(%rsp)
3858	movq	%r8,544+24(%rsp)
3859	leaq	96(%rsp),%rdi
3860	call	__ecp_nistz256_sqr_montx
3861
3862	pcmpeqd	%xmm4,%xmm5
3863	pshufd	$0xb1,%xmm1,%xmm4
3864	por	%xmm1,%xmm4
3865	pshufd	$0,%xmm5,%xmm5
3866	pshufd	$0x1e,%xmm4,%xmm3
3867	por	%xmm3,%xmm4
3868	pxor	%xmm3,%xmm3
3869	pcmpeqd	%xmm3,%xmm4
3870	pshufd	$0,%xmm4,%xmm4
3871	movq	64+0(%rbx),%rdx
3872	movq	64+8(%rbx),%r14
3873	movq	64+16(%rbx),%r15
3874	movq	64+24(%rbx),%r8
3875.byte	102,72,15,110,203
3876
3877	leaq	64-128(%rbx),%rsi
3878	leaq	32(%rsp),%rdi
3879	call	__ecp_nistz256_sqr_montx
3880
3881	movq	544(%rsp),%rdx
3882	leaq	544(%rsp),%rbx
3883	movq	0+96(%rsp),%r9
3884	movq	8+96(%rsp),%r10
3885	leaq	-128+96(%rsp),%rsi
3886	movq	16+96(%rsp),%r11
3887	movq	24+96(%rsp),%r12
3888	leaq	224(%rsp),%rdi
3889	call	__ecp_nistz256_mul_montx
3890
3891	movq	448(%rsp),%rdx
3892	leaq	448(%rsp),%rbx
3893	movq	0+32(%rsp),%r9
3894	movq	8+32(%rsp),%r10
3895	leaq	-128+32(%rsp),%rsi
3896	movq	16+32(%rsp),%r11
3897	movq	24+32(%rsp),%r12
3898	leaq	256(%rsp),%rdi
3899	call	__ecp_nistz256_mul_montx
3900
3901	movq	416(%rsp),%rdx
3902	leaq	416(%rsp),%rbx
3903	movq	0+224(%rsp),%r9
3904	movq	8+224(%rsp),%r10
3905	leaq	-128+224(%rsp),%rsi
3906	movq	16+224(%rsp),%r11
3907	movq	24+224(%rsp),%r12
3908	leaq	224(%rsp),%rdi
3909	call	__ecp_nistz256_mul_montx
3910
3911	movq	512(%rsp),%rdx
3912	leaq	512(%rsp),%rbx
3913	movq	0+256(%rsp),%r9
3914	movq	8+256(%rsp),%r10
3915	leaq	-128+256(%rsp),%rsi
3916	movq	16+256(%rsp),%r11
3917	movq	24+256(%rsp),%r12
3918	leaq	256(%rsp),%rdi
3919	call	__ecp_nistz256_mul_montx
3920
3921	leaq	224(%rsp),%rbx
3922	leaq	64(%rsp),%rdi
3923	call	__ecp_nistz256_sub_fromx
3924
3925	orq	%r13,%r12
3926	movdqa	%xmm4,%xmm2
3927	orq	%r8,%r12
3928	orq	%r9,%r12
3929	por	%xmm5,%xmm2
3930.byte	102,73,15,110,220
3931
3932	movq	384(%rsp),%rdx
3933	leaq	384(%rsp),%rbx
3934	movq	0+96(%rsp),%r9
3935	movq	8+96(%rsp),%r10
3936	leaq	-128+96(%rsp),%rsi
3937	movq	16+96(%rsp),%r11
3938	movq	24+96(%rsp),%r12
3939	leaq	160(%rsp),%rdi
3940	call	__ecp_nistz256_mul_montx
3941
3942	movq	480(%rsp),%rdx
3943	leaq	480(%rsp),%rbx
3944	movq	0+32(%rsp),%r9
3945	movq	8+32(%rsp),%r10
3946	leaq	-128+32(%rsp),%rsi
3947	movq	16+32(%rsp),%r11
3948	movq	24+32(%rsp),%r12
3949	leaq	192(%rsp),%rdi
3950	call	__ecp_nistz256_mul_montx
3951
3952	leaq	160(%rsp),%rbx
3953	leaq	0(%rsp),%rdi
3954	call	__ecp_nistz256_sub_fromx
3955
3956	orq	%r13,%r12
3957	orq	%r8,%r12
3958	orq	%r9,%r12
3959
3960.byte	102,73,15,126,208
3961.byte	102,73,15,126,217
3962	orq	%r8,%r12
3963.byte	0x3e
3964	jnz	.Ladd_proceedx
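// Same equal / inverse / infinity analysis as in the q-path point_add
// above: proceed with the general addition, take the doubling shortcut,
// or return the point at infinity.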
3965
3966
3967
3968	testq	%r9,%r9
3969	jz	.Ladd_doublex
3970
3971
3972
3973
3974
3975
3976.byte	102,72,15,126,199
3977	pxor	%xmm0,%xmm0
3978	movdqu	%xmm0,0(%rdi)
3979	movdqu	%xmm0,16(%rdi)
3980	movdqu	%xmm0,32(%rdi)
3981	movdqu	%xmm0,48(%rdi)
3982	movdqu	%xmm0,64(%rdi)
3983	movdqu	%xmm0,80(%rdi)
3984	jmp	.Ladd_donex
3985
3986.align	32
3987.Ladd_doublex:
3988.byte	102,72,15,126,206
3989.byte	102,72,15,126,199
3990	addq	$416,%rsp
3991.cfi_adjust_cfa_offset	-416
3992	jmp	.Lpoint_double_shortcutx
3993.cfi_adjust_cfa_offset	416
3994
3995.align	32
3996.Ladd_proceedx:
3997	movq	0+64(%rsp),%rdx
3998	movq	8+64(%rsp),%r14
3999	leaq	-128+64(%rsp),%rsi
4000	movq	16+64(%rsp),%r15
4001	movq	24+64(%rsp),%r8
4002	leaq	96(%rsp),%rdi
4003	call	__ecp_nistz256_sqr_montx
4004
4005	movq	448(%rsp),%rdx
4006	leaq	448(%rsp),%rbx
4007	movq	0+0(%rsp),%r9
4008	movq	8+0(%rsp),%r10
4009	leaq	-128+0(%rsp),%rsi
4010	movq	16+0(%rsp),%r11
4011	movq	24+0(%rsp),%r12
4012	leaq	352(%rsp),%rdi
4013	call	__ecp_nistz256_mul_montx
4014
4015	movq	0+0(%rsp),%rdx
4016	movq	8+0(%rsp),%r14
4017	leaq	-128+0(%rsp),%rsi
4018	movq	16+0(%rsp),%r15
4019	movq	24+0(%rsp),%r8
4020	leaq	32(%rsp),%rdi
4021	call	__ecp_nistz256_sqr_montx
4022
4023	movq	544(%rsp),%rdx
4024	leaq	544(%rsp),%rbx
4025	movq	0+352(%rsp),%r9
4026	movq	8+352(%rsp),%r10
4027	leaq	-128+352(%rsp),%rsi
4028	movq	16+352(%rsp),%r11
4029	movq	24+352(%rsp),%r12
4030	leaq	352(%rsp),%rdi
4031	call	__ecp_nistz256_mul_montx
4032
4033	movq	0(%rsp),%rdx
4034	leaq	0(%rsp),%rbx
4035	movq	0+32(%rsp),%r9
4036	movq	8+32(%rsp),%r10
4037	leaq	-128+32(%rsp),%rsi
4038	movq	16+32(%rsp),%r11
4039	movq	24+32(%rsp),%r12
4040	leaq	128(%rsp),%rdi
4041	call	__ecp_nistz256_mul_montx
4042
4043	movq	160(%rsp),%rdx
4044	leaq	160(%rsp),%rbx
4045	movq	0+32(%rsp),%r9
4046	movq	8+32(%rsp),%r10
4047	leaq	-128+32(%rsp),%rsi
4048	movq	16+32(%rsp),%r11
4049	movq	24+32(%rsp),%r12
4050	leaq	192(%rsp),%rdi
4051	call	__ecp_nistz256_mul_montx
4052
4053
4054
4055
4056	xorq	%r11,%r11
4057	addq	%r12,%r12
4058	leaq	96(%rsp),%rsi
4059	adcq	%r13,%r13
4060	movq	%r12,%rax
4061	adcq	%r8,%r8
4062	adcq	%r9,%r9
4063	movq	%r13,%rbp
4064	adcq	$0,%r11
4065
4066	subq	$-1,%r12
4067	movq	%r8,%rcx
4068	sbbq	%r14,%r13
4069	sbbq	$0,%r8
4070	movq	%r9,%r10
4071	sbbq	%r15,%r9
4072	sbbq	$0,%r11
4073
4074	cmovcq	%rax,%r12
4075	movq	0(%rsi),%rax
4076	cmovcq	%rbp,%r13
4077	movq	8(%rsi),%rbp
4078	cmovcq	%rcx,%r8
4079	movq	16(%rsi),%rcx
4080	cmovcq	%r10,%r9
4081	movq	24(%rsi),%r10
4082
4083	call	__ecp_nistz256_subx
4084
4085	leaq	128(%rsp),%rbx
4086	leaq	288(%rsp),%rdi
4087	call	__ecp_nistz256_sub_fromx
4088
4089	movq	192+0(%rsp),%rax
4090	movq	192+8(%rsp),%rbp
4091	movq	192+16(%rsp),%rcx
4092	movq	192+24(%rsp),%r10
4093	leaq	320(%rsp),%rdi
4094
4095	call	__ecp_nistz256_subx
4096
4097	movq	%r12,0(%rdi)
4098	movq	%r13,8(%rdi)
4099	movq	%r8,16(%rdi)
4100	movq	%r9,24(%rdi)
4101	movq	128(%rsp),%rdx
4102	leaq	128(%rsp),%rbx
4103	movq	0+224(%rsp),%r9
4104	movq	8+224(%rsp),%r10
4105	leaq	-128+224(%rsp),%rsi
4106	movq	16+224(%rsp),%r11
4107	movq	24+224(%rsp),%r12
4108	leaq	256(%rsp),%rdi
4109	call	__ecp_nistz256_mul_montx
4110
4111	movq	320(%rsp),%rdx
4112	leaq	320(%rsp),%rbx
4113	movq	0+64(%rsp),%r9
4114	movq	8+64(%rsp),%r10
4115	leaq	-128+64(%rsp),%rsi
4116	movq	16+64(%rsp),%r11
4117	movq	24+64(%rsp),%r12
4118	leaq	320(%rsp),%rdi
4119	call	__ecp_nistz256_mul_montx
4120
4121	leaq	256(%rsp),%rbx
4122	leaq	320(%rsp),%rdi
4123	call	__ecp_nistz256_sub_fromx
4124
4125.byte	102,72,15,126,199
4126
4127	movdqa	%xmm5,%xmm0
4128	movdqa	%xmm5,%xmm1
4129	pandn	352(%rsp),%xmm0
4130	movdqa	%xmm5,%xmm2
4131	pandn	352+16(%rsp),%xmm1
4132	movdqa	%xmm5,%xmm3
4133	pand	544(%rsp),%xmm2
4134	pand	544+16(%rsp),%xmm3
4135	por	%xmm0,%xmm2
4136	por	%xmm1,%xmm3
4137
4138	movdqa	%xmm4,%xmm0
4139	movdqa	%xmm4,%xmm1
4140	pandn	%xmm2,%xmm0
4141	movdqa	%xmm4,%xmm2
4142	pandn	%xmm3,%xmm1
4143	movdqa	%xmm4,%xmm3
4144	pand	448(%rsp),%xmm2
4145	pand	448+16(%rsp),%xmm3
4146	por	%xmm0,%xmm2
4147	por	%xmm1,%xmm3
4148	movdqu	%xmm2,64(%rdi)
4149	movdqu	%xmm3,80(%rdi)
4150
4151	movdqa	%xmm5,%xmm0
4152	movdqa	%xmm5,%xmm1
4153	pandn	288(%rsp),%xmm0
4154	movdqa	%xmm5,%xmm2
4155	pandn	288+16(%rsp),%xmm1
4156	movdqa	%xmm5,%xmm3
4157	pand	480(%rsp),%xmm2
4158	pand	480+16(%rsp),%xmm3
4159	por	%xmm0,%xmm2
4160	por	%xmm1,%xmm3
4161
4162	movdqa	%xmm4,%xmm0
4163	movdqa	%xmm4,%xmm1
4164	pandn	%xmm2,%xmm0
4165	movdqa	%xmm4,%xmm2
4166	pandn	%xmm3,%xmm1
4167	movdqa	%xmm4,%xmm3
4168	pand	384(%rsp),%xmm2
4169	pand	384+16(%rsp),%xmm3
4170	por	%xmm0,%xmm2
4171	por	%xmm1,%xmm3
4172	movdqu	%xmm2,0(%rdi)
4173	movdqu	%xmm3,16(%rdi)
4174
4175	movdqa	%xmm5,%xmm0
4176	movdqa	%xmm5,%xmm1
4177	pandn	320(%rsp),%xmm0
4178	movdqa	%xmm5,%xmm2
4179	pandn	320+16(%rsp),%xmm1
4180	movdqa	%xmm5,%xmm3
4181	pand	512(%rsp),%xmm2
4182	pand	512+16(%rsp),%xmm3
4183	por	%xmm0,%xmm2
4184	por	%xmm1,%xmm3
4185
4186	movdqa	%xmm4,%xmm0
4187	movdqa	%xmm4,%xmm1
4188	pandn	%xmm2,%xmm0
4189	movdqa	%xmm4,%xmm2
4190	pandn	%xmm3,%xmm1
4191	movdqa	%xmm4,%xmm3
4192	pand	416(%rsp),%xmm2
4193	pand	416+16(%rsp),%xmm3
4194	por	%xmm0,%xmm2
4195	por	%xmm1,%xmm3
4196	movdqu	%xmm2,32(%rdi)
4197	movdqu	%xmm3,48(%rdi)
4198
4199.Ladd_donex:
4200	leaq	576+56(%rsp),%rsi
4201.cfi_def_cfa	%rsi,8
4202	movq	-48(%rsi),%r15
4203.cfi_restore	%r15
4204	movq	-40(%rsi),%r14
4205.cfi_restore	%r14
4206	movq	-32(%rsi),%r13
4207.cfi_restore	%r13
4208	movq	-24(%rsi),%r12
4209.cfi_restore	%r12
4210	movq	-16(%rsi),%rbx
4211.cfi_restore	%rbx
4212	movq	-8(%rsi),%rbp
4213.cfi_restore	%rbp
4214	leaq	(%rsi),%rsp
4215.cfi_def_cfa_register	%rsp
4216.Lpoint_addx_epilogue:
4217	ret
4218.cfi_endproc
4219.size	ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
4220.type	ecp_nistz256_point_add_affinex,@function
4221.align	32
4222ecp_nistz256_point_add_affinex:
4223.cfi_startproc
4224.Lpoint_add_affinex:
4225	pushq	%rbp
4226.cfi_adjust_cfa_offset	8
4227.cfi_offset	%rbp,-16
4228	pushq	%rbx
4229.cfi_adjust_cfa_offset	8
4230.cfi_offset	%rbx,-24
4231	pushq	%r12
4232.cfi_adjust_cfa_offset	8
4233.cfi_offset	%r12,-32
4234	pushq	%r13
4235.cfi_adjust_cfa_offset	8
4236.cfi_offset	%r13,-40
4237	pushq	%r14
4238.cfi_adjust_cfa_offset	8
4239.cfi_offset	%r14,-48
4240	pushq	%r15
4241.cfi_adjust_cfa_offset	8
4242.cfi_offset	%r15,-56
4243	subq	$480+8,%rsp
4244.cfi_adjust_cfa_offset	32*15+8
4245.Ladd_affinex_body:
4246
4247	movdqu	0(%rsi),%xmm0
4248	movq	%rdx,%rbx
4249	movdqu	16(%rsi),%xmm1
4250	movdqu	32(%rsi),%xmm2
4251	movdqu	48(%rsi),%xmm3
4252	movdqu	64(%rsi),%xmm4
4253	movdqu	80(%rsi),%xmm5
4254	movq	64+0(%rsi),%rdx
4255	movq	64+8(%rsi),%r14
4256	movq	64+16(%rsi),%r15
4257	movq	64+24(%rsi),%r8
4258	movdqa	%xmm0,320(%rsp)
4259	movdqa	%xmm1,320+16(%rsp)
4260	movdqa	%xmm2,352(%rsp)
4261	movdqa	%xmm3,352+16(%rsp)
4262	movdqa	%xmm4,384(%rsp)
4263	movdqa	%xmm5,384+16(%rsp)
4264	por	%xmm4,%xmm5
4265
4266	movdqu	0(%rbx),%xmm0
4267	pshufd	$0xb1,%xmm5,%xmm3
4268	movdqu	16(%rbx),%xmm1
4269	movdqu	32(%rbx),%xmm2
4270	por	%xmm3,%xmm5
4271	movdqu	48(%rbx),%xmm3
4272	movdqa	%xmm0,416(%rsp)
4273	pshufd	$0x1e,%xmm5,%xmm4
4274	movdqa	%xmm1,416+16(%rsp)
4275	por	%xmm0,%xmm1
4276.byte	102,72,15,110,199
4277	movdqa	%xmm2,448(%rsp)
4278	movdqa	%xmm3,448+16(%rsp)
4279	por	%xmm2,%xmm3
4280	por	%xmm4,%xmm5
4281	pxor	%xmm4,%xmm4
4282	por	%xmm1,%xmm3
4283
4284	leaq	64-128(%rsi),%rsi
4285	leaq	32(%rsp),%rdi
4286	call	__ecp_nistz256_sqr_montx
4287
4288	pcmpeqd	%xmm4,%xmm5
4289	pshufd	$0xb1,%xmm3,%xmm4
4290	movq	0(%rbx),%rdx
4291
4292	movq	%r12,%r9
4293	por	%xmm3,%xmm4
4294	pshufd	$0,%xmm5,%xmm5
4295	pshufd	$0x1e,%xmm4,%xmm3
4296	movq	%r13,%r10
4297	por	%xmm3,%xmm4
4298	pxor	%xmm3,%xmm3
4299	movq	%r14,%r11
4300	pcmpeqd	%xmm3,%xmm4
4301	pshufd	$0,%xmm4,%xmm4
4302
4303	leaq	32-128(%rsp),%rsi
4304	movq	%r15,%r12
4305	leaq	0(%rsp),%rdi
4306	call	__ecp_nistz256_mul_montx
4307
4308	leaq	320(%rsp),%rbx
4309	leaq	64(%rsp),%rdi
4310	call	__ecp_nistz256_sub_fromx
4311
4312	movq	384(%rsp),%rdx
4313	leaq	384(%rsp),%rbx
4314	movq	0+32(%rsp),%r9
4315	movq	8+32(%rsp),%r10
4316	leaq	-128+32(%rsp),%rsi
4317	movq	16+32(%rsp),%r11
4318	movq	24+32(%rsp),%r12
4319	leaq	32(%rsp),%rdi
4320	call	__ecp_nistz256_mul_montx
4321
4322	movq	384(%rsp),%rdx
4323	leaq	384(%rsp),%rbx
4324	movq	0+64(%rsp),%r9
4325	movq	8+64(%rsp),%r10
4326	leaq	-128+64(%rsp),%rsi
4327	movq	16+64(%rsp),%r11
4328	movq	24+64(%rsp),%r12
4329	leaq	288(%rsp),%rdi
4330	call	__ecp_nistz256_mul_montx
4331
4332	movq	448(%rsp),%rdx
4333	leaq	448(%rsp),%rbx
4334	movq	0+32(%rsp),%r9
4335	movq	8+32(%rsp),%r10
4336	leaq	-128+32(%rsp),%rsi
4337	movq	16+32(%rsp),%r11
4338	movq	24+32(%rsp),%r12
4339	leaq	32(%rsp),%rdi
4340	call	__ecp_nistz256_mul_montx
4341
4342	leaq	352(%rsp),%rbx
4343	leaq	96(%rsp),%rdi
4344	call	__ecp_nistz256_sub_fromx
4345
4346	movq	0+64(%rsp),%rdx
4347	movq	8+64(%rsp),%r14
4348	leaq	-128+64(%rsp),%rsi
4349	movq	16+64(%rsp),%r15
4350	movq	24+64(%rsp),%r8
4351	leaq	128(%rsp),%rdi
4352	call	__ecp_nistz256_sqr_montx
4353
4354	movq	0+96(%rsp),%rdx
4355	movq	8+96(%rsp),%r14
4356	leaq	-128+96(%rsp),%rsi
4357	movq	16+96(%rsp),%r15
4358	movq	24+96(%rsp),%r8
4359	leaq	192(%rsp),%rdi
4360	call	__ecp_nistz256_sqr_montx
4361
4362	movq	128(%rsp),%rdx
4363	leaq	128(%rsp),%rbx
4364	movq	0+64(%rsp),%r9
4365	movq	8+64(%rsp),%r10
4366	leaq	-128+64(%rsp),%rsi
4367	movq	16+64(%rsp),%r11
4368	movq	24+64(%rsp),%r12
4369	leaq	160(%rsp),%rdi
4370	call	__ecp_nistz256_mul_montx
4371
4372	movq	320(%rsp),%rdx
4373	leaq	320(%rsp),%rbx
4374	movq	0+128(%rsp),%r9
4375	movq	8+128(%rsp),%r10
4376	leaq	-128+128(%rsp),%rsi
4377	movq	16+128(%rsp),%r11
4378	movq	24+128(%rsp),%r12
4379	leaq	0(%rsp),%rdi
4380	call	__ecp_nistz256_mul_montx
4381
4382
4383
4384
4385	xorq	%r11,%r11
4386	addq	%r12,%r12
4387	leaq	192(%rsp),%rsi
4388	adcq	%r13,%r13
4389	movq	%r12,%rax
4390	adcq	%r8,%r8
4391	adcq	%r9,%r9
4392	movq	%r13,%rbp
4393	adcq	$0,%r11
4394
4395	subq	$-1,%r12
4396	movq	%r8,%rcx
4397	sbbq	%r14,%r13
4398	sbbq	$0,%r8
4399	movq	%r9,%r10
4400	sbbq	%r15,%r9
4401	sbbq	$0,%r11
4402
4403	cmovcq	%rax,%r12
4404	movq	0(%rsi),%rax
4405	cmovcq	%rbp,%r13
4406	movq	8(%rsi),%rbp
4407	cmovcq	%rcx,%r8
4408	movq	16(%rsi),%rcx
4409	cmovcq	%r10,%r9
4410	movq	24(%rsi),%r10
4411
4412	call	__ecp_nistz256_subx
4413
4414	leaq	160(%rsp),%rbx
4415	leaq	224(%rsp),%rdi
4416	call	__ecp_nistz256_sub_fromx
4417
4418	movq	0+0(%rsp),%rax
4419	movq	0+8(%rsp),%rbp
4420	movq	0+16(%rsp),%rcx
4421	movq	0+24(%rsp),%r10
4422	leaq	64(%rsp),%rdi
4423
4424	call	__ecp_nistz256_subx
4425
4426	movq	%r12,0(%rdi)
4427	movq	%r13,8(%rdi)
4428	movq	%r8,16(%rdi)
4429	movq	%r9,24(%rdi)
4430	movq	352(%rsp),%rdx
4431	leaq	352(%rsp),%rbx
4432	movq	0+160(%rsp),%r9
4433	movq	8+160(%rsp),%r10
4434	leaq	-128+160(%rsp),%rsi
4435	movq	16+160(%rsp),%r11
4436	movq	24+160(%rsp),%r12
4437	leaq	32(%rsp),%rdi
4438	call	__ecp_nistz256_mul_montx
4439
4440	movq	96(%rsp),%rdx
4441	leaq	96(%rsp),%rbx
4442	movq	0+64(%rsp),%r9
4443	movq	8+64(%rsp),%r10
4444	leaq	-128+64(%rsp),%rsi
4445	movq	16+64(%rsp),%r11
4446	movq	24+64(%rsp),%r12
4447	leaq	64(%rsp),%rdi
4448	call	__ecp_nistz256_mul_montx
4449
4450	leaq	32(%rsp),%rbx
4451	leaq	256(%rsp),%rdi
4452	call	__ecp_nistz256_sub_fromx
4453
4454.byte	102,72,15,126,199
4455
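// As in the q-path point_add_affine: .LONE_mont supplies the implicit
// Z = 1 of the affine input when selecting res_z.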
4456	movdqa	%xmm5,%xmm0
4457	movdqa	%xmm5,%xmm1
4458	pandn	288(%rsp),%xmm0
4459	movdqa	%xmm5,%xmm2
4460	pandn	288+16(%rsp),%xmm1
4461	movdqa	%xmm5,%xmm3
4462	pand	.LONE_mont(%rip),%xmm2
4463	pand	.LONE_mont+16(%rip),%xmm3
4464	por	%xmm0,%xmm2
4465	por	%xmm1,%xmm3
4466
4467	movdqa	%xmm4,%xmm0
4468	movdqa	%xmm4,%xmm1
4469	pandn	%xmm2,%xmm0
4470	movdqa	%xmm4,%xmm2
4471	pandn	%xmm3,%xmm1
4472	movdqa	%xmm4,%xmm3
4473	pand	384(%rsp),%xmm2
4474	pand	384+16(%rsp),%xmm3
4475	por	%xmm0,%xmm2
4476	por	%xmm1,%xmm3
4477	movdqu	%xmm2,64(%rdi)
4478	movdqu	%xmm3,80(%rdi)
4479
4480	movdqa	%xmm5,%xmm0
4481	movdqa	%xmm5,%xmm1
4482	pandn	224(%rsp),%xmm0
4483	movdqa	%xmm5,%xmm2
4484	pandn	224+16(%rsp),%xmm1
4485	movdqa	%xmm5,%xmm3
4486	pand	416(%rsp),%xmm2
4487	pand	416+16(%rsp),%xmm3
4488	por	%xmm0,%xmm2
4489	por	%xmm1,%xmm3
4490
4491	movdqa	%xmm4,%xmm0
4492	movdqa	%xmm4,%xmm1
4493	pandn	%xmm2,%xmm0
4494	movdqa	%xmm4,%xmm2
4495	pandn	%xmm3,%xmm1
4496	movdqa	%xmm4,%xmm3
4497	pand	320(%rsp),%xmm2
4498	pand	320+16(%rsp),%xmm3
4499	por	%xmm0,%xmm2
4500	por	%xmm1,%xmm3
4501	movdqu	%xmm2,0(%rdi)
4502	movdqu	%xmm3,16(%rdi)
4503
4504	movdqa	%xmm5,%xmm0
4505	movdqa	%xmm5,%xmm1
4506	pandn	256(%rsp),%xmm0
4507	movdqa	%xmm5,%xmm2
4508	pandn	256+16(%rsp),%xmm1
4509	movdqa	%xmm5,%xmm3
4510	pand	448(%rsp),%xmm2
4511	pand	448+16(%rsp),%xmm3
4512	por	%xmm0,%xmm2
4513	por	%xmm1,%xmm3
4514
4515	movdqa	%xmm4,%xmm0
4516	movdqa	%xmm4,%xmm1
4517	pandn	%xmm2,%xmm0
4518	movdqa	%xmm4,%xmm2
4519	pandn	%xmm3,%xmm1
4520	movdqa	%xmm4,%xmm3
4521	pand	352(%rsp),%xmm2
4522	pand	352+16(%rsp),%xmm3
4523	por	%xmm0,%xmm2
4524	por	%xmm1,%xmm3
4525	movdqu	%xmm2,32(%rdi)
4526	movdqu	%xmm3,48(%rdi)
4527
4528	leaq	480+56(%rsp),%rsi
4529.cfi_def_cfa	%rsi,8
4530	movq	-48(%rsi),%r15
4531.cfi_restore	%r15
4532	movq	-40(%rsi),%r14
4533.cfi_restore	%r14
4534	movq	-32(%rsi),%r13
4535.cfi_restore	%r13
4536	movq	-24(%rsi),%r12
4537.cfi_restore	%r12
4538	movq	-16(%rsi),%rbx
4539.cfi_restore	%rbx
4540	movq	-8(%rsi),%rbp
4541.cfi_restore	%rbp
4542	leaq	(%rsi),%rsp
4543.cfi_def_cfa_register	%rsp
4544.Ladd_affinex_epilogue:
4545	ret
4546.cfi_endproc
4547.size	ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
4548#endif
4549