xref: /aosp_15_r20/external/boringssl/src/gen/bcm/p256-armv8-asm-win.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1*8fb009dcSAndroid Build Coastguard Worker// This file is generated from a similarly-named Perl script in the BoringSSL
2*8fb009dcSAndroid Build Coastguard Worker// source tree. Do not edit by hand.
3*8fb009dcSAndroid Build Coastguard Worker
4*8fb009dcSAndroid Build Coastguard Worker#include <openssl/asm_base.h>
5*8fb009dcSAndroid Build Coastguard Worker
6*8fb009dcSAndroid Build Coastguard Worker#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
7*8fb009dcSAndroid Build Coastguard Worker#include "openssl/arm_arch.h"
8*8fb009dcSAndroid Build Coastguard Worker
9*8fb009dcSAndroid Build Coastguard Worker.section	.rodata
// Read-only constants for the P-256 routines in this file. Each 256-bit
// value is stored as four 64-bit limbs, least-significant limb first.
10*8fb009dcSAndroid Build Coastguard Worker.align	5
11*8fb009dcSAndroid Build Coastguard WorkerLpoly:	// field modulus P; code in this file loads limbs [1] and [3] into x12/x13
12*8fb009dcSAndroid Build Coastguard Worker.quad	0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
13*8fb009dcSAndroid Build Coastguard WorkerLRR:	//	2^512 mod P precomputed for NIST P256 polynomial
14*8fb009dcSAndroid Build Coastguard Worker.quad	0x0000000000000003,0xfffffffbffffffff,0xfffffffffffffffe,0x00000004fffffffd
15*8fb009dcSAndroid Build Coastguard WorkerLone_mont:	// equals 2^256 - P, i.e. the value 1 in Montgomery form
16*8fb009dcSAndroid Build Coastguard Worker.quad	0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
17*8fb009dcSAndroid Build Coastguard WorkerLone:	// the plain integer 1
18*8fb009dcSAndroid Build Coastguard Worker.quad	1,0,0,0
19*8fb009dcSAndroid Build Coastguard WorkerLord:	// NOTE(review): by name and value the P-256 group order; not referenced in the visible code
20*8fb009dcSAndroid Build Coastguard Worker.quad	0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
21*8fb009dcSAndroid Build Coastguard WorkerLordK:	// NOTE(review): presumably the Montgomery constant paired with Lord - confirm at its use site
22*8fb009dcSAndroid Build Coastguard Worker.quad	0xccd1c8aaee00bc4f
// NUL-terminated ASCII identification string:
//   ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro@openssl.org>
23*8fb009dcSAndroid Build Coastguard Worker.byte	69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
24*8fb009dcSAndroid Build Coastguard Worker.align	2
25*8fb009dcSAndroid Build Coastguard Worker.text
26*8fb009dcSAndroid Build Coastguard Worker
27*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4],
28*8fb009dcSAndroid Build Coastguard Worker//					     const BN_ULONG x2[4]);
//
// Exported wrapper around __ecp_nistz256_mul_mont.
//   In:  x0 = result pointer, x1 = pointer to a[4], x2 = pointer to b[4]
//   Out: four limbs stored at [x0] by the helper
// Sets up the register contract of the helper: b[0] in x3, a[0..3] in x4-x7,
// Lpoly[1] in x12 and Lpoly[3] in x13. x19/x20 are saved and restored here
// because the helper uses them as accumulators.
29*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_mul_mont
30*8fb009dcSAndroid Build Coastguard Worker
31*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_mul_mont
32*8fb009dcSAndroid Build Coastguard Worker   .type 32
33*8fb009dcSAndroid Build Coastguard Worker.endef
34*8fb009dcSAndroid Build Coastguard Worker.align	4
35*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_mul_mont:
36*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not shown here)
37*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-32]!	// push x29/x30; the 32-byte frame leaves room for x19/x20
38*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
39*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]	// save registers the helper overwrites
40*8fb009dcSAndroid Build Coastguard Worker
41*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2]		// bp[0]
42*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1]		// a[0], a[1]
43*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#16]		// a[2], a[3]
44*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly		// x13 = address of Lpoly (page part)
45*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly	// ... plus the low 12 bits
46*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = Lpoly[1]
47*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = Lpoly[3] (address in x13 no longer needed)
48*8fb009dcSAndroid Build Coastguard Worker
49*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont
50*8fb009dcSAndroid Build Coastguard Worker
51*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#16]	// restore saved registers
52*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#32	// pop the frame
53*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// macro from openssl/arm_arch.h; counterpart of the entry macro
54*8fb009dcSAndroid Build Coastguard Worker	ret
55*8fb009dcSAndroid Build Coastguard Worker
56*8fb009dcSAndroid Build Coastguard Worker
57*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper around __ecp_nistz256_sqr_mont.
//   In:  x0 = result pointer, x1 = pointer to a[4]
//   Out: four limbs stored at [x0] by the helper
// Loads a[0..3] into x4-x7, Lpoly[1] into x12 and Lpoly[3] into x13 before
// the call. x19/x20 are saved and restored here because the helper
// overwrites them. The helper also overwrites x1 and x2.
58*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_sqr_mont
59*8fb009dcSAndroid Build Coastguard Worker
60*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_sqr_mont
61*8fb009dcSAndroid Build Coastguard Worker   .type 32
62*8fb009dcSAndroid Build Coastguard Worker.endef
63*8fb009dcSAndroid Build Coastguard Worker.align	4
64*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_sqr_mont:
65*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not shown here)
66*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-32]!	// push x29/x30; the 32-byte frame leaves room for x19/x20
67*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
68*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]	// save registers the helper overwrites
69*8fb009dcSAndroid Build Coastguard Worker
70*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1]		// a[0], a[1]
71*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#16]		// a[2], a[3]
72*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly		// x13 = address of Lpoly (page part)
73*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly	// ... plus the low 12 bits
74*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = Lpoly[1]
75*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = Lpoly[3]
76*8fb009dcSAndroid Build Coastguard Worker
77*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont
78*8fb009dcSAndroid Build Coastguard Worker
79*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#16]	// restore saved registers
80*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#32	// pop the frame
81*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// macro from openssl/arm_arch.h; counterpart of the entry macro
82*8fb009dcSAndroid Build Coastguard Worker	ret
83*8fb009dcSAndroid Build Coastguard Worker
84*8fb009dcSAndroid Build Coastguard Worker
85*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper around __ecp_nistz256_div_by_2 (helper body is not in
// this part of the file).
//   In:  x0 = result pointer, x1 = pointer to a[4]
// Loads a[0..3] into x14-x17, Lpoly[1] into x12 and Lpoly[3] into x13, then
// calls the helper. NOTE(review): the helper presumably halves the value
// modulo P and stores to [x0] - confirm against its definition.
86*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_div_by_2
87*8fb009dcSAndroid Build Coastguard Worker
88*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_div_by_2
89*8fb009dcSAndroid Build Coastguard Worker   .type 32
90*8fb009dcSAndroid Build Coastguard Worker.endef
91*8fb009dcSAndroid Build Coastguard Worker.align	4
92*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_div_by_2:
93*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not shown here)
94*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push x29/x30 (x30 is overwritten by bl below)
95*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
96*8fb009dcSAndroid Build Coastguard Worker
97*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1]		// a[0], a[1]
98*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#16]	// a[2], a[3]
99*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly		// x13 = address of Lpoly (page part)
100*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly	// ... plus the low 12 bits
101*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = Lpoly[1]
102*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = Lpoly[3]
103*8fb009dcSAndroid Build Coastguard Worker
104*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_div_by_2
105*8fb009dcSAndroid Build Coastguard Worker
106*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
107*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// macro from openssl/arm_arch.h; counterpart of the entry macro
108*8fb009dcSAndroid Build Coastguard Worker	ret
109*8fb009dcSAndroid Build Coastguard Worker
110*8fb009dcSAndroid Build Coastguard Worker
111*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_mul_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper that doubles a by calling __ecp_nistz256_add_to with both
// operands equal to a.
//   In:  x0 = result pointer, x1 = pointer to a[4]
// Loads a[0..3] into x14-x17 and copies them to x8-x11; the helper begins by
// adding x8-x11 into x14-x17. Lpoly[1] and Lpoly[3] are placed in x12/x13.
// NOTE(review): the remainder of the helper (reduction, store to [x0]) lies
// outside this part of the file - confirm there.
112*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_mul_by_2
113*8fb009dcSAndroid Build Coastguard Worker
114*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_mul_by_2
115*8fb009dcSAndroid Build Coastguard Worker   .type 32
116*8fb009dcSAndroid Build Coastguard Worker.endef
117*8fb009dcSAndroid Build Coastguard Worker.align	4
118*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_mul_by_2:
119*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not shown here)
120*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push x29/x30 (x30 is overwritten by bl below)
121*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
122*8fb009dcSAndroid Build Coastguard Worker
123*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1]		// a[0], a[1]
124*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#16]	// a[2], a[3]
125*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly		// x13 = address of Lpoly (page part)
126*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly	// ... plus the low 12 bits
127*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = Lpoly[1]
128*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = Lpoly[3]
129*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14			// second operand = a as well
130*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
131*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
132*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
133*8fb009dcSAndroid Build Coastguard Worker
134*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// ret = a+a	// 2*a
135*8fb009dcSAndroid Build Coastguard Worker
136*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
137*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// macro from openssl/arm_arch.h; counterpart of the entry macro
138*8fb009dcSAndroid Build Coastguard Worker	ret
139*8fb009dcSAndroid Build Coastguard Worker
140*8fb009dcSAndroid Build Coastguard Worker
141*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_mul_by_3(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper that triples a with two calls to __ecp_nistz256_add_to:
// first a+a, then (a+a)+a.
//   In:  x0 = result pointer, x1 = pointer to a[4]
// a[0..3] is loaded into x14-x17 and copied both to x8-x11 (second operand
// of the first add) and to x4-x7 (kept so that a can be reloaded into x8-x11
// for the second add).
// NOTE(review): this relies on the helper leaving its result in x14-x17 and
// preserving x4-x7, x12 and x13; the helper body beyond its first
// instructions is outside this part of the file - confirm there.
142*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_mul_by_3
143*8fb009dcSAndroid Build Coastguard Worker
144*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_mul_by_3
145*8fb009dcSAndroid Build Coastguard Worker   .type 32
146*8fb009dcSAndroid Build Coastguard Worker.endef
147*8fb009dcSAndroid Build Coastguard Worker.align	4
148*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_mul_by_3:
149*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not shown here)
150*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push x29/x30 (x30 is overwritten by bl below)
151*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
152*8fb009dcSAndroid Build Coastguard Worker
153*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1]		// a[0], a[1]
154*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#16]	// a[2], a[3]
155*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly		// x13 = address of Lpoly (page part)
156*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly	// ... plus the low 12 bits
157*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = Lpoly[1]
158*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = Lpoly[3]
159*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14			// second operand of the first add = a
160*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
161*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
162*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
163*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14			// keep a copy of a in x4-x7 for the second add
164*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
165*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
166*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
167*8fb009dcSAndroid Build Coastguard Worker
168*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// ret = a+a	// 2*a
169*8fb009dcSAndroid Build Coastguard Worker
170*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x4			// reload a as the second operand
171*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x5
172*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x6
173*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x7
174*8fb009dcSAndroid Build Coastguard Worker
175*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// ret += a	// 2*a+a=3*a
176*8fb009dcSAndroid Build Coastguard Worker
177*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
178*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// macro from openssl/arm_arch.h; counterpart of the entry macro
179*8fb009dcSAndroid Build Coastguard Worker	ret
180*8fb009dcSAndroid Build Coastguard Worker
181*8fb009dcSAndroid Build Coastguard Worker
182*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_sub(BN_ULONG x0[4],const BN_ULONG x1[4],
183*8fb009dcSAndroid Build Coastguard Worker//				        const BN_ULONG x2[4]);
//
// Exported wrapper around __ecp_nistz256_sub_from (helper body is not in
// this part of the file).
//   In:  x0 = result pointer, x1 = pointer to a[4], x2 = pointer to b[4]
// Loads a[0..3] into x14-x17, Lpoly[1] into x12 and Lpoly[3] into x13. x2 is
// passed through untouched. NOTE(review): the helper presumably reads b via
// x2, computes a-b modulo P and stores to [x0] - confirm against its
// definition.
184*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_sub
185*8fb009dcSAndroid Build Coastguard Worker
186*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_sub
187*8fb009dcSAndroid Build Coastguard Worker   .type 32
188*8fb009dcSAndroid Build Coastguard Worker.endef
189*8fb009dcSAndroid Build Coastguard Worker.align	4
190*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_sub:
191*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not shown here)
192*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push x29/x30 (x30 is overwritten by bl below)
193*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
194*8fb009dcSAndroid Build Coastguard Worker
195*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1]		// a[0], a[1]
196*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#16]	// a[2], a[3]
197*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly		// x13 = address of Lpoly (page part)
198*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly	// ... plus the low 12 bits
199*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = Lpoly[1]
200*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = Lpoly[3]
201*8fb009dcSAndroid Build Coastguard Worker
202*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from
203*8fb009dcSAndroid Build Coastguard Worker
204*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
205*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// macro from openssl/arm_arch.h; counterpart of the entry macro
206*8fb009dcSAndroid Build Coastguard Worker	ret
207*8fb009dcSAndroid Build Coastguard Worker
208*8fb009dcSAndroid Build Coastguard Worker
209*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper that negates its input by calling
// __ecp_nistz256_sub_from with a zero minuend (helper body is not in this
// part of the file).
//   In:  x0 = result pointer, x1 = pointer to the value to negate
// Copies the input pointer to x2, zeroes x14-x17, and loads Lpoly[1] and
// Lpoly[3] into x12/x13. NOTE(review): the helper presumably computes
// (x14..x17) - [x2] modulo P and stores to [x0] - confirm against its
// definition.
210*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_neg
211*8fb009dcSAndroid Build Coastguard Worker
212*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_neg
213*8fb009dcSAndroid Build Coastguard Worker   .type 32
214*8fb009dcSAndroid Build Coastguard Worker.endef
215*8fb009dcSAndroid Build Coastguard Worker.align	4
216*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_neg:
217*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not shown here)
218*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push x29/x30 (x30 is overwritten by bl below)
219*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
220*8fb009dcSAndroid Build Coastguard Worker
221*8fb009dcSAndroid Build Coastguard Worker	mov	x2,x1			// input pointer moves to the second-operand register
222*8fb009dcSAndroid Build Coastguard Worker	mov	x14,xzr		// a = 0
223*8fb009dcSAndroid Build Coastguard Worker	mov	x15,xzr
224*8fb009dcSAndroid Build Coastguard Worker	mov	x16,xzr
225*8fb009dcSAndroid Build Coastguard Worker	mov	x17,xzr
226*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly		// x13 = address of Lpoly (page part)
227*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly	// ... plus the low 12 bits
228*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = Lpoly[1]
229*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = Lpoly[3]
230*8fb009dcSAndroid Build Coastguard Worker
231*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from
232*8fb009dcSAndroid Build Coastguard Worker
233*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
234*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// macro from openssl/arm_arch.h; counterpart of the entry macro
235*8fb009dcSAndroid Build Coastguard Worker	ret
236*8fb009dcSAndroid Build Coastguard Worker
237*8fb009dcSAndroid Build Coastguard Worker
238*8fb009dcSAndroid Build Coastguard Worker// note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded
239*8fb009dcSAndroid Build Coastguard Worker// to x4-x7 and b[0] - to x3
//
// Internal (non-exported) Montgomery multiplication helper.
//   In:  x0      = result pointer (four limbs are stored there)
//        x2      = pointer to b; b[1], b[2], b[3] are loaded from it here
//        x3      = b[0]
//        x4-x7   = a[0..3]
//        x12,x13 = Lpoly[1], Lpoly[3] as set up by the callers in this file
//   Overwrites: x3, x8-x11, x14-x17, x19, x20 and the flags. Uses no stack.
//
// Structure: four rounds, one per limb of b. Each round adds a[]*b[i] into
// the accumulator x14-x17,x19 (x20 catches the carry) and performs one
// reduction step that eliminates the lowest accumulator limb acc[0]:
//   x9:x8   = acc[0] << 32                (added one limb up => acc[0]<<96)
//   x11:x10 = acc[0] * 0xffffffff00000001 (as (acc[0]<<64)+acc[0]-(acc[0]<<32))
// after which the accumulator is shifted down by one limb. The multiplies
// for the next round are interleaved with the carry chain of the current
// reduction. A final conditional subtraction of the modulus follows.
240*8fb009dcSAndroid Build Coastguard Worker.def __ecp_nistz256_mul_mont
241*8fb009dcSAndroid Build Coastguard Worker   .type 32
242*8fb009dcSAndroid Build Coastguard Worker.endef
243*8fb009dcSAndroid Build Coastguard Worker.align	4
244*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_mul_mont:
245*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x4,x3		// a[0]*b[0]
246*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
247*8fb009dcSAndroid Build Coastguard Worker
248*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x5,x3		// a[1]*b[0]
249*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
250*8fb009dcSAndroid Build Coastguard Worker
251*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x6,x3		// a[2]*b[0]
252*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
253*8fb009dcSAndroid Build Coastguard Worker
254*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x7,x3		// a[3]*b[0]
255*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3
256*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8]		// b[1]
257*8fb009dcSAndroid Build Coastguard Worker
258*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
259*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
260*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
261*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
262*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
263*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,x11
264*8fb009dcSAndroid Build Coastguard Worker	mov	x20,xzr
	// reduction step 1, interleaved with the low products of a[]*b[1]
265*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
266*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
267*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
268*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3		// lo(a[0]*b[i])
269*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
270*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3		// lo(a[1]*b[i])
271*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
272*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3		// lo(a[2]*b[i])
273*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x11
274*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3		// lo(a[3]*b[i])
275*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
276*8fb009dcSAndroid Build Coastguard Worker
277*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts of multiplication
278*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3		// hi(a[0]*b[i])
279*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
280*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3		// hi(a[1]*b[i])
281*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
282*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3		// hi(a[2]*b[i])
283*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
284*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3		// hi(a[3]*b[i])
285*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
286*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*(1+1)]	// b[1+1]
287*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
288*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
289*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
290*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
291*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
292*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
293*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
	// reduction step 2, interleaved with the low products of a[]*b[2]
294*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
295*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
296*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
297*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3		// lo(a[0]*b[i])
298*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
299*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3		// lo(a[1]*b[i])
300*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
301*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3		// lo(a[2]*b[i])
302*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x11
303*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3		// lo(a[3]*b[i])
304*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
305*8fb009dcSAndroid Build Coastguard Worker
306*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts of multiplication
307*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3		// hi(a[0]*b[i])
308*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
309*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3		// hi(a[1]*b[i])
310*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
311*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3		// hi(a[2]*b[i])
312*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
313*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3		// hi(a[3]*b[i])
314*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
315*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*(2+1)]	// b[2+1]
316*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
317*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
318*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
319*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
320*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
321*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
322*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
	// reduction step 3, interleaved with the low products of a[]*b[3]
323*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
324*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
325*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
326*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3		// lo(a[0]*b[i])
327*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
328*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3		// lo(a[1]*b[i])
329*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
330*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3		// lo(a[2]*b[i])
331*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x11
332*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3		// lo(a[3]*b[i])
333*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
334*8fb009dcSAndroid Build Coastguard Worker
335*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts of multiplication
336*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3		// hi(a[0]*b[i])
337*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
338*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3		// hi(a[1]*b[i])
339*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
340*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3		// hi(a[2]*b[i])
341*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
342*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3		// hi(a[3]*b[i])
343*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
344*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
345*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
346*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
347*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
348*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
349*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
350*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
351*8fb009dcSAndroid Build Coastguard Worker	// last reduction
352*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
353*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
354*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
355*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
356*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
357*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x11
358*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
359*8fb009dcSAndroid Build Coastguard Worker
	// Conditional final subtraction: tmp (x8-x11) = acc - modulus, with the
	// modulus limbs taken as all-ones (via adds #1), x12, zero and x13. x19
	// participates as a fifth limb so that the borrow is decided over 5 limbs.
360*8fb009dcSAndroid Build Coastguard Worker	adds	x8,x14,#1		// subs	x8,x14,#-1 // tmp = ret-modulus
361*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x12
362*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,xzr
363*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x13
364*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x19,xzr		// did it borrow?
365*8fb009dcSAndroid Build Coastguard Worker
	// Branch-free selection with csel; the two stp stores write the result.
366*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,lo	// ret = borrow ? ret : ret-modulus
367*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,lo
368*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,lo
369*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
370*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,lo
371*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
372*8fb009dcSAndroid Build Coastguard Worker
373*8fb009dcSAndroid Build Coastguard Worker	ret
374*8fb009dcSAndroid Build Coastguard Worker
375*8fb009dcSAndroid Build Coastguard Worker
376*8fb009dcSAndroid Build Coastguard Worker// note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded
377*8fb009dcSAndroid Build Coastguard Worker// to x4-x7
//
// Internal (non-exported) Montgomery squaring helper.
//   In:  x0      = result pointer (four limbs are stored there)
//        x4-x7   = a[0..3]
//        x12,x13 = Lpoly[1], Lpoly[3] as set up by the caller in this file
//   Overwrites: x1, x2, x4-x7, x8-x11, x14-x17, x19, x20 and the flags.
//   Uses no stack. Note that the incoming values of x1 and x2 are lost.
//
// Structure: build the full 8-limb square (cross products once, doubled,
// plus the diagonal squares), run four reduction steps on the low four
// limbs, add the high four limbs, then conditionally subtract the modulus.
378*8fb009dcSAndroid Build Coastguard Worker.def __ecp_nistz256_sqr_mont
379*8fb009dcSAndroid Build Coastguard Worker   .type 32
380*8fb009dcSAndroid Build Coastguard Worker.endef
381*8fb009dcSAndroid Build Coastguard Worker.align	4
382*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_sqr_mont:
383*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |  |  |a1*a0|  |
384*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |  |a2*a0|  |  |
385*8fb009dcSAndroid Build Coastguard Worker	//  |  |a3*a2|a3*a0|  |  |  |
386*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |a2*a1|  |  |  |
387*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |a3*a1|  |  |  |  |
388*8fb009dcSAndroid Build Coastguard Worker	// *|  |  |  |  |  |  |  | 2|
389*8fb009dcSAndroid Build Coastguard Worker	// +|a3*a3|a2*a2|a1*a1|a0*a0|
390*8fb009dcSAndroid Build Coastguard Worker	//  |--+--+--+--+--+--+--+--|
391*8fb009dcSAndroid Build Coastguard Worker	//  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is the x-th accumulator limb;
	//  in this routine A0..A7 are held in x14,x15,x16,x17,x19,x20,x1,x2.
392*8fb009dcSAndroid Build Coastguard Worker	//
393*8fb009dcSAndroid Build Coastguard Worker	//  "can't overflow" below mark carrying into high part of
394*8fb009dcSAndroid Build Coastguard Worker	//  multiplication result, which can't overflow, because it
395*8fb009dcSAndroid Build Coastguard Worker	//  can never be all ones.
396*8fb009dcSAndroid Build Coastguard Worker
397*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x5,x4		// a[1]*a[0]
398*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x4
399*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x6,x4		// a[2]*a[0]
400*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x4
401*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x7,x4		// a[3]*a[0]
402*8fb009dcSAndroid Build Coastguard Worker	umulh	x19,x7,x4
403*8fb009dcSAndroid Build Coastguard Worker
404*8fb009dcSAndroid Build Coastguard Worker	adds	x16,x16,x9		// accumulate high parts of multiplication
405*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x6,x5		// a[2]*a[1]
406*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x6,x5
407*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
408*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x7,x5		// a[3]*a[1]
409*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x5
410*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr		// can't overflow
411*8fb009dcSAndroid Build Coastguard Worker
412*8fb009dcSAndroid Build Coastguard Worker	mul	x20,x7,x6		// a[3]*a[2]
413*8fb009dcSAndroid Build Coastguard Worker	umulh	x1,x7,x6		// x1 is reused as accumulator limb A6 from here on
414*8fb009dcSAndroid Build Coastguard Worker
415*8fb009dcSAndroid Build Coastguard Worker	adds	x9,x9,x10		// accumulate high parts of multiplication
416*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x4,x4		// a[0]*a[0]
417*8fb009dcSAndroid Build Coastguard Worker	adc	x10,x11,xzr		// can't overflow
418*8fb009dcSAndroid Build Coastguard Worker
419*8fb009dcSAndroid Build Coastguard Worker	adds	x17,x17,x8		// accumulate low parts of multiplication
420*8fb009dcSAndroid Build Coastguard Worker	umulh	x4,x4,x4		// x4 now holds hi(a[0]*a[0]); a[0] itself is no longer needed
421*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x9
422*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x5		// a[1]*a[1]
423*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x10
424*8fb009dcSAndroid Build Coastguard Worker	umulh	x5,x5,x5
425*8fb009dcSAndroid Build Coastguard Worker	adc	x1,x1,xzr		// can't overflow
426*8fb009dcSAndroid Build Coastguard Worker
427*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x15	// acc[1-6]*=2
428*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x6		// a[2]*a[2]
429*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x16
430*8fb009dcSAndroid Build Coastguard Worker	umulh	x6,x6,x6
431*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x17
432*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x7		// a[3]*a[3]
433*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x19
434*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x7
435*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x20
436*8fb009dcSAndroid Build Coastguard Worker	adcs	x1,x1,x1
437*8fb009dcSAndroid Build Coastguard Worker	adc	x2,xzr,xzr		// x2 = carry out of the doubling; becomes limb A7
438*8fb009dcSAndroid Build Coastguard Worker
	// Add the diagonal squares; the shifts for the first reduction step are
	// interleaved with the tail of this carry chain.
439*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x4		// +a[i]*a[i]
440*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
441*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x5
442*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x10
443*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x6
444*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
445*8fb009dcSAndroid Build Coastguard Worker	adcs	x1,x1,x11
446*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
447*8fb009dcSAndroid Build Coastguard Worker	adc	x2,x2,x7
	// Four reduction steps over the low limbs x14-x17. In each step
	// x9:x8 = acc[0]<<32 and x11:x10 = acc[0]*0xffffffff00000001.
448*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
449*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
450*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
451*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
452*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
453*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
454*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
455*8fb009dcSAndroid Build Coastguard Worker	adc	x17,x11,xzr		// can't overflow
456*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
457*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
458*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
459*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
460*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
461*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
462*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
463*8fb009dcSAndroid Build Coastguard Worker	adc	x17,x11,xzr		// can't overflow
464*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
465*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
466*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
467*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
468*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
469*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
470*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
471*8fb009dcSAndroid Build Coastguard Worker	adc	x17,x11,xzr		// can't overflow
472*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
473*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
474*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
475*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
476*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
477*8fb009dcSAndroid Build Coastguard Worker	adc	x17,x11,xzr		// can't overflow
478*8fb009dcSAndroid Build Coastguard Worker
479*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x19	// accumulate upper half
480*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x20
481*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x1
482*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x2
483*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr		// x19 = carry out, used as a fifth limb below
484*8fb009dcSAndroid Build Coastguard Worker
	// Conditional final subtraction: tmp (x8-x11) = acc - modulus, with the
	// modulus limbs taken as all-ones (via adds #1), x12, zero and x13.
485*8fb009dcSAndroid Build Coastguard Worker	adds	x8,x14,#1		// subs	x8,x14,#-1 // tmp = ret-modulus
486*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x12
487*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,xzr
488*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x13
489*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x19,xzr		// did it borrow?
490*8fb009dcSAndroid Build Coastguard Worker
	// Branch-free selection with csel; the two stp stores write the result.
491*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,lo	// ret = borrow ? ret : ret-modulus
492*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,lo
493*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,lo
494*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
495*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,lo
496*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
497*8fb009dcSAndroid Build Coastguard Worker
498*8fb009dcSAndroid Build Coastguard Worker	ret
499*8fb009dcSAndroid Build Coastguard Worker
500*8fb009dcSAndroid Build Coastguard Worker
501*8fb009dcSAndroid Build Coastguard Worker// Note that __ecp_nistz256_add_to expects both input vectors pre-loaded to
502*8fb009dcSAndroid Build Coastguard Worker// x14-x17 and x8-x11. This is done because it's used in multiple
503*8fb009dcSAndroid Build Coastguard Worker// contexts, e.g. in multiplication by 2 and 3...
//
// __ecp_nistz256_add_to: ret = a + b, followed by one conditional
// subtraction of the modulus, selected with csel rather than a branch.
//   In:   x14-x17 = a, x8-x11 = b (x14/x8 are the least significant limbs)
//         x12, x13 = words 1 and 3 of the modulus (callers load them from
//                    Lpoly+8 and Lpoly+24); words 0 and 2 are all-ones and
//                    zero, so they are folded into the instructions below
//         x0       = destination for the 4-limb result
//   Out:  x14-x17 = result, also stored at [x0]
//   Clobbers: x1, x8-x11, flags
504*8fb009dcSAndroid Build Coastguard Worker.def __ecp_nistz256_add_to
505*8fb009dcSAndroid Build Coastguard Worker   .type 32
506*8fb009dcSAndroid Build Coastguard Worker.endef
507*8fb009dcSAndroid Build Coastguard Worker.align	4
508*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_add_to:
509*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// ret = a+b
510*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
511*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
512*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
513*8fb009dcSAndroid Build Coastguard Worker	adc	x1,xzr,xzr		// x1 = carry out of the 256-bit sum (0 or 1)
514*8fb009dcSAndroid Build Coastguard Worker
515*8fb009dcSAndroid Build Coastguard Worker	adds	x8,x14,#1		// same value and carry as subs x8,x14,#-1: tmp = ret-modulus
516*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x12
517*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,xzr		// word 2 of the modulus is zero
518*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x13
519*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x1,xzr		// did subtraction borrow? (carry word included)
520*8fb009dcSAndroid Build Coastguard Worker
521*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,lo	// ret = borrow ? ret : ret-modulus
522*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,lo
523*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,lo
524*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
525*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,lo
526*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
527*8fb009dcSAndroid Build Coastguard Worker
528*8fb009dcSAndroid Build Coastguard Worker	ret
529*8fb009dcSAndroid Build Coastguard Worker
530*8fb009dcSAndroid Build Coastguard Worker
// __ecp_nistz256_sub_from: ret = a - b, followed by one conditional
// addition of the modulus when the subtraction borrowed (selected with
// csel rather than a branch).
//   In:   x14-x17 = a (x14 is the least significant limb)
//         x2       = pointer to b (4 x 64-bit limbs)
//         x12, x13 = words 1 and 3 of the modulus (callers load them from
//                    Lpoly+8 and Lpoly+24)
//         x0       = destination for the 4-limb result
//   Out:  x14-x17 = result, also stored at [x0]
//   Clobbers: x1, x8-x11, flags
531*8fb009dcSAndroid Build Coastguard Worker.def __ecp_nistz256_sub_from
532*8fb009dcSAndroid Build Coastguard Worker   .type 32
533*8fb009dcSAndroid Build Coastguard Worker.endef
534*8fb009dcSAndroid Build Coastguard Worker.align	4
535*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_sub_from:
536*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2]		// x8-x11 = b
537*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#16]
538*8fb009dcSAndroid Build Coastguard Worker	subs	x14,x14,x8		// ret = a-b
539*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x15,x9
540*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x10
541*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x11
542*8fb009dcSAndroid Build Coastguard Worker	sbc	x1,xzr,xzr		// x1 = borrow ? -1 : 0
543*8fb009dcSAndroid Build Coastguard Worker
544*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,#1		// same value and carry as adds x8,x14,#-1: tmp = ret+modulus
545*8fb009dcSAndroid Build Coastguard Worker	adcs	x9,x15,x12
546*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x16,xzr		// word 2 of the modulus is zero
547*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x17,x13
548*8fb009dcSAndroid Build Coastguard Worker	cmp	x1,xzr			// did subtraction borrow?
549*8fb009dcSAndroid Build Coastguard Worker
550*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,eq	// ret = borrow ? ret+modulus : ret
551*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,eq
552*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,eq
553*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
554*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,eq
555*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
556*8fb009dcSAndroid Build Coastguard Worker
557*8fb009dcSAndroid Build Coastguard Worker	ret
558*8fb009dcSAndroid Build Coastguard Worker
559*8fb009dcSAndroid Build Coastguard Worker
// __ecp_nistz256_sub_morf: operand-reversed subtraction, ret = b - a,
// followed by one conditional addition of the modulus when the subtraction
// borrowed (selected with csel rather than a branch).
//   In:   x14-x17 = a, the subtrahend (x14 is the least significant limb)
//         x2       = pointer to b, the minuend (4 x 64-bit limbs)
//         x12, x13 = words 1 and 3 of the modulus (callers load them from
//                    Lpoly+8 and Lpoly+24)
//         x0       = destination for the 4-limb result
//   Out:  x14-x17 = result, also stored at [x0]
//   Clobbers: x1, x8-x11, flags
560*8fb009dcSAndroid Build Coastguard Worker.def __ecp_nistz256_sub_morf
561*8fb009dcSAndroid Build Coastguard Worker   .type 32
562*8fb009dcSAndroid Build Coastguard Worker.endef
563*8fb009dcSAndroid Build Coastguard Worker.align	4
564*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_sub_morf:
565*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2]		// x8-x11 = b
566*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#16]
567*8fb009dcSAndroid Build Coastguard Worker	subs	x14,x8,x14		// ret = b-a
568*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x9,x15
569*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x10,x16
570*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x11,x17
571*8fb009dcSAndroid Build Coastguard Worker	sbc	x1,xzr,xzr		// x1 = borrow ? -1 : 0
572*8fb009dcSAndroid Build Coastguard Worker
573*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,#1		// same value and carry as adds x8,x14,#-1: tmp = ret+modulus
574*8fb009dcSAndroid Build Coastguard Worker	adcs	x9,x15,x12
575*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x16,xzr		// word 2 of the modulus is zero
576*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x17,x13
577*8fb009dcSAndroid Build Coastguard Worker	cmp	x1,xzr			// did subtraction borrow?
578*8fb009dcSAndroid Build Coastguard Worker
579*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,eq	// ret = borrow ? ret+modulus : ret
580*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,eq
581*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,eq
582*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
583*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,eq
584*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
585*8fb009dcSAndroid Build Coastguard Worker
586*8fb009dcSAndroid Build Coastguard Worker	ret
587*8fb009dcSAndroid Build Coastguard Worker
588*8fb009dcSAndroid Build Coastguard Worker
// __ecp_nistz256_div_by_2: halve a value modulo the (odd) modulus.
// If a is odd, the modulus is added first so that the 257-bit sum is even;
// the sum (or a itself when even) is then shifted right by one bit.
// The choice is made with csel rather than a branch.
//   In:   x14-x17 = a (x14 is the least significant limb)
//         x12, x13 = words 1 and 3 of the modulus (callers load them from
//                    Lpoly+8 and Lpoly+24)
//         x0       = destination for the 4-limb result
//   Out:  x14-x17 = result, also stored at [x0]
//   Clobbers: x1, x8-x11, flags
589*8fb009dcSAndroid Build Coastguard Worker.def __ecp_nistz256_div_by_2
590*8fb009dcSAndroid Build Coastguard Worker   .type 32
591*8fb009dcSAndroid Build Coastguard Worker.endef
592*8fb009dcSAndroid Build Coastguard Worker.align	4
593*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_div_by_2:
594*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,#1		// same value and carry as adds x8,x14,#-1: tmp = a+modulus
595*8fb009dcSAndroid Build Coastguard Worker	adcs	x9,x15,x12
596*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x16,xzr		// word 2 of the modulus is zero
597*8fb009dcSAndroid Build Coastguard Worker	adcs	x11,x17,x13
598*8fb009dcSAndroid Build Coastguard Worker	adc	x1,xzr,xzr		// x1 = carry out of a+modulus (bit 256)
599*8fb009dcSAndroid Build Coastguard Worker	tst	x14,#1		// is a even?
600*8fb009dcSAndroid Build Coastguard Worker
601*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,eq	// ret = even ? a : a+modulus
602*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,eq
603*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,eq
604*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,eq
605*8fb009dcSAndroid Build Coastguard Worker	csel	x1,xzr,x1,eq		// no carry word when the modulus was not added
606*8fb009dcSAndroid Build Coastguard Worker
607*8fb009dcSAndroid Build Coastguard Worker	lsr	x14,x14,#1		// ret >>= 1
608*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x15,lsl#63	// bit 0 of the next limb becomes bit 63 of this one
609*8fb009dcSAndroid Build Coastguard Worker	lsr	x15,x15,#1
610*8fb009dcSAndroid Build Coastguard Worker	orr	x15,x15,x16,lsl#63
611*8fb009dcSAndroid Build Coastguard Worker	lsr	x16,x16,#1
612*8fb009dcSAndroid Build Coastguard Worker	orr	x16,x16,x17,lsl#63
613*8fb009dcSAndroid Build Coastguard Worker	lsr	x17,x17,#1
614*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
615*8fb009dcSAndroid Build Coastguard Worker	orr	x17,x17,x1,lsl#63	// carry bit shifts in at the top
616*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
617*8fb009dcSAndroid Build Coastguard Worker
618*8fb009dcSAndroid Build Coastguard Worker	ret
619*8fb009dcSAndroid Build Coastguard Worker
// ecp_nistz256_point_double: res = 2 * in.
//   In:   x0 = res, x1 = in. Each point is read/written as three 32-byte
//         field elements (x, y, z) at byte offsets 0, 32 and 64.
//   Registers kept live across the helper calls:
//         x21 = res, x22 = in,
//         x12, x13 = words 1 and 3 of Lpoly, consumed by the
//                    add_to/sub_from/sub_morf/div_by_2 helpers.
//   Stack: a 96-byte register save area (the same size ecp_nistz256_point_add
//         uses; its Ladd_double path branches to Ldouble_shortcut and returns
//         through the epilogue below), plus four 32-byte temporaries:
//         sp+0 = S, sp+32 = M, sp+64 = Zsqr, sp+96 = tmp0.
//   Helper operand convention as used at the call sites below:
//         add_to:            a in x14-x17, b in x8-x11, result to [x0]
//         sub_from/sub_morf: a in x14-x17, b at [x2],   result to [x0]
//         div_by_2:          a in x14-x17,              result to [x0]
//         sqr_mont/mul_mont: a in x4-x7 (mul_mont: b at [x2] with b[0] already
//                            in x3), result to [x0] and left in x14-x17.
//                            NOTE(review): inferred from the call sites; the
//                            start of those routines is outside this block.
//   The "forward load" comments mark operands fetched early for a later
//   sqr_mont/mul_mont call while an add/sub/div helper runs in between;
//   those helpers do not touch x3-x7.
620*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_point_double
621*8fb009dcSAndroid Build Coastguard Worker
622*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_point_double
623*8fb009dcSAndroid Build Coastguard Worker   .type 32
624*8fb009dcSAndroid Build Coastguard Worker.endef
625*8fb009dcSAndroid Build Coastguard Worker.align	5
626*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_point_double:
627*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h
628*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-96]!	// allocate the 96-byte save area
629*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
630*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
631*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
632*8fb009dcSAndroid Build Coastguard Worker	sub	sp,sp,#32*4		// temporaries: S, M, Zsqr, tmp0
633*8fb009dcSAndroid Build Coastguard Worker
634*8fb009dcSAndroid Build Coastguard WorkerLdouble_shortcut:	// also entered from Ladd_double with x0 = res, x1 = in
635*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1,#32]	// a = in_y
636*8fb009dcSAndroid Build Coastguard Worker	mov	x21,x0		// x21 = res
637*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#48]
638*8fb009dcSAndroid Build Coastguard Worker	mov	x22,x1		// x22 = in
639*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly
640*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly
641*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = word 1 of the modulus
642*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14		// b = in_y as well, so a+b = 2*in_y
643*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = word 3 of the modulus
644*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
645*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x22,#64]	// forward load for p256_sqr_mont
646*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
647*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
648*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x22,#64+16]
649*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
650*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(S, in_y);
651*8fb009dcSAndroid Build Coastguard Worker
652*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
653*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Zsqr, in_z);
654*8fb009dcSAndroid Build Coastguard Worker
655*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x22]		// b = in_x
656*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x22,#16]
657*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14		// put Zsqr aside for p256_sub
658*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
659*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
660*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
661*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
662*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_add(M, Zsqr, in_x);
663*8fb009dcSAndroid Build Coastguard Worker
664*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#0		// b = in_x
665*8fb009dcSAndroid Build Coastguard Worker	mov	x14,x4		// restore Zsqr
666*8fb009dcSAndroid Build Coastguard Worker	mov	x15,x5
667*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]	// forward load for p256_sqr_mont
668*8fb009dcSAndroid Build Coastguard Worker	mov	x16,x6
669*8fb009dcSAndroid Build Coastguard Worker	mov	x17,x7
670*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
671*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
672*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(Zsqr, in_x, Zsqr);
673*8fb009dcSAndroid Build Coastguard Worker
674*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
675*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(S, S);
676*8fb009dcSAndroid Build Coastguard Worker
677*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#32]		// first limb of in_y
678*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x22,#64]	// in_z
679*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x22,#64+16]
680*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#32
681*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
682*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(tmp0, in_z, in_y);
683*8fb009dcSAndroid Build Coastguard Worker
684*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14		// b = a = tmp0
685*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
686*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]	// forward load for p256_sqr_mont
687*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
688*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
689*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
690*8fb009dcSAndroid Build Coastguard Worker	add	x0,x21,#64
691*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(res_z, tmp0);
692*8fb009dcSAndroid Build Coastguard Worker
693*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
694*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(tmp0, S);
695*8fb009dcSAndroid Build Coastguard Worker
696*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#64]		// forward load for p256_mul_mont
697*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#32]
698*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#32+16]
699*8fb009dcSAndroid Build Coastguard Worker	add	x0,x21,#32
700*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_div_by_2	// p256_div_by_2(res_y, tmp0);
701*8fb009dcSAndroid Build Coastguard Worker
702*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#64
703*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
704*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(M, M, Zsqr);
705*8fb009dcSAndroid Build Coastguard Worker
706*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14		// duplicate M
707*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
708*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
709*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
710*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14		// put M aside
711*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
712*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
713*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
714*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
715*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// x14-x17 = 2*M (x8-x11 are clobbered)
716*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x4			// restore M
717*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x5
718*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22]		// forward load for p256_mul_mont
719*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x6
720*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]
721*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x7
722*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
723*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_3(M, M);
724*8fb009dcSAndroid Build Coastguard Worker
725*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#0
726*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
727*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S, S, in_x);
728*8fb009dcSAndroid Build Coastguard Worker
729*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14		// b = a = S
730*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
731*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#32]	// forward load for p256_sqr_mont
732*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
733*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
734*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#32+16]
735*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
736*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(tmp0, S);
737*8fb009dcSAndroid Build Coastguard Worker
738*8fb009dcSAndroid Build Coastguard Worker	add	x0,x21,#0
739*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(res_x, M);
740*8fb009dcSAndroid Build Coastguard Worker
741*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#96
742*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_x, res_x, tmp0);
743*8fb009dcSAndroid Build Coastguard Worker
744*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#0
745*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
746*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(S, S, res_x);
747*8fb009dcSAndroid Build Coastguard Worker
748*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#32]		// first limb of M
749*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14		// copy S
750*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
751*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
752*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
753*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#32
754*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S, S, M);
755*8fb009dcSAndroid Build Coastguard Worker
756*8fb009dcSAndroid Build Coastguard Worker	add	x2,x21,#32
757*8fb009dcSAndroid Build Coastguard Worker	add	x0,x21,#32
758*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_y, S, res_y);
759*8fb009dcSAndroid Build Coastguard Worker
760*8fb009dcSAndroid Build Coastguard Worker	add	sp,x29,#0		// destroy frame
761*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
762*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
763*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#96
764*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// macro from openssl/arm_arch.h
765*8fb009dcSAndroid Build Coastguard Worker	ret
766*8fb009dcSAndroid Build Coastguard Worker
767*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_point_add
768*8fb009dcSAndroid Build Coastguard Worker
769*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_point_add
770*8fb009dcSAndroid Build Coastguard Worker   .type 32
771*8fb009dcSAndroid Build Coastguard Worker.endef
772*8fb009dcSAndroid Build Coastguard Worker.align	5
773*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_point_add:
774*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER
775*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-96]!
776*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
777*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
778*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
779*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
780*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[sp,#64]
781*8fb009dcSAndroid Build Coastguard Worker	stp	x27,x28,[sp,#80]
782*8fb009dcSAndroid Build Coastguard Worker	sub	sp,sp,#32*12
783*8fb009dcSAndroid Build Coastguard Worker
784*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x2,#64]	// in2_z
785*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x2,#64+16]
786*8fb009dcSAndroid Build Coastguard Worker	mov	x21,x0
787*8fb009dcSAndroid Build Coastguard Worker	mov	x22,x1
788*8fb009dcSAndroid Build Coastguard Worker	mov	x23,x2
789*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly
790*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly
791*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]
792*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]
793*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x4,x5
794*8fb009dcSAndroid Build Coastguard Worker	orr	x10,x6,x7
795*8fb009dcSAndroid Build Coastguard Worker	orr	x25,x8,x10
796*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0
797*8fb009dcSAndroid Build Coastguard Worker	csetm	x25,ne		// ~in2infty
798*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#192
799*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z2sqr, in2_z);
800*8fb009dcSAndroid Build Coastguard Worker
801*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x22,#64]	// in1_z
802*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x22,#64+16]
803*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x4,x5
804*8fb009dcSAndroid Build Coastguard Worker	orr	x10,x6,x7
805*8fb009dcSAndroid Build Coastguard Worker	orr	x24,x8,x10
806*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0
807*8fb009dcSAndroid Build Coastguard Worker	csetm	x24,ne		// ~in1infty
808*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
809*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);
810*8fb009dcSAndroid Build Coastguard Worker
811*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23,#64]
812*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#192]
813*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#192+16]
814*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#64
815*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#320
816*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S1, Z2sqr, in2_z);
817*8fb009dcSAndroid Build Coastguard Worker
818*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#64]
819*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#128]
820*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#128+16]
821*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#64
822*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#352
823*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, Z1sqr, in1_z);
824*8fb009dcSAndroid Build Coastguard Worker
825*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#32]
826*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#320]
827*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#320+16]
828*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#32
829*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#320
830*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S1, S1, in1_y);
831*8fb009dcSAndroid Build Coastguard Worker
832*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23,#32]
833*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#352]
834*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#352+16]
835*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#32
836*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#352
837*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, S2, in2_y);
838*8fb009dcSAndroid Build Coastguard Worker
839*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#320
840*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#192]	// forward load for p256_mul_mont
841*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x22]
842*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x22,#16]
843*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#160
844*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(R, S2, S1);
845*8fb009dcSAndroid Build Coastguard Worker
846*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x15	// see if result is zero
847*8fb009dcSAndroid Build Coastguard Worker	orr	x16,x16,x17
848*8fb009dcSAndroid Build Coastguard Worker	orr	x26,x14,x16	// ~is_equal(S1,S2)
849*8fb009dcSAndroid Build Coastguard Worker
850*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#192
851*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#256
852*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U1, in1_x, Z2sqr);
853*8fb009dcSAndroid Build Coastguard Worker
854*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#128]
855*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x23]
856*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x23,#16]
857*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#128
858*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#288
859*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U2, in2_x, Z1sqr);
860*8fb009dcSAndroid Build Coastguard Worker
861*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#256
862*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#160]	// forward load for p256_sqr_mont
863*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#160+16]
864*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
865*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(H, U2, U1);
866*8fb009dcSAndroid Build Coastguard Worker
867*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x15	// see if result is zero
868*8fb009dcSAndroid Build Coastguard Worker	orr	x16,x16,x17
869*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x16	// ~is_equal(U1,U2)
870*8fb009dcSAndroid Build Coastguard Worker
871*8fb009dcSAndroid Build Coastguard Worker	mvn	x27,x24	// -1/0 -> 0/-1
872*8fb009dcSAndroid Build Coastguard Worker	mvn	x28,x25	// -1/0 -> 0/-1
873*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x27
874*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x28
875*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x26
876*8fb009dcSAndroid Build Coastguard Worker	cbnz	x14,Ladd_proceed	// if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
877*8fb009dcSAndroid Build Coastguard Worker
878*8fb009dcSAndroid Build Coastguard WorkerLadd_double:
879*8fb009dcSAndroid Build Coastguard Worker	mov	x1,x22
880*8fb009dcSAndroid Build Coastguard Worker	mov	x0,x21
881*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
882*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
883*8fb009dcSAndroid Build Coastguard Worker	ldp	x27,x28,[x29,#80]
884*8fb009dcSAndroid Build Coastguard Worker	add	sp,sp,#256	// #256 is from #32*(12-4). difference in stack frames
885*8fb009dcSAndroid Build Coastguard Worker	b	Ldouble_shortcut
886*8fb009dcSAndroid Build Coastguard Worker
887*8fb009dcSAndroid Build Coastguard Worker.align	4
888*8fb009dcSAndroid Build Coastguard WorkerLadd_proceed:
889*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#192
890*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Rsqr, R);
891*8fb009dcSAndroid Build Coastguard Worker
892*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#64]
893*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#96]
894*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#96+16]
895*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#64
896*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
897*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_z, H, in1_z);
898*8fb009dcSAndroid Build Coastguard Worker
899*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#96]
900*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#96+16]
901*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
902*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Hsqr, H);
903*8fb009dcSAndroid Build Coastguard Worker
904*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23,#64]
905*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#64]
906*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#64+16]
907*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#64
908*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
909*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_z, res_z, in2_z);
910*8fb009dcSAndroid Build Coastguard Worker
911*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#96]
912*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#128]
913*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#128+16]
914*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#96
915*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#224
916*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(Hcub, Hsqr, H);
917*8fb009dcSAndroid Build Coastguard Worker
918*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#128]
919*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#256]
920*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#256+16]
921*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#128
922*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#288
923*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U2, U1, Hsqr);
924*8fb009dcSAndroid Build Coastguard Worker
925*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14
926*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
927*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
928*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
929*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
930*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(Hsqr, U2);
931*8fb009dcSAndroid Build Coastguard Worker
932*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#192
933*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
934*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(res_x, Rsqr, Hsqr);
935*8fb009dcSAndroid Build Coastguard Worker
936*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#224
937*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	//  p256_sub(res_x, res_x, Hcub);
938*8fb009dcSAndroid Build Coastguard Worker
939*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#288
940*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#224]		// forward load for p256_mul_mont
941*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#320]
942*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#320+16]
943*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
944*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(res_y, U2, res_x);
945*8fb009dcSAndroid Build Coastguard Worker
946*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#224
947*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#352
948*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, S1, Hcub);
949*8fb009dcSAndroid Build Coastguard Worker
950*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#160]
951*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#32]
952*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#32+16]
953*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#160
954*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
955*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_y, res_y, R);
956*8fb009dcSAndroid Build Coastguard Worker
957*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#352
958*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_y, res_y, S2);
959*8fb009dcSAndroid Build Coastguard Worker
960*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]		// res
961*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
962*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23]		// in2
963*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#16]
964*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#0]	// in1
965*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
966*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#0+16]
967*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
968*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
969*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0+0+32]	// res
970*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
971*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
972*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
973*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+0+48]
974*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
975*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
976*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23,#0+32]	// in2
977*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
978*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
979*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#0+48]
980*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#0]
981*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#0+16]
982*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#32]	// in1
983*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
984*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#32+16]
985*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
986*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
987*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0+32+32]	// res
988*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
989*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
990*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
991*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+32+48]
992*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
993*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
994*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23,#32+32]	// in2
995*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
996*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
997*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#32+48]
998*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#32]
999*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#32+16]
1000*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#64]	// in1
1001*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
1002*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#64+16]
1003*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
1004*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
1005*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
1006*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
1007*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
1008*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
1009*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
1010*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
1011*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
1012*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#64]
1013*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#64+16]
1014*8fb009dcSAndroid Build Coastguard Worker
1015*8fb009dcSAndroid Build Coastguard WorkerLadd_done:
1016*8fb009dcSAndroid Build Coastguard Worker	add	sp,x29,#0		// destroy frame
1017*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
1018*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
1019*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
1020*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
1021*8fb009dcSAndroid Build Coastguard Worker	ldp	x27,x28,[x29,#80]
1022*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#96
1023*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
1024*8fb009dcSAndroid Build Coastguard Worker	ret
1025*8fb009dcSAndroid Build Coastguard Worker
// ---------------------------------------------------------------------
// ecp_nistz256_point_add_affine
//
// Register/argument usage as visible in this routine:
//   x0 -> saved in x21; base address of every final store (output point,
//         words at +0, +32 and +64).
//   x1 -> saved in x22; "in1", read at +0 (x), +32 (y) and +64 (z).
//   x2 -> saved in x23; "in2", read at +0 (x) and +32 (y) only.
//   x24 / x25 hold the "not at infinity" masks for in1 / in2.
//
// Stack frame: 80 bytes of saved registers (x29/x30, x19-x26) with
// 32*10 bytes of scratch below them. Scratch slots, as named by the
// call comments in the body:
//   sp+0   res_x     sp+32  res_y     sp+64  res_z     sp+96  U2
//   sp+128 Z1sqr, later reused as S2  sp+160 H         sp+192 R
//   sp+224 Hsqr      sp+256 Hcub      sp+288 Rsqr
//
// Helper calling pattern used below (the helpers themselves are defined
// outside this excerpt, so treat this as an observation, not a spec):
// x0 = destination, x2 = pointer to a memory operand, x3 = first word
// of that operand (for mul_mont), x4-x7 = register operand. Results are
// picked up from x14-x17 after a call -- TODO confirm against the
// __ecp_nistz256_* helper definitions.
// ---------------------------------------------------------------------
1026*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_point_add_affine
1027*8fb009dcSAndroid Build Coastguard Worker
1028*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_point_add_affine
1029*8fb009dcSAndroid Build Coastguard Worker   .type 32
1030*8fb009dcSAndroid Build Coastguard Worker.endef
1031*8fb009dcSAndroid Build Coastguard Worker.align	5
1032*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_point_add_affine:
1033*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER
// Prologue: push x29/x30 and x19-x26, then reserve the 320-byte scratch area.
1034*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-80]!
1035*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
1036*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
1037*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
1038*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
1039*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[sp,#64]
1040*8fb009dcSAndroid Build Coastguard Worker	sub	sp,sp,#32*10
1041*8fb009dcSAndroid Build Coastguard Worker
// Keep the three pointer arguments in callee-saved registers so they
// survive the helper calls.
1042*8fb009dcSAndroid Build Coastguard Worker	mov	x21,x0
1043*8fb009dcSAndroid Build Coastguard Worker	mov	x22,x1
1044*8fb009dcSAndroid Build Coastguard Worker	mov	x23,x2
// x12 = second quad of Lpoly, x13 = fourth quad of Lpoly. Neither is read
// again in this routine; presumably they are implicit inputs of the
// __ecp_nistz256_* helpers -- TODO confirm.
1045*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,Lpoly
1046*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:Lpoly
1047*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]
1048*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]
1049*8fb009dcSAndroid Build Coastguard Worker
// x24 = all-ones if any word of in1_z is non-zero, otherwise 0.
// x4-x7 keep in1_z as the operand of the squaring call further down.
1050*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1,#64]	// in1_z
1051*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#64+16]
1052*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x4,x5
1053*8fb009dcSAndroid Build Coastguard Worker	orr	x10,x6,x7
1054*8fb009dcSAndroid Build Coastguard Worker	orr	x24,x8,x10
1055*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0
1056*8fb009dcSAndroid Build Coastguard Worker	csetm	x24,ne		// ~in1infty
1057*8fb009dcSAndroid Build Coastguard Worker
// x25 = all-ones if any word of in2_x or in2_y is non-zero, otherwise 0.
1058*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x2]	// in2_x
1059*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x2,#16]
1060*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2,#32]	// in2_y
1061*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#48]
1062*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x15
1063*8fb009dcSAndroid Build Coastguard Worker	orr	x16,x16,x17
1064*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x8,x9
1065*8fb009dcSAndroid Build Coastguard Worker	orr	x10,x10,x11
1066*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x16
1067*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x8,x10
1068*8fb009dcSAndroid Build Coastguard Worker	orr	x25,x14,x8
1069*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0
1070*8fb009dcSAndroid Build Coastguard Worker	csetm	x25,ne		// ~in2infty
1071*8fb009dcSAndroid Build Coastguard Worker
1072*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
1073*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);
1074*8fb009dcSAndroid Build Coastguard Worker
// Feed x14-x17 (apparently the value just produced by the helper) straight
// back in as the register operand of the next multiplication.
1075*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14
1076*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
1077*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
1078*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
1079*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23]
1080*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#0
1081*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
1082*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U2, Z1sqr, in2_x);
1083*8fb009dcSAndroid Build Coastguard Worker
// x3 and x4-x7 are loaded here for the multiplication that follows the
// subtraction; the subtraction call itself is given only x0 and x2.
1084*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#0
1085*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#64]	// forward load for p256_mul_mont
1086*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#128]
1087*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#128+16]
1088*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#160
1089*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(H, U2, in1_x);
1090*8fb009dcSAndroid Build Coastguard Worker
// The Z1sqr slot at sp+128 is overwritten with S2 from here on.
1091*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#64
1092*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
1093*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, Z1sqr, in1_z);
1094*8fb009dcSAndroid Build Coastguard Worker
1095*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#64]
1096*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#160]
1097*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#160+16]
1098*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#64
1099*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
1100*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_z, H, in1_z);
1101*8fb009dcSAndroid Build Coastguard Worker
1102*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23,#32]
1103*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#128]
1104*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#128+16]
1105*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#32
1106*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
1107*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, S2, in2_y);
1108*8fb009dcSAndroid Build Coastguard Worker
1109*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#32
1110*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#160]	// forward load for p256_sqr_mont
1111*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#160+16]
1112*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#192
1113*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(R, S2, in1_y);
1114*8fb009dcSAndroid Build Coastguard Worker
1115*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#224
1116*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Hsqr, H);
1117*8fb009dcSAndroid Build Coastguard Worker
1118*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#192]
1119*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#192+16]
1120*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#288
1121*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Rsqr, R);
1122*8fb009dcSAndroid Build Coastguard Worker
1123*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#160]
1124*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#224]
1125*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#224+16]
1126*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#160
1127*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#256
1128*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(Hcub, Hsqr, H);
1129*8fb009dcSAndroid Build Coastguard Worker
1130*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22]
1131*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#224]
1132*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#224+16]
1133*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#0
1134*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
1135*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U2, in1_x, Hsqr);
1136*8fb009dcSAndroid Build Coastguard Worker
// Copy x14-x17 into x8-x11 before the add helper; with the call comment
// below this reads as doubling U2, writing the sum to the Hsqr slot (sp+224).
1137*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14
1138*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
1139*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
1140*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
1141*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#224
1142*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(Hsqr, U2);
1143*8fb009dcSAndroid Build Coastguard Worker
1144*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#288
1145*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
1146*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(res_x, Rsqr, Hsqr);
1147*8fb009dcSAndroid Build Coastguard Worker
// x0 is not reloaded here: the call reuses the destination set up above.
1148*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#256
1149*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	//  p256_sub(res_x, res_x, Hcub);
1150*8fb009dcSAndroid Build Coastguard Worker
1151*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#96
1152*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#32]	// forward load for p256_mul_mont
1153*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#256]
1154*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#256+16]
1155*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
1156*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(res_y, U2, res_x);
1157*8fb009dcSAndroid Build Coastguard Worker
1158*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#32
1159*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
1160*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, in1_y, Hcub);
1161*8fb009dcSAndroid Build Coastguard Worker
1162*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#192]
1163*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#32]
1164*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#32+16]
1165*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#192
1166*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
1167*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_y, res_y, R);
1168*8fb009dcSAndroid Build Coastguard Worker
1169*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#128
1170*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_y, res_y, S2);
1171*8fb009dcSAndroid Build Coastguard Worker
// Output selection, done with csel only (no branches), 32 bytes at a time
// for x, then y, then z. For each coordinate:
//   step 1 (flags from x24): x24 != 0 -> keep computed res, else take in2;
//   step 2 (flags from x25): x25 != 0 -> keep step-1 value, else take in1.
// Loads for the next coordinate are interleaved with the current selects.
1172*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]		// res
1173*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
1174*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23]		// in2
1175*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#16]
1176*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#0]	// in1
1177*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
1178*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#0+16]
1179*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
1180*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
1181*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0+0+32]	// res
1182*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
1183*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
1184*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
1185*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+0+48]
1186*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
1187*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
1188*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23,#0+32]	// in2
1189*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
1190*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
1191*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#0+48]
1192*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#0]
1193*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#0+16]
// No z coordinate is read from in2. x23 is rebased to Lone_mont-64 so that
// the "[x23,#64]"-style loads below fetch the Lone_mont constant as the
// in2 candidate for the z selection. The in2 pointer in x23 is lost here,
// after its last use (the in2_y loads above).
1194*8fb009dcSAndroid Build Coastguard Worker	adrp	x23,Lone_mont-64
1195*8fb009dcSAndroid Build Coastguard Worker	add	x23,x23,:lo12:Lone_mont-64
1196*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#32]	// in1
1197*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
1198*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#32+16]
1199*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
1200*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
1201*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0+32+32]	// res
1202*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
1203*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
1204*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
1205*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+32+48]
1206*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
1207*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
1208*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23,#32+32]	// in2
1209*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
1210*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
1211*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#32+48]
1212*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#32]
1213*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#32+16]
1214*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#64]	// in1
1215*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
1216*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#64+16]
1217*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
1218*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
1219*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
1220*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
1221*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
1222*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
1223*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
1224*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
1225*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
1226*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#64]
1227*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#64+16]
1228*8fb009dcSAndroid Build Coastguard Worker
// Epilogue: drop the scratch area by resetting sp from x29, restore
// x19-x26, pop x29/x30 together with the 80-byte save area, and return.
1229*8fb009dcSAndroid Build Coastguard Worker	add	sp,x29,#0		// destroy frame
1230*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
1231*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
1232*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
1233*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
1234*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#80
1235*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
1236*8fb009dcSAndroid Build Coastguard Worker	ret
1237*8fb009dcSAndroid Build Coastguard Worker
1238*8fb009dcSAndroid Build Coastguard Worker////////////////////////////////////////////////////////////////////////
1239*8fb009dcSAndroid Build Coastguard Worker// void ecp_nistz256_ord_mul_mont(uint64_t res[4], uint64_t a[4],
1240*8fb009dcSAndroid Build Coastguard Worker//                                uint64_t b[4]);
//
// Register roles in the body below:
//   x0        res pointer (written only by the two final stp)
//   x1        a pointer;  x4-x7 = a[0..3], loaded once
//   x2        b pointer;  x3    = current b[i], reloaded each round
//   x12,x13   first two quads of Lord
//   x21,x22   third and fourth quads of Lord (used only in the final
//             "ret -= modulus" subtraction)
//   x23       first the address of Lord, then the quad at Lord+32, which
//             is the LordK constant that directly follows Lord in .rodata
//   x24       per-round reduction multiplier = low 64 bits of x14*x23
//   x14-x17,x19,x20  running accumulator (x20 is the top carry word)
//   x8-x11    scratch for partial products
//
// Structure: the b[0] products, then three rounds that each combine one
// reduction step with the products for the next b[i], then a last
// reduction step and a conditional subtraction of Lord.
//
// This routine contains no `bl`, so x30 is never modified; it is saved by
// the prologue but not reloaded by the epilogue.
1241*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_ord_mul_mont
1242*8fb009dcSAndroid Build Coastguard Worker
1243*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_ord_mul_mont
1244*8fb009dcSAndroid Build Coastguard Worker   .type 32
1245*8fb009dcSAndroid Build Coastguard Worker.endef
1246*8fb009dcSAndroid Build Coastguard Worker.align	4
1247*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_ord_mul_mont:
1248*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
1249*8fb009dcSAndroid Build Coastguard Worker	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
1250*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-64]!
1251*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
1252*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
1253*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
1254*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
1255*8fb009dcSAndroid Build Coastguard Worker
1256*8fb009dcSAndroid Build Coastguard Worker	adrp	x23,Lord
1257*8fb009dcSAndroid Build Coastguard Worker	add	x23,x23,:lo12:Lord
1258*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2]		// bp[0]
1259*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1]
1260*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#16]
1261*8fb009dcSAndroid Build Coastguard Worker
// Load the four Lord quads, then overwrite the table pointer in x23 with
// the quad stored right after them (offset 32).
1262*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x23,#0]
1263*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x23,#16]
1264*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x23,#32]
1265*8fb009dcSAndroid Build Coastguard Worker
// ---- b[0]: full 64x64 products of a[0..3] with b[0] --------------------
// Low halves go to x14-x17, high halves to x8-x10 and x19.
1266*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x4,x3		// a[0]*b[0]
1267*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
1268*8fb009dcSAndroid Build Coastguard Worker
1269*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x5,x3		// a[1]*b[0]
1270*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
1271*8fb009dcSAndroid Build Coastguard Worker
1272*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x6,x3		// a[2]*b[0]
1273*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
1274*8fb009dcSAndroid Build Coastguard Worker
1275*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x7,x3		// a[3]*b[0]
1276*8fb009dcSAndroid Build Coastguard Worker	umulh	x19,x7,x3
1277*8fb009dcSAndroid Build Coastguard Worker
// Reduction multiplier for this round, taken from the lowest accumulator word.
1278*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23
1279*8fb009dcSAndroid Build Coastguard Worker
1280*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
1281*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1282*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1283*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
1284*8fb009dcSAndroid Build Coastguard Worker	mov	x20,xzr
1285*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*1]		// b[i]
1286*8fb009dcSAndroid Build Coastguard Worker
// ---- reduction step + products for b[1] --------------------------------
// Only x12 and x13 (the two low Lord quads) are multiplied by x24. The two
// high quads are 0xffffffffffffffff and 0xffffffff00000000, and their
// contribution appears to be formed without multiplies: subtract x24,
// x24<<32 and x24>>32 here, then add x24 back one word higher in the
// adcs chain further down.
1287*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1288*8fb009dcSAndroid Build Coastguard Worker	subs	x16,x16,x24
1289*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1290*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x8
1291*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,x9
1292*8fb009dcSAndroid Build Coastguard Worker	sbc	x20,x20,xzr
1293*8fb009dcSAndroid Build Coastguard Worker
// Sets the carry flag iff x14 != 0. The low half of x12*x24 is never
// computed; this flag stands in for the carry out of (x14 + that low half),
// presumably because that sum is zero modulo 2^64 by the choice of x24.
1294*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1295*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1296*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1297*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1298*8fb009dcSAndroid Build Coastguard Worker
// The mul instructions for a[]*b[i] are interleaved with the carry chain;
// mul/umulh do not modify the flags, so the adcs sequence is unaffected.
1299*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1300*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3
1301*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1302*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3
1303*8fb009dcSAndroid Build Coastguard Worker
// Accumulator moves down one word: the new x14 is built from the old x15, etc.
1304*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1305*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3
1306*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1307*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3
1308*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1309*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x24
1310*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
1311*8fb009dcSAndroid Build Coastguard Worker
1312*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts
1313*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
1314*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
1315*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
1316*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
1317*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
1318*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
1319*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3
1320*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
1321*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23
1322*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts
1323*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1324*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1325*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
1326*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
1327*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*2]		// b[i]
1328*8fb009dcSAndroid Build Coastguard Worker
// ---- reduction step + products for b[2] (same pattern as above) --------
1329*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1330*8fb009dcSAndroid Build Coastguard Worker	subs	x16,x16,x24
1331*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1332*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x8
1333*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,x9
1334*8fb009dcSAndroid Build Coastguard Worker	sbc	x20,x20,xzr
1335*8fb009dcSAndroid Build Coastguard Worker
1336*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1337*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1338*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1339*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1340*8fb009dcSAndroid Build Coastguard Worker
1341*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1342*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3
1343*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1344*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3
1345*8fb009dcSAndroid Build Coastguard Worker
1346*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1347*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3
1348*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1349*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3
1350*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1351*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x24
1352*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
1353*8fb009dcSAndroid Build Coastguard Worker
1354*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts
1355*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
1356*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
1357*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
1358*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
1359*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
1360*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
1361*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3
1362*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
1363*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23
1364*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts
1365*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1366*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1367*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
1368*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
1369*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*3]		// b[i]
1370*8fb009dcSAndroid Build Coastguard Worker
// ---- reduction step + products for b[3] (same pattern as above) --------
1371*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1372*8fb009dcSAndroid Build Coastguard Worker	subs	x16,x16,x24
1373*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1374*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x8
1375*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,x9
1376*8fb009dcSAndroid Build Coastguard Worker	sbc	x20,x20,xzr
1377*8fb009dcSAndroid Build Coastguard Worker
1378*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1379*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1380*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1381*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1382*8fb009dcSAndroid Build Coastguard Worker
1383*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1384*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3
1385*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1386*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3
1387*8fb009dcSAndroid Build Coastguard Worker
1388*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1389*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3
1390*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1391*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3
1392*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1393*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x24
1394*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
1395*8fb009dcSAndroid Build Coastguard Worker
1396*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts
1397*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
1398*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
1399*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
1400*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
1401*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
1402*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
1403*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3
1404*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
1405*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23
1406*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts
1407*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1408*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1409*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
1410*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
// ---- final reduction step: no further b[i], so no interleaved products --
1411*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32		// last reduction
1412*8fb009dcSAndroid Build Coastguard Worker	subs	x16,x16,x24
1413*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1414*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x8
1415*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,x9
1416*8fb009dcSAndroid Build Coastguard Worker	sbc	x20,x20,xzr
1417*8fb009dcSAndroid Build Coastguard Worker
1418*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1419*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1420*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1421*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1422*8fb009dcSAndroid Build Coastguard Worker
1423*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1424*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1425*8fb009dcSAndroid Build Coastguard Worker
1426*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1427*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1428*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1429*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x24
1430*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
1431*8fb009dcSAndroid Build Coastguard Worker
// Trial subtraction of the four Lord quads into x8-x11, with the borrow
// propagated through the extra top word x19 (result discarded into xzr,
// only the flags are kept for the selects below).
1432*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,x12		// ret -= modulus
1433*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x13
1434*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,x21
1435*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x22
1436*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x19,xzr
1437*8fb009dcSAndroid Build Coastguard Worker
// csel and stp leave the flags untouched, so all four selects use the
// borrow produced by the sbcs chain above.
1438*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,lo	// ret = borrow ? ret : ret-modulus
1439*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,lo
1440*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,lo
1441*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
1442*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,lo
1443*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
1444*8fb009dcSAndroid Build Coastguard Worker
// Epilogue: restore x19-x24 and x29, release the 64-byte frame. x30 is
// deliberately not reloaded (see the PAuth note at the prologue).
1445*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#16]
1446*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#32]
1447*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[sp,#48]
1448*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#64
1449*8fb009dcSAndroid Build Coastguard Worker	ret
1450*8fb009dcSAndroid Build Coastguard Worker
1451*8fb009dcSAndroid Build Coastguard Worker
1452*8fb009dcSAndroid Build Coastguard Worker////////////////////////////////////////////////////////////////////////
1453*8fb009dcSAndroid Build Coastguard Worker// void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4],
1454*8fb009dcSAndroid Build Coastguard Worker//                                uint64_t rep);
//
// Squares the four-limb value a[] rep times in a row. Every square is
// reduced word by word against the four-limb modulus stored at Lord, using
// the multiplier constant stored right after it (LordK), and is brought
// below the modulus with one conditional subtraction. The final value is
// written to res[].
//
// In:    x0 = res, x1 = a, x2 = rep
//        a[] is read once, before the loop; x1 is reused as an accumulator.
//        NOTE(review): x2 is decremented before it is tested, so rep == 0
//        would wrap around instead of returning immediately; callers
//        presumably always pass rep >= 1 -- confirm.
// Regs:  x4-x7             current value (input of the next squaring)
//        x12,x13,x21,x22   modulus limbs ord[0..3]
//        x23               LordK
//        x14-x17,x19,x20,x1,x3   acc[0..7], the 512-bit square
//        x8-x11,x24        temporaries
// Saves and restores x19-x24 and x29.
1455*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_ord_sqr_mont
1456*8fb009dcSAndroid Build Coastguard Worker
1457*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_ord_sqr_mont
1458*8fb009dcSAndroid Build Coastguard Worker   .type 32
1459*8fb009dcSAndroid Build Coastguard Worker.endef
1460*8fb009dcSAndroid Build Coastguard Worker.align	4
1461*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_ord_sqr_mont:
1462*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
1463*8fb009dcSAndroid Build Coastguard Worker	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
1464*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-64]!
1465*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
1466*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
1467*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
1468*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
1469*8fb009dcSAndroid Build Coastguard Worker
1470*8fb009dcSAndroid Build Coastguard Worker	adrp	x23,Lord
1471*8fb009dcSAndroid Build Coastguard Worker	add	x23,x23,:lo12:Lord	// x23 = address of Lord
1472*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1]		// x4..x7 = a[0..3]
1473*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#16]
1474*8fb009dcSAndroid Build Coastguard Worker
1475*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x23,#0]	// ord[0],ord[1]
1476*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x23,#16]	// ord[2],ord[3]
1477*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x23,#32]		// LordK, the quad following Lord's four limbs
1478*8fb009dcSAndroid Build Coastguard Worker	b	Loop_ord_sqr
1479*8fb009dcSAndroid Build Coastguard Worker
1480*8fb009dcSAndroid Build Coastguard Worker.align	4
1481*8fb009dcSAndroid Build Coastguard WorkerLoop_ord_sqr:
1482*8fb009dcSAndroid Build Coastguard Worker	sub	x2,x2,#1		// one squaring per pass; tested by cbnz at the bottom
1483*8fb009dcSAndroid Build Coastguard Worker	////////////////////////////////////////////////////////////////
1484*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |  |  |a1*a0|  |
1485*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |  |a2*a0|  |  |
1486*8fb009dcSAndroid Build Coastguard Worker	//  |  |a3*a2|a3*a0|  |  |  |
1487*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |a2*a1|  |  |  |
1488*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |a3*a1|  |  |  |  |
1489*8fb009dcSAndroid Build Coastguard Worker	// *|  |  |  |  |  |  |  | 2|
1490*8fb009dcSAndroid Build Coastguard Worker	// +|a3*a3|a2*a2|a1*a1|a0*a0|
1491*8fb009dcSAndroid Build Coastguard Worker	//  |--+--+--+--+--+--+--+--|
1492*8fb009dcSAndroid Build Coastguard Worker	//  |A7|A6|A5|A4|A3|A2|A1|A0|, where A0..A7 are held in
	//  x14,x15,x16,x17,x19,x20,x1,x3 respectively
1493*8fb009dcSAndroid Build Coastguard Worker	//
1494*8fb009dcSAndroid Build Coastguard Worker	//  "can't overflow" below marks carrying into the high part of a
1495*8fb009dcSAndroid Build Coastguard Worker	//  multiplication result, which can't overflow, because it
1496*8fb009dcSAndroid Build Coastguard Worker	//  can never be all ones.
1497*8fb009dcSAndroid Build Coastguard Worker
1498*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x5,x4		// a[1]*a[0]
1499*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x4
1500*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x6,x4		// a[2]*a[0]
1501*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x4
1502*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x7,x4		// a[3]*a[0]
1503*8fb009dcSAndroid Build Coastguard Worker	umulh	x19,x7,x4
1504*8fb009dcSAndroid Build Coastguard Worker
1505*8fb009dcSAndroid Build Coastguard Worker	adds	x16,x16,x9		// accumulate high parts of multiplication
1506*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x6,x5		// a[2]*a[1]
1507*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x6,x5
1508*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1509*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x7,x5		// a[3]*a[1]
1510*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x5
1511*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr		// can't overflow
1512*8fb009dcSAndroid Build Coastguard Worker
1513*8fb009dcSAndroid Build Coastguard Worker	mul	x20,x7,x6		// a[3]*a[2]
1514*8fb009dcSAndroid Build Coastguard Worker	umulh	x1,x7,x6		// x1 (input pointer, already consumed) becomes acc[6]
1515*8fb009dcSAndroid Build Coastguard Worker
1516*8fb009dcSAndroid Build Coastguard Worker	adds	x9,x9,x10		// accumulate high parts of multiplication
1517*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x4,x4		// a[0]*a[0]
1518*8fb009dcSAndroid Build Coastguard Worker	adc	x10,x11,xzr		// can't overflow
1519*8fb009dcSAndroid Build Coastguard Worker
1520*8fb009dcSAndroid Build Coastguard Worker	adds	x17,x17,x8		// accumulate low parts of multiplication
1521*8fb009dcSAndroid Build Coastguard Worker	umulh	x4,x4,x4
1522*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x9
1523*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x5		// a[1]*a[1]
1524*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x10
1525*8fb009dcSAndroid Build Coastguard Worker	umulh	x5,x5,x5
1526*8fb009dcSAndroid Build Coastguard Worker	adc	x1,x1,xzr		// can't overflow
1527*8fb009dcSAndroid Build Coastguard Worker
1528*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x15	// acc[1-6]*=2
1529*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x6		// a[2]*a[2]
1530*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x16
1531*8fb009dcSAndroid Build Coastguard Worker	umulh	x6,x6,x6
1532*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x17
1533*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x7		// a[3]*a[3]
1534*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x19
1535*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x7
1536*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x20
1537*8fb009dcSAndroid Build Coastguard Worker	adcs	x1,x1,x1
1538*8fb009dcSAndroid Build Coastguard Worker	adc	x3,xzr,xzr		// acc[7] = bit shifted out of acc[6]
1539*8fb009dcSAndroid Build Coastguard Worker
1540*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x4		// +a[i]*a[i]
1541*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23		// m = low 64 bits of acc[0]*LordK (first reduction multiplier)
1542*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1543*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x5
1544*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x10
1545*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x6
1546*8fb009dcSAndroid Build Coastguard Worker	adcs	x1,x1,x11
1547*8fb009dcSAndroid Build Coastguard Worker	adc	x3,x3,x7
	// Four reduction rounds follow, one per low limb; the multiplier m
	// alternates between x24 and x11.  In each round:
	//  - "subs xzr,acc0,#1" only sets C = (acc0 != 0).  The low half of
	//    ord[0]*m is never computed; this flag is consumed by the next
	//    adcs in place of the carry out of acc0 + lo(ord[0]*m)
	//    (presumably that sum is 0 mod 2^64 by the choice of LordK).
	//  - ord[2] = 0xffffffffffffffff and ord[3] = 0xffffffff00000000
	//    (see Lord), so m*ord[2..3] is folded in by adding m twice and
	//    then subtracting m, m<<32 and m>>32 instead of multiplying.
1548*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1		// C = (acc[0] != 0)
1549*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1550*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1551*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1552*8fb009dcSAndroid Build Coastguard Worker
1553*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1554*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1555*8fb009dcSAndroid Build Coastguard Worker
1556*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1557*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1558*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1559*8fb009dcSAndroid Build Coastguard Worker	adc	x17,xzr,x24		// can't overflow
1560*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x14,x23		// m for the next round
1561*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1562*8fb009dcSAndroid Build Coastguard Worker	subs	x15,x15,x24
1563*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1564*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x8
1565*8fb009dcSAndroid Build Coastguard Worker	sbc	x17,x17,x9		// can't borrow
1566*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1567*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x11
1568*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x11
1569*8fb009dcSAndroid Build Coastguard Worker	umulh	x24,x13,x11
1570*8fb009dcSAndroid Build Coastguard Worker
1571*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1572*8fb009dcSAndroid Build Coastguard Worker	adc	x24,x24,xzr
1573*8fb009dcSAndroid Build Coastguard Worker
1574*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1575*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x24
1576*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x11
1577*8fb009dcSAndroid Build Coastguard Worker	adc	x17,xzr,x11		// can't overflow
1578*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23		// m for the next round
1579*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x11,#32
1580*8fb009dcSAndroid Build Coastguard Worker	subs	x15,x15,x11
1581*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x11,#32
1582*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x8
1583*8fb009dcSAndroid Build Coastguard Worker	sbc	x17,x17,x9		// can't borrow
1584*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1585*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1586*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1587*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1588*8fb009dcSAndroid Build Coastguard Worker
1589*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1590*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1591*8fb009dcSAndroid Build Coastguard Worker
1592*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1593*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1594*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1595*8fb009dcSAndroid Build Coastguard Worker	adc	x17,xzr,x24		// can't overflow
1596*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x14,x23		// m for the last round
1597*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1598*8fb009dcSAndroid Build Coastguard Worker	subs	x15,x15,x24
1599*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1600*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x8
1601*8fb009dcSAndroid Build Coastguard Worker	sbc	x17,x17,x9		// can't borrow
1602*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1603*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x11
1604*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x11
1605*8fb009dcSAndroid Build Coastguard Worker	umulh	x24,x13,x11
1606*8fb009dcSAndroid Build Coastguard Worker
1607*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1608*8fb009dcSAndroid Build Coastguard Worker	adc	x24,x24,xzr
1609*8fb009dcSAndroid Build Coastguard Worker
1610*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1611*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x24
1612*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x11
1613*8fb009dcSAndroid Build Coastguard Worker	adc	x17,xzr,x11		// can't overflow
1614*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x11,#32
1615*8fb009dcSAndroid Build Coastguard Worker	subs	x15,x15,x11
1616*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x11,#32
1617*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x8
1618*8fb009dcSAndroid Build Coastguard Worker	sbc	x17,x17,x9		// can't borrow
1619*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x19	// accumulate upper half
1620*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x20
1621*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x1
1622*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x3
1623*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr		// x19 = carry out of the 256-bit sum
1624*8fb009dcSAndroid Build Coastguard Worker
1625*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,x12		// ret -= modulus
1626*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x13
1627*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,x21
1628*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x22
1629*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x19,xzr		// include the carry word; only the final borrow is needed
1630*8fb009dcSAndroid Build Coastguard Worker
1631*8fb009dcSAndroid Build Coastguard Worker	csel	x4,x14,x8,lo	// ret = borrow ? ret : ret-modulus
1632*8fb009dcSAndroid Build Coastguard Worker	csel	x5,x15,x9,lo
1633*8fb009dcSAndroid Build Coastguard Worker	csel	x6,x16,x10,lo
1634*8fb009dcSAndroid Build Coastguard Worker	csel	x7,x17,x11,lo
1635*8fb009dcSAndroid Build Coastguard Worker
1636*8fb009dcSAndroid Build Coastguard Worker	cbnz	x2,Loop_ord_sqr	// square again until the rep count reaches zero
1637*8fb009dcSAndroid Build Coastguard Worker
1638*8fb009dcSAndroid Build Coastguard Worker	stp	x4,x5,[x0]		// res[0..3] = final value
1639*8fb009dcSAndroid Build Coastguard Worker	stp	x6,x7,[x0,#16]
1640*8fb009dcSAndroid Build Coastguard Worker
1641*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#16]
1642*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#32]
1643*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[sp,#48]
1644*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#64		// pop the frame; x30 is not reloaded (see note at entry)
1645*8fb009dcSAndroid Build Coastguard Worker	ret
1646*8fb009dcSAndroid Build Coastguard Worker
1647*8fb009dcSAndroid Build Coastguard Worker////////////////////////////////////////////////////////////////////////
1648*8fb009dcSAndroid Build Coastguard Worker// void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index);
//
// Copies table entry number `index` from in_t to val.  Entries are 96 bytes
// each, 16 of them are scanned, and they are numbered from 1: the first
// entry is selected by index == 1.  If index matches none of 1..16 (for
// example index == 0), val is written with all zeros.
//
// All 16 entries are always loaded and the wanted one is merged in with an
// all-ones/all-zeros mask, so neither the addresses read nor the branches
// taken depend on index.
//
// In:   x0 = val, x1 = in_t, w2 = index
// Uses: x9-x11, v3, v16-v27 and the flags; x1 is left pointing past the
//       16th entry.
1649*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_select_w5
1650*8fb009dcSAndroid Build Coastguard Worker
1651*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_select_w5
1652*8fb009dcSAndroid Build Coastguard Worker   .type 32
1653*8fb009dcSAndroid Build Coastguard Worker.endef
1654*8fb009dcSAndroid Build Coastguard Worker.align	4
1655*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_select_w5:
1656*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
1657*8fb009dcSAndroid Build Coastguard Worker
1658*8fb009dcSAndroid Build Coastguard Worker    // x10 := x0 (output pointer; x10 is the copy advanced by the final stores)
1659*8fb009dcSAndroid Build Coastguard Worker    // w9 := 0; loop counter and incremented internal index
1660*8fb009dcSAndroid Build Coastguard Worker	mov	x10, x0
1661*8fb009dcSAndroid Build Coastguard Worker	mov	w9, #0
1662*8fb009dcSAndroid Build Coastguard Worker
1663*8fb009dcSAndroid Build Coastguard Worker    // [v16-v21] := 0; this is the result if no entry matches
1664*8fb009dcSAndroid Build Coastguard Worker	movi	v16.16b, #0
1665*8fb009dcSAndroid Build Coastguard Worker	movi	v17.16b, #0
1666*8fb009dcSAndroid Build Coastguard Worker	movi	v18.16b, #0
1667*8fb009dcSAndroid Build Coastguard Worker	movi	v19.16b, #0
1668*8fb009dcSAndroid Build Coastguard Worker	movi	v20.16b, #0
1669*8fb009dcSAndroid Build Coastguard Worker	movi	v21.16b, #0
1670*8fb009dcSAndroid Build Coastguard Worker
1671*8fb009dcSAndroid Build Coastguard WorkerLselect_w5_loop:
1672*8fb009dcSAndroid Build Coastguard Worker    // Loop 16 times.
1673*8fb009dcSAndroid Build Coastguard Worker
1674*8fb009dcSAndroid Build Coastguard Worker    // Increment index (loop counter); tested at the end of the loop.
    // w9 therefore runs from 1 to 16 while it is compared against w2.
1675*8fb009dcSAndroid Build Coastguard Worker	add	w9, w9, #1
1676*8fb009dcSAndroid Build Coastguard Worker
1677*8fb009dcSAndroid Build Coastguard Worker    // [v22-v27] := Load a (3*256-bit = 6*128-bit) table entry starting at x1
1678*8fb009dcSAndroid Build Coastguard Worker    //  and advance x1 to point to the next entry
1679*8fb009dcSAndroid Build Coastguard Worker	ld1	{v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64
1680*8fb009dcSAndroid Build Coastguard Worker
1681*8fb009dcSAndroid Build Coastguard Worker    // x11 := (w9 == w2)? All 1s : All 0s
1682*8fb009dcSAndroid Build Coastguard Worker	cmp	w9, w2
1683*8fb009dcSAndroid Build Coastguard Worker	csetm	x11, eq
1684*8fb009dcSAndroid Build Coastguard Worker
1685*8fb009dcSAndroid Build Coastguard Worker    // continue loading ...
1686*8fb009dcSAndroid Build Coastguard Worker	ld1	{v26.2d, v27.2d}, [x1],#32
1687*8fb009dcSAndroid Build Coastguard Worker
1688*8fb009dcSAndroid Build Coastguard Worker    // duplicate mask_64 into Mask (all 0s or all 1s)
1689*8fb009dcSAndroid Build Coastguard Worker	dup	v3.2d, x11
1690*8fb009dcSAndroid Build Coastguard Worker
1691*8fb009dcSAndroid Build Coastguard Worker    // [v16-v21] := (Mask == all 1s)? [v22-v27] : [v16-v21]
1692*8fb009dcSAndroid Build Coastguard Worker    // i.e., values in output registers will remain the same if w9 != w2
1693*8fb009dcSAndroid Build Coastguard Worker	bit	v16.16b, v22.16b, v3.16b
1694*8fb009dcSAndroid Build Coastguard Worker	bit	v17.16b, v23.16b, v3.16b
1695*8fb009dcSAndroid Build Coastguard Worker
1696*8fb009dcSAndroid Build Coastguard Worker	bit	v18.16b, v24.16b, v3.16b
1697*8fb009dcSAndroid Build Coastguard Worker	bit	v19.16b, v25.16b, v3.16b
1698*8fb009dcSAndroid Build Coastguard Worker
1699*8fb009dcSAndroid Build Coastguard Worker	bit	v20.16b, v26.16b, v3.16b
1700*8fb009dcSAndroid Build Coastguard Worker	bit	v21.16b, v27.16b, v3.16b
1701*8fb009dcSAndroid Build Coastguard Worker
1702*8fb009dcSAndroid Build Coastguard Worker    // If bit #4 of w9 is still 0 (i.e. idx_ctr < 16) loop back
1703*8fb009dcSAndroid Build Coastguard Worker	tbz	w9, #4, Lselect_w5_loop
1704*8fb009dcSAndroid Build Coastguard Worker
1705*8fb009dcSAndroid Build Coastguard Worker    // Write [v16-v21] to memory at the output pointer
1706*8fb009dcSAndroid Build Coastguard Worker	st1	{v16.2d, v17.2d, v18.2d, v19.2d}, [x10],#64
1707*8fb009dcSAndroid Build Coastguard Worker	st1	{v20.2d, v21.2d}, [x10]
1708*8fb009dcSAndroid Build Coastguard Worker
1709*8fb009dcSAndroid Build Coastguard Worker	ret
1710*8fb009dcSAndroid Build Coastguard Worker
1711*8fb009dcSAndroid Build Coastguard Worker
1712*8fb009dcSAndroid Build Coastguard Worker
1713*8fb009dcSAndroid Build Coastguard Worker////////////////////////////////////////////////////////////////////////
1714*8fb009dcSAndroid Build Coastguard Worker// void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index);
//
// Copies table entry number `index` from in_t to val.  Entries are 64 bytes
// each, 64 of them are scanned, and they are numbered from 1: the first
// entry is selected by index == 1.  If index matches none of 1..64 (for
// example index == 0), val is written with all zeros.
//
// All 64 entries are always loaded and the wanted one is merged in with an
// all-ones/all-zeros mask, so neither the addresses read nor the branches
// taken depend on index.
//
// In:   x0 = val, x1 = in_t, w2 = index
// Uses: x9, x11, v3, v16-v19, v22-v25 and the flags; x1 is left pointing
//       past the 64th entry.
1715*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_select_w7
1716*8fb009dcSAndroid Build Coastguard Worker
1717*8fb009dcSAndroid Build Coastguard Worker.def ecp_nistz256_select_w7
1718*8fb009dcSAndroid Build Coastguard Worker   .type 32
1719*8fb009dcSAndroid Build Coastguard Worker.endef
1720*8fb009dcSAndroid Build Coastguard Worker.align	4
1721*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_select_w7:
1722*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
1723*8fb009dcSAndroid Build Coastguard Worker
1724*8fb009dcSAndroid Build Coastguard Worker    // w9 := 0; loop counter and incremented internal index
1725*8fb009dcSAndroid Build Coastguard Worker	mov	w9, #0
1726*8fb009dcSAndroid Build Coastguard Worker
1727*8fb009dcSAndroid Build Coastguard Worker    // [v16-v19] := 0; this is the result if no entry matches
1728*8fb009dcSAndroid Build Coastguard Worker	movi	v16.16b, #0
1729*8fb009dcSAndroid Build Coastguard Worker	movi	v17.16b, #0
1730*8fb009dcSAndroid Build Coastguard Worker	movi	v18.16b, #0
1731*8fb009dcSAndroid Build Coastguard Worker	movi	v19.16b, #0
1732*8fb009dcSAndroid Build Coastguard Worker
1733*8fb009dcSAndroid Build Coastguard WorkerLselect_w7_loop:
1734*8fb009dcSAndroid Build Coastguard Worker    // Loop 64 times.
1735*8fb009dcSAndroid Build Coastguard Worker
1736*8fb009dcSAndroid Build Coastguard Worker    // Increment index (loop counter); tested at the end of the loop.
    // w9 therefore runs from 1 to 64 while it is compared against w2.
1737*8fb009dcSAndroid Build Coastguard Worker	add	w9, w9, #1
1738*8fb009dcSAndroid Build Coastguard Worker
1739*8fb009dcSAndroid Build Coastguard Worker    // [v22-v25] := Load a (2*256-bit = 4*128-bit) table entry starting at x1
1740*8fb009dcSAndroid Build Coastguard Worker    //  and advance x1 to point to the next entry
1741*8fb009dcSAndroid Build Coastguard Worker	ld1	{v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64
1742*8fb009dcSAndroid Build Coastguard Worker
1743*8fb009dcSAndroid Build Coastguard Worker    // x11 := (w9 == w2)? All 1s : All 0s
1744*8fb009dcSAndroid Build Coastguard Worker	cmp	w9, w2
1745*8fb009dcSAndroid Build Coastguard Worker	csetm	x11, eq
1746*8fb009dcSAndroid Build Coastguard Worker
1747*8fb009dcSAndroid Build Coastguard Worker    // duplicate mask_64 into Mask (all 0s or all 1s)
1748*8fb009dcSAndroid Build Coastguard Worker	dup	v3.2d, x11
1749*8fb009dcSAndroid Build Coastguard Worker
1750*8fb009dcSAndroid Build Coastguard Worker    // [v16-v19] := (Mask == all 1s)? [v22-v25] : [v16-v19]
1751*8fb009dcSAndroid Build Coastguard Worker    // i.e., values in output registers will remain the same if w9 != w2
1752*8fb009dcSAndroid Build Coastguard Worker	bit	v16.16b, v22.16b, v3.16b
1753*8fb009dcSAndroid Build Coastguard Worker	bit	v17.16b, v23.16b, v3.16b
1754*8fb009dcSAndroid Build Coastguard Worker
1755*8fb009dcSAndroid Build Coastguard Worker	bit	v18.16b, v24.16b, v3.16b
1756*8fb009dcSAndroid Build Coastguard Worker	bit	v19.16b, v25.16b, v3.16b
1757*8fb009dcSAndroid Build Coastguard Worker
1758*8fb009dcSAndroid Build Coastguard Worker    // If bit #6 of w9 is still 0 (i.e. idx_ctr < 64) loop back
1759*8fb009dcSAndroid Build Coastguard Worker	tbz	w9, #6, Lselect_w7_loop
1760*8fb009dcSAndroid Build Coastguard Worker
1761*8fb009dcSAndroid Build Coastguard Worker    // Write [v16-v19] to memory at the output pointer
1762*8fb009dcSAndroid Build Coastguard Worker	st1	{v16.2d, v17.2d, v18.2d, v19.2d}, [x0]
1763*8fb009dcSAndroid Build Coastguard Worker
1764*8fb009dcSAndroid Build Coastguard Worker	ret
1765*8fb009dcSAndroid Build Coastguard Worker
1766*8fb009dcSAndroid Build Coastguard Worker#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32)
1767