xref: /aosp_15_r20/external/boringssl/src/gen/bcm/p256-armv8-asm-linux.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1*8fb009dcSAndroid Build Coastguard Worker// This file is generated from a similarly-named Perl script in the BoringSSL
2*8fb009dcSAndroid Build Coastguard Worker// source tree. Do not edit by hand.
3*8fb009dcSAndroid Build Coastguard Worker
4*8fb009dcSAndroid Build Coastguard Worker#include <openssl/asm_base.h>
5*8fb009dcSAndroid Build Coastguard Worker
6*8fb009dcSAndroid Build Coastguard Worker#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
7*8fb009dcSAndroid Build Coastguard Worker#include "openssl/arm_arch.h"
8*8fb009dcSAndroid Build Coastguard Worker
9*8fb009dcSAndroid Build Coastguard Worker.section	.rodata
10*8fb009dcSAndroid Build Coastguard Worker.align	5	// AArch64 gas takes a power of two here, i.e. 32-byte alignment
// Read-only constants. Every four-.quad table below is one 256-bit value
// stored as 64-bit limbs, least-significant limb first.
//
// .Lpoly is the P-256 field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1.
// The routines visible in this file load only limb 1 (offset 8) and limb 3
// (offset 24) into x12/x13; limb 0 (all ones) and limb 2 (zero) are
// hard-wired into the reduction code as "#1" and xzr operands.
11*8fb009dcSAndroid Build Coastguard Worker.Lpoly:
12*8fb009dcSAndroid Build Coastguard Worker.quad	0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
13*8fb009dcSAndroid Build Coastguard Worker.LRR:	//	2^512 mod P precomputed for NIST P256 polynomial
14*8fb009dcSAndroid Build Coastguard Worker.quad	0x0000000000000003,0xfffffffbffffffff,0xfffffffffffffffe,0x00000004fffffffd
// 2^256 mod p, i.e. the value 1 in Montgomery form for R = 2^256.
15*8fb009dcSAndroid Build Coastguard Worker.Lone_mont:
16*8fb009dcSAndroid Build Coastguard Worker.quad	0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
// The plain integer 1.
17*8fb009dcSAndroid Build Coastguard Worker.Lone:
18*8fb009dcSAndroid Build Coastguard Worker.quad	1,0,0,0
// The P-256 group order n. Not referenced by the routines visible in this
// part of the file.
19*8fb009dcSAndroid Build Coastguard Worker.Lord:
20*8fb009dcSAndroid Build Coastguard Worker.quad	0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
// NOTE(review): presumably the Montgomery constant -1/n mod 2^64 that goes
// with .Lord; its users are outside this view - confirm there.
21*8fb009dcSAndroid Build Coastguard Worker.LordK:
22*8fb009dcSAndroid Build Coastguard Worker.quad	0xccd1c8aaee00bc4f
// NUL-terminated ASCII banner:
// "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
23*8fb009dcSAndroid Build Coastguard Worker.byte	69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
24*8fb009dcSAndroid Build Coastguard Worker.align	2	// re-align to 4 bytes after the banner string
25*8fb009dcSAndroid Build Coastguard Worker.text
26*8fb009dcSAndroid Build Coastguard Worker
27*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4],
28*8fb009dcSAndroid Build Coastguard Worker//					     const BN_ULONG x2[4]);
//
// Exported wrapper around __ecp_nistz256_mul_mont.
//   In:    x0 = result (4 limbs), x1 = a (4 limbs), x2 = b (4 limbs)
//   Setup: x4-x7 = a[0..3], x3 = b[0], x12 = .Lpoly limb 1,
//          x13 = .Lpoly limb 3; x0 and x2 are passed through unchanged
//   Out:   the helper stores the 4-limb Montgomery product at [x0]
//   Stack: 32 bytes holding the x29/x30 frame record plus x19/x20, which
//          the helper overwrites and which are callee-saved under AAPCS64
29*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_mul_mont
30*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_mul_mont
31*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_mul_mont,%function
32*8fb009dcSAndroid Build Coastguard Worker.align	4
33*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_mul_mont:
34*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not in this file)
35*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-32]!	// push frame record, reserving 32 bytes
36*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
37*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]	// save the callee-saved registers the helper clobbers
38*8fb009dcSAndroid Build Coastguard Worker
39*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2]		// bp[0]
40*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1]		// a[0], a[1]
41*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#16]		// a[2], a[3]
42*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly		// x13 = page address of .Lpoly ...
43*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly	// ... plus its low 12 bits
44*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = modulus limb 1
45*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = modulus limb 3 (table pointer is no longer needed)
46*8fb009dcSAndroid Build Coastguard Worker
47*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont
48*8fb009dcSAndroid Build Coastguard Worker
49*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#16]	// restore callee-saved registers
50*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#32	// pop the frame
51*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// counterpart of the macro used at entry
52*8fb009dcSAndroid Build Coastguard Worker	ret
53*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
54*8fb009dcSAndroid Build Coastguard Worker
55*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper around __ecp_nistz256_sqr_mont.
//   In:    x0 = result (4 limbs), x1 = a (4 limbs)
//   Setup: x4-x7 = a[0..3], x12 = .Lpoly limb 1, x13 = .Lpoly limb 3
//   Out:   the helper stores the 4-limb Montgomery square at [x0]
//   Note:  the helper uses x1 and x2 as scratch, so the input pointer does
//          not survive the call
//   Stack: 32 bytes holding the x29/x30 frame record plus x19/x20, which
//          the helper overwrites and which are callee-saved under AAPCS64
56*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_sqr_mont
57*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_sqr_mont
58*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_sqr_mont,%function
59*8fb009dcSAndroid Build Coastguard Worker.align	4
60*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_sqr_mont:
61*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not in this file)
62*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-32]!	// push frame record, reserving 32 bytes
63*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
64*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]	// save the callee-saved registers the helper clobbers
65*8fb009dcSAndroid Build Coastguard Worker
66*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1]		// a[0], a[1]
67*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#16]		// a[2], a[3]
68*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly		// x13 = page address of .Lpoly ...
69*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly	// ... plus its low 12 bits
70*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = modulus limb 1
71*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = modulus limb 3 (table pointer is no longer needed)
72*8fb009dcSAndroid Build Coastguard Worker
73*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont
74*8fb009dcSAndroid Build Coastguard Worker
75*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#16]	// restore callee-saved registers
76*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#32	// pop the frame
77*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// counterpart of the macro used at entry
78*8fb009dcSAndroid Build Coastguard Worker	ret
79*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
80*8fb009dcSAndroid Build Coastguard Worker
81*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper around __ecp_nistz256_div_by_2.
//   In:    x0 = result (4 limbs), x1 = a (4 limbs)
//   Setup: x14-x17 = a[0..3], x12 = .Lpoly limb 1, x13 = .Lpoly limb 3
//   NOTE(review): the helper body is outside this view; presumably it
//   halves x14-x17 modulo p and stores the result at [x0] - confirm there.
//   Stack: 16 bytes for the x29/x30 frame record only.
82*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_div_by_2
83*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_div_by_2
84*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_div_by_2,%function
85*8fb009dcSAndroid Build Coastguard Worker.align	4
86*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_div_by_2:
87*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not in this file)
88*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push frame record
89*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
90*8fb009dcSAndroid Build Coastguard Worker
91*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1]		// a[0], a[1]
92*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#16]	// a[2], a[3]
93*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly		// x13 = page address of .Lpoly ...
94*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly	// ... plus its low 12 bits
95*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = modulus limb 1
96*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = modulus limb 3 (table pointer is no longer needed)
97*8fb009dcSAndroid Build Coastguard Worker
98*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_div_by_2
99*8fb009dcSAndroid Build Coastguard Worker
100*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
101*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// counterpart of the macro used at entry
102*8fb009dcSAndroid Build Coastguard Worker	ret
103*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
104*8fb009dcSAndroid Build Coastguard Worker
105*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_mul_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper: doubles a field element by calling
// __ecp_nistz256_add_to with both operands equal to a.
//   In:    x0 = result (4 limbs), x1 = a (4 limbs)
//   Setup: x14-x17 = a[0..3] (first addend), x8-x11 = copy of a (second
//          addend), x12 = .Lpoly limb 1, x13 = .Lpoly limb 3
//   NOTE(review): the visible part of the helper adds x8-x11 into x14-x17
//   and starts the trial subtraction of p; the final select and the store
//   to [x0] lie beyond this view - confirm there.
//   Stack: 16 bytes for the x29/x30 frame record only.
106*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_mul_by_2
107*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_mul_by_2
108*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_mul_by_2,%function
109*8fb009dcSAndroid Build Coastguard Worker.align	4
110*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_mul_by_2:
111*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not in this file)
112*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push frame record
113*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
114*8fb009dcSAndroid Build Coastguard Worker
115*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1]		// a[0], a[1]
116*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#16]	// a[2], a[3]
117*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly		// x13 = page address of .Lpoly ...
118*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly	// ... plus its low 12 bits
119*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = modulus limb 1
120*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = modulus limb 3 (table pointer is no longer needed)
121*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14			// second addend = a
122*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
123*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
124*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
125*8fb009dcSAndroid Build Coastguard Worker
126*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// ret = a+a	// 2*a
127*8fb009dcSAndroid Build Coastguard Worker
128*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
129*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// counterpart of the macro used at entry
130*8fb009dcSAndroid Build Coastguard Worker	ret
131*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
132*8fb009dcSAndroid Build Coastguard Worker
133*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_mul_by_3(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper: triples a field element with two calls to
// __ecp_nistz256_add_to (a+a, then +a).
//   In:    x0 = result (4 limbs), x1 = a (4 limbs)
//   Setup: x14-x17 = a (running sum), x8-x11 = a (addend), x4-x7 = spare
//          copy of a, x12 = .Lpoly limb 1, x13 = .Lpoly limb 3
//   Why the spare copy: the helper overwrites x8-x11 with its trial
//   subtraction and zeroes/reuses x1, so neither the addend registers nor
//   the source pointer survive the first call.
//   NOTE(review): this relies on the helper leaving the reduced sum in
//   x14-x17 and storing it at [x0]; that tail of the helper is beyond this
//   view - confirm there.
//   Stack: 16 bytes for the x29/x30 frame record only.
134*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_mul_by_3
135*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_mul_by_3
136*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_mul_by_3,%function
137*8fb009dcSAndroid Build Coastguard Worker.align	4
138*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_mul_by_3:
139*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not in this file)
140*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push frame record
141*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
142*8fb009dcSAndroid Build Coastguard Worker
143*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1]		// a[0], a[1]
144*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#16]	// a[2], a[3]
145*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly		// x13 = page address of .Lpoly ...
146*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly	// ... plus its low 12 bits
147*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = modulus limb 1
148*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = modulus limb 3 (table pointer is no longer needed)
149*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14			// addend for the first call = a
150*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
151*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
152*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
153*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14			// keep a spare copy of a for the second call
154*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
155*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
156*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
157*8fb009dcSAndroid Build Coastguard Worker
158*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// ret = a+a	// 2*a
159*8fb009dcSAndroid Build Coastguard Worker
160*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x4			// reload the addend from the spare copy
161*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x5
162*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x6
163*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x7
164*8fb009dcSAndroid Build Coastguard Worker
165*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// ret += a	// 2*a+a=3*a
166*8fb009dcSAndroid Build Coastguard Worker
167*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
168*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// counterpart of the macro used at entry
169*8fb009dcSAndroid Build Coastguard Worker	ret
170*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
171*8fb009dcSAndroid Build Coastguard Worker
172*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_sub(BN_ULONG x0[4],const BN_ULONG x1[4],
173*8fb009dcSAndroid Build Coastguard Worker//				        const BN_ULONG x2[4]);
//
// Exported wrapper around __ecp_nistz256_sub_from.
//   In:    x0 = result (4 limbs), x1 = a (4 limbs), x2 = b (4 limbs)
//   Setup: x14-x17 = a[0..3], x12 = .Lpoly limb 1, x13 = .Lpoly limb 3;
//          x0 and x2 are passed through unchanged
//   NOTE(review): the helper body is outside this view; presumably it
//   loads b through x2, computes a-b modulo p and stores the result at
//   [x0] - confirm there.
//   Stack: 16 bytes for the x29/x30 frame record only.
174*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_sub
175*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_sub
176*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_sub,%function
177*8fb009dcSAndroid Build Coastguard Worker.align	4
178*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_sub:
179*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not in this file)
180*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push frame record
181*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
182*8fb009dcSAndroid Build Coastguard Worker
183*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1]		// a[0], a[1]
184*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#16]	// a[2], a[3]
185*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly		// x13 = page address of .Lpoly ...
186*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly	// ... plus its low 12 bits
187*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = modulus limb 1
188*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = modulus limb 3 (table pointer is no longer needed)
189*8fb009dcSAndroid Build Coastguard Worker
190*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from
191*8fb009dcSAndroid Build Coastguard Worker
192*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
193*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// counterpart of the macro used at entry
194*8fb009dcSAndroid Build Coastguard Worker	ret
195*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_sub,.-ecp_nistz256_sub
196*8fb009dcSAndroid Build Coastguard Worker
197*8fb009dcSAndroid Build Coastguard Worker// void	ecp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]);
//
// Exported wrapper: negates a field element by calling
// __ecp_nistz256_sub_from with a zero minuend.
//   In:    x0 = result (4 limbs), x1 = input (4 limbs)
//   Setup: x2 = x1 (input pointer moved to the second-operand register),
//          x14-x17 = 0, x12 = .Lpoly limb 1, x13 = .Lpoly limb 3
//   NOTE(review): the helper body is outside this view; presumably it
//   loads the subtrahend through x2, computes 0-input modulo p and stores
//   the result at [x0] - confirm there.
//   Stack: 16 bytes for the x29/x30 frame record only.
198*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_neg
199*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_neg
200*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_neg,%function
201*8fb009dcSAndroid Build Coastguard Worker.align	4
202*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_neg:
203*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// macro from openssl/arm_arch.h (definition not in this file)
204*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-16]!	// push frame record
205*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = sp (frame pointer)
206*8fb009dcSAndroid Build Coastguard Worker
207*8fb009dcSAndroid Build Coastguard Worker	mov	x2,x1			// the input becomes the second operand
208*8fb009dcSAndroid Build Coastguard Worker	mov	x14,xzr		// a = 0
209*8fb009dcSAndroid Build Coastguard Worker	mov	x15,xzr
210*8fb009dcSAndroid Build Coastguard Worker	mov	x16,xzr
211*8fb009dcSAndroid Build Coastguard Worker	mov	x17,xzr
212*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly		// x13 = page address of .Lpoly ...
213*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly	// ... plus its low 12 bits
214*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = modulus limb 1
215*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = modulus limb 3 (table pointer is no longer needed)
216*8fb009dcSAndroid Build Coastguard Worker
217*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from
218*8fb009dcSAndroid Build Coastguard Worker
219*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#16	// pop the frame
220*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER	// counterpart of the macro used at entry
221*8fb009dcSAndroid Build Coastguard Worker	ret
222*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_neg,.-ecp_nistz256_neg
223*8fb009dcSAndroid Build Coastguard Worker
224*8fb009dcSAndroid Build Coastguard Worker// note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded
225*8fb009dcSAndroid Build Coastguard Worker// to x4-x7 and b[0] - to x3
//
// Internal Montgomery multiplication: [x0] = a*b*2^-256 mod p.
//   In:      x0 = result pointer, x2 = pointer to b (b[1..3] are loaded from
//            it here), x3 = b[0], x4-x7 = a[0..3], x12 = modulus limb 1,
//            x13 = modulus limb 3
//   Out:     4-limb result stored at [x0]; the same value is left in x14-x17
//   Clobber: x3, x8-x11, x14-x17, x19, x20 and the condition flags.
//            x19/x20 are not saved here - the caller must preserve them.
//
// Structure: four rounds, one per limb of b. Each round adds a*b[i] to the
// accumulator acc (x14,x15,x16,x17,x19 plus carry limb x20) and performs
// one reduction step that removes the lowest limb. Multiplies for the next
// round are interleaved with the carry chain of the current reduction.
//
// Reduction step: limb 0 of p is all ones, so the multiple of p to add is
// simply acc[0]*p, with
//   acc[0]*p = acc[0]*2^96 + acc[0]*0xffffffff00000001*2^192 - acc[0].
// The "-acc[0]" cancels limb 0, x9:x8 hold acc[0]<<96 (split across limbs
// 1 and 2) and x11:x10 hold acc[0]*0xffffffff00000001, formed with
// subs/sbc as acc[0]*2^64 - acc[0]*2^32 + acc[0]. The existing comment
// "*0xffff0001" is shorthand for that 64-bit constant.
226*8fb009dcSAndroid Build Coastguard Worker.type	__ecp_nistz256_mul_mont,%function
227*8fb009dcSAndroid Build Coastguard Worker.align	4
228*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_mul_mont:
229*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x4,x3		// a[0]*b[0]
230*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
231*8fb009dcSAndroid Build Coastguard Worker
232*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x5,x3		// a[1]*b[0]
233*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
234*8fb009dcSAndroid Build Coastguard Worker
235*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x6,x3		// a[2]*b[0]
236*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
237*8fb009dcSAndroid Build Coastguard Worker
238*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x7,x3		// a[3]*b[0]
239*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3
240*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8]		// b[1]
241*8fb009dcSAndroid Build Coastguard Worker
	// Round 0: acc = a*b[0] in x14..x17,x19 (x20 = 0), then reduction
	// step 1, interleaved with the low products for b[1].
242*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
243*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
244*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
245*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
246*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
247*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,x11
248*8fb009dcSAndroid Build Coastguard Worker	mov	x20,xzr
249*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
250*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
251*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
252*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3		// lo(a[0]*b[i])
253*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
254*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3		// lo(a[1]*b[i])
255*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
256*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3		// lo(a[2]*b[i])
257*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x11
258*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3		// lo(a[3]*b[i])
259*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
260*8fb009dcSAndroid Build Coastguard Worker
	// Round 1: acc += a*b[1], then reduction step 2.
261*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts of multiplication
262*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3		// hi(a[0]*b[i])
263*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
264*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3		// hi(a[1]*b[i])
265*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
266*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3		// hi(a[2]*b[i])
267*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
268*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3		// hi(a[3]*b[i])
269*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
270*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*(1+1)]	// b[1+1]
271*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
272*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
273*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
274*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
275*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
276*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
277*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
278*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
279*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
280*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
281*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3		// lo(a[0]*b[i])
282*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
283*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3		// lo(a[1]*b[i])
284*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
285*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3		// lo(a[2]*b[i])
286*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x11
287*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3		// lo(a[3]*b[i])
288*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
289*8fb009dcSAndroid Build Coastguard Worker
	// Round 2: acc += a*b[2], then reduction step 3.
290*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts of multiplication
291*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3		// hi(a[0]*b[i])
292*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
293*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3		// hi(a[1]*b[i])
294*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
295*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3		// hi(a[2]*b[i])
296*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
297*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3		// hi(a[3]*b[i])
298*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
299*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*(2+1)]	// b[2+1]
300*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
301*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
302*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
303*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
304*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
305*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
306*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
307*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
308*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
309*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
310*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3		// lo(a[0]*b[i])
311*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
312*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3		// lo(a[1]*b[i])
313*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
314*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3		// lo(a[2]*b[i])
315*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x11
316*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3		// lo(a[3]*b[i])
317*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
318*8fb009dcSAndroid Build Coastguard Worker
	// Round 3: acc += a*b[3], then the last reduction step.
319*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts of multiplication
320*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3		// hi(a[0]*b[i])
321*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
322*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3		// hi(a[1]*b[i])
323*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
324*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3		// hi(a[2]*b[i])
325*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
326*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3		// hi(a[3]*b[i])
327*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
328*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
329*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
330*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
331*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
332*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
333*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
334*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
335*8fb009dcSAndroid Build Coastguard Worker	// last reduction
336*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
337*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
338*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
339*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
340*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
341*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x11
342*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
343*8fb009dcSAndroid Build Coastguard Worker
	// Trial subtraction tmp = ret - p over five limbs (x19 is the top
	// limb). Subtracting the all-ones limb 0 is written as "adds #1": the
	// low 64 bits are identical and the carry-out equals the no-borrow
	// flag the subtraction would produce. Limb 2 of p is zero (xzr).
344*8fb009dcSAndroid Build Coastguard Worker	adds	x8,x14,#1		// subs	x8,x14,#-1 // tmp = ret-modulus
345*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x12
346*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,xzr
347*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x13
348*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x19,xzr		// did it borrow?
349*8fb009dcSAndroid Build Coastguard Worker
	// Pick ret or tmp with csel (no branch on the borrow), then store.
350*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,lo	// ret = borrow ? ret : ret-modulus
351*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,lo
352*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,lo
353*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
354*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,lo
355*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
356*8fb009dcSAndroid Build Coastguard Worker
357*8fb009dcSAndroid Build Coastguard Worker	ret
358*8fb009dcSAndroid Build Coastguard Worker.size	__ecp_nistz256_mul_mont,.-__ecp_nistz256_mul_mont
359*8fb009dcSAndroid Build Coastguard Worker
360*8fb009dcSAndroid Build Coastguard Worker// note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded
361*8fb009dcSAndroid Build Coastguard Worker// to x4-x7
//
// Internal Montgomery squaring: [x0] = a*a*2^-256 mod p.
//   In:      x0 = result pointer, x4-x7 = a[0..3], x12 = modulus limb 1,
//            x13 = modulus limb 3
//   Out:     4-limb result stored at [x0]; the same value is left in x14-x17
//   Clobber: x1, x2, x4-x11, x14-x17, x19, x20 and the condition flags.
//            x1/x2 are used as product limbs and x4-x7 are overwritten by
//            the high halves of the squares, so the inputs do not survive.
//            x19/x20 are not saved here - the caller must preserve them.
//
// Structure: (1) the six cross products a[i]*a[j], i<j, are summed and
// doubled; (2) the four squares a[i]^2 are added, giving the full 512-bit
// product; (3) four reduction steps fold the low four limbs; (4) the upper
// four limbs are added and p is conditionally subtracted once.
362*8fb009dcSAndroid Build Coastguard Worker.type	__ecp_nistz256_sqr_mont,%function
363*8fb009dcSAndroid Build Coastguard Worker.align	4
364*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_sqr_mont:
365*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |  |  |a1*a0|  |
366*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |  |a2*a0|  |  |
367*8fb009dcSAndroid Build Coastguard Worker	//  |  |a3*a2|a3*a0|  |  |  |
368*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |a2*a1|  |  |  |
369*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |a3*a1|  |  |  |  |
370*8fb009dcSAndroid Build Coastguard Worker	// *|  |  |  |  |  |  |  | 2|
371*8fb009dcSAndroid Build Coastguard Worker	// +|a3*a3|a2*a2|a1*a1|a0*a0|
372*8fb009dcSAndroid Build Coastguard Worker	//  |--+--+--+--+--+--+--+--|
373*8fb009dcSAndroid Build Coastguard Worker	//  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is the x-th 64-bit limb of the
	//  product; A0..A7 are held in x14,x15,x16,x17,x19,x20,x1,x2.
374*8fb009dcSAndroid Build Coastguard Worker	//
375*8fb009dcSAndroid Build Coastguard Worker	//  "can't overflow" below mark carrying into high part of
376*8fb009dcSAndroid Build Coastguard Worker	//  multiplication result, which can't overflow, because it
377*8fb009dcSAndroid Build Coastguard Worker	//  can never be all ones.
378*8fb009dcSAndroid Build Coastguard Worker
379*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x5,x4		// a[1]*a[0]
380*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x4
381*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x6,x4		// a[2]*a[0]
382*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x4
383*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x7,x4		// a[3]*a[0]
384*8fb009dcSAndroid Build Coastguard Worker	umulh	x19,x7,x4
385*8fb009dcSAndroid Build Coastguard Worker
386*8fb009dcSAndroid Build Coastguard Worker	adds	x16,x16,x9		// accumulate high parts of multiplication
387*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x6,x5		// a[2]*a[1]
388*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x6,x5
389*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
390*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x7,x5		// a[3]*a[1]
391*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x5
392*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr		// can't overflow
393*8fb009dcSAndroid Build Coastguard Worker
394*8fb009dcSAndroid Build Coastguard Worker	mul	x20,x7,x6		// a[3]*a[2]
395*8fb009dcSAndroid Build Coastguard Worker	umulh	x1,x7,x6
396*8fb009dcSAndroid Build Coastguard Worker
397*8fb009dcSAndroid Build Coastguard Worker	adds	x9,x9,x10		// accumulate high parts of multiplication
398*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x4,x4		// a[0]*a[0]
399*8fb009dcSAndroid Build Coastguard Worker	adc	x10,x11,xzr		// can't overflow
400*8fb009dcSAndroid Build Coastguard Worker
401*8fb009dcSAndroid Build Coastguard Worker	adds	x17,x17,x8		// accumulate low parts of multiplication
402*8fb009dcSAndroid Build Coastguard Worker	umulh	x4,x4,x4
403*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x9
404*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x5		// a[1]*a[1]
405*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x10
406*8fb009dcSAndroid Build Coastguard Worker	umulh	x5,x5,x5
407*8fb009dcSAndroid Build Coastguard Worker	adc	x1,x1,xzr		// can't overflow
408*8fb009dcSAndroid Build Coastguard Worker
409*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x15	// acc[1-6]*=2
410*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x6		// a[2]*a[2]
411*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x16
412*8fb009dcSAndroid Build Coastguard Worker	umulh	x6,x6,x6
413*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x17
414*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x7		// a[3]*a[3]
415*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x19
416*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x7
417*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x20
418*8fb009dcSAndroid Build Coastguard Worker	adcs	x1,x1,x1
419*8fb009dcSAndroid Build Coastguard Worker	adc	x2,xzr,xzr
420*8fb009dcSAndroid Build Coastguard Worker
	// Add the squares. The lsl/lsr on x14 interleaved below already
	// start the first reduction step. The four reduction steps act on the
	// low half only (x14..x17); each produces its new top limb as
	// x11 + carry. The upper half (x19,x20,x1,x2) is added afterwards.
421*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x4		// +a[i]*a[i]
422*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
423*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x5
424*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x10
425*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x6
426*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
427*8fb009dcSAndroid Build Coastguard Worker	adcs	x1,x1,x11
428*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
429*8fb009dcSAndroid Build Coastguard Worker	adc	x2,x2,x7
430*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
431*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
432*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
433*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
434*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
435*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
436*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
437*8fb009dcSAndroid Build Coastguard Worker	adc	x17,x11,xzr		// can't overflow
438*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
439*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
440*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
441*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
442*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
443*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
444*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
445*8fb009dcSAndroid Build Coastguard Worker	adc	x17,x11,xzr		// can't overflow
446*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
447*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
448*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
449*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
450*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x14,#32
451*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
452*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x14,#32
453*8fb009dcSAndroid Build Coastguard Worker	adc	x17,x11,xzr		// can't overflow
454*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x14,x8		// "*0xffff0001"
455*8fb009dcSAndroid Build Coastguard Worker	sbc	x11,x14,x9
456*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x8		// +=acc[0]<<96 and omit acc[0]
457*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x9
458*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x10		// +=acc[0]*0xffff0001
459*8fb009dcSAndroid Build Coastguard Worker	adc	x17,x11,xzr		// can't overflow
460*8fb009dcSAndroid Build Coastguard Worker
461*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x19	// accumulate upper half
462*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x20
463*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x1
464*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x2
465*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr
466*8fb009dcSAndroid Build Coastguard Worker
	// Trial subtraction tmp = ret - p over five limbs (x19 is the carry
	// limb). Subtracting the all-ones limb 0 is written as "adds #1": the
	// low 64 bits are identical and the carry-out equals the no-borrow
	// flag the subtraction would produce. Limb 2 of p is zero (xzr).
467*8fb009dcSAndroid Build Coastguard Worker	adds	x8,x14,#1		// subs	x8,x14,#-1 // tmp = ret-modulus
468*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x12
469*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,xzr
470*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x13
471*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x19,xzr		// did it borrow?
472*8fb009dcSAndroid Build Coastguard Worker
	// Pick ret or tmp with csel (no branch on the borrow), then store.
473*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,lo	// ret = borrow ? ret : ret-modulus
474*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,lo
475*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,lo
476*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
477*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,lo
478*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
479*8fb009dcSAndroid Build Coastguard Worker
480*8fb009dcSAndroid Build Coastguard Worker	ret
481*8fb009dcSAndroid Build Coastguard Worker.size	__ecp_nistz256_sqr_mont,.-__ecp_nistz256_sqr_mont
482*8fb009dcSAndroid Build Coastguard Worker
483*8fb009dcSAndroid Build Coastguard Worker// Note that __ecp_nistz256_add_to expects both input vectors pre-loaded to
484*8fb009dcSAndroid Build Coastguard Worker// x14-x17 and x8-x11. This is done because it's used in multiple
485*8fb009dcSAndroid Build Coastguard Worker// contexts, e.g. in multiplication by 2 and 3...
// __ecp_nistz256_add_to: ret = a + b, followed by one conditional
// subtraction of the modulus (selected with csel, no branches).
//
// In:    x14-x17 = a, four 64-bit limbs with x14 the least significant
//        x8-x11  = b, same layout
//        x12,x13 = modulus limbs 1 and 3 (the callers in this file load them
//                  from .Lpoly+8 and .Lpoly+24)
//        x0      = address the 32-byte result is stored to
// Out:   x14-x17 = result, also written to [x0]
// Clobb: x1, x8-x11, condition flags
486*8fb009dcSAndroid Build Coastguard Worker.type	__ecp_nistz256_add_to,%function
487*8fb009dcSAndroid Build Coastguard Worker.align	4
488*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_add_to:
489*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// ret = a+b
490*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
491*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
492*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
493*8fb009dcSAndroid Build Coastguard Worker	adc	x1,xzr,xzr		// x1 = carry out of the 256-bit add
494*8fb009dcSAndroid Build Coastguard Worker
// Modulus limb 0 is all ones, so subtracting it is written as adding 1 (the
// carry flag comes out the same); limb 2 is zero, hence xzr below.
495*8fb009dcSAndroid Build Coastguard Worker	adds	x8,x14,#1		// subs	x8,x14,#-1 // tmp = ret-modulus
496*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x12
497*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,xzr
498*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x13
499*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x1,xzr		// did subtraction borrow?
500*8fb009dcSAndroid Build Coastguard Worker
// "lo" is carry clear, i.e. the 257-bit subtraction borrowed.
501*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,lo	// ret = borrow ? ret : ret-modulus
502*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,lo
503*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,lo
504*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
505*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,lo
506*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
507*8fb009dcSAndroid Build Coastguard Worker
508*8fb009dcSAndroid Build Coastguard Worker	ret
509*8fb009dcSAndroid Build Coastguard Worker.size	__ecp_nistz256_add_to,.-__ecp_nistz256_add_to
510*8fb009dcSAndroid Build Coastguard Worker
// __ecp_nistz256_sub_from: ret = a - b, with the modulus added back when the
// subtraction borrowed (selected with csel, no branches).
//
// In:    x14-x17 = a, four 64-bit limbs with x14 the least significant
//        x2      = address of b (32 bytes)
//        x12,x13 = modulus limbs 1 and 3 (the callers in this file load them
//                  from .Lpoly+8 and .Lpoly+24)
//        x0      = address the 32-byte result is stored to
// Out:   x14-x17 = result, also written to [x0]
// Clobb: x1, x8-x11, condition flags
511*8fb009dcSAndroid Build Coastguard Worker.type	__ecp_nistz256_sub_from,%function
512*8fb009dcSAndroid Build Coastguard Worker.align	4
513*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_sub_from:
514*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2]
515*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#16]
516*8fb009dcSAndroid Build Coastguard Worker	subs	x14,x14,x8		// ret = a-b
517*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x15,x9
518*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x10
519*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x11
520*8fb009dcSAndroid Build Coastguard Worker	sbc	x1,xzr,xzr		// x1 = 0 if no borrow, all ones if borrow
521*8fb009dcSAndroid Build Coastguard Worker
// Modulus limb 0 is all ones, so adding it is written as subtracting 1 (the
// carry flag comes out the same); limb 2 is zero, hence xzr below.
522*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,#1		// adds	x8,x14,#-1 // tmp = ret+modulus
523*8fb009dcSAndroid Build Coastguard Worker	adcs	x9,x15,x12
524*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x16,xzr
525*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x17,x13
526*8fb009dcSAndroid Build Coastguard Worker	cmp	x1,xzr			// did subtraction borrow?
527*8fb009dcSAndroid Build Coastguard Worker
// "eq" means x1 == 0, i.e. no borrow, so the unadjusted difference is kept.
528*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,eq	// ret = borrow ? ret+modulus : ret
529*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,eq
530*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,eq
531*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
532*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,eq
533*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
534*8fb009dcSAndroid Build Coastguard Worker
535*8fb009dcSAndroid Build Coastguard Worker	ret
536*8fb009dcSAndroid Build Coastguard Worker.size	__ecp_nistz256_sub_from,.-__ecp_nistz256_sub_from
537*8fb009dcSAndroid Build Coastguard Worker
// __ecp_nistz256_sub_morf: operand-reversed variant of
// __ecp_nistz256_sub_from. Computes ret = b - a, with the modulus added back
// when the subtraction borrowed (selected with csel, no branches).
//
// In:    x14-x17 = a (the subtrahend), x14 the least significant limb
//        x2      = address of b (the minuend, 32 bytes)
//        x12,x13 = modulus limbs 1 and 3 (the callers in this file load them
//                  from .Lpoly+8 and .Lpoly+24)
//        x0      = address the 32-byte result is stored to
// Out:   x14-x17 = result, also written to [x0]
// Clobb: x1, x8-x11, condition flags
538*8fb009dcSAndroid Build Coastguard Worker.type	__ecp_nistz256_sub_morf,%function
539*8fb009dcSAndroid Build Coastguard Worker.align	4
540*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_sub_morf:
541*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2]
542*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#16]
543*8fb009dcSAndroid Build Coastguard Worker	subs	x14,x8,x14		// ret = b-a
544*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x9,x15
545*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x10,x16
546*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x11,x17
547*8fb009dcSAndroid Build Coastguard Worker	sbc	x1,xzr,xzr		// x1 = 0 if no borrow, all ones if borrow
548*8fb009dcSAndroid Build Coastguard Worker
// Modulus limb 0 is all ones, so adding it is written as subtracting 1 (the
// carry flag comes out the same); limb 2 is zero, hence xzr below.
549*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,#1		// adds	x8,x14,#-1 // tmp = ret+modulus
550*8fb009dcSAndroid Build Coastguard Worker	adcs	x9,x15,x12
551*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x16,xzr
552*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x17,x13
553*8fb009dcSAndroid Build Coastguard Worker	cmp	x1,xzr			// did subtraction borrow?
554*8fb009dcSAndroid Build Coastguard Worker
// "eq" means x1 == 0, i.e. no borrow, so the unadjusted difference is kept.
555*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,eq	// ret = borrow ? ret+modulus : ret
556*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,eq
557*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,eq
558*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
559*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,eq
560*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
561*8fb009dcSAndroid Build Coastguard Worker
562*8fb009dcSAndroid Build Coastguard Worker	ret
563*8fb009dcSAndroid Build Coastguard Worker.size	__ecp_nistz256_sub_morf,.-__ecp_nistz256_sub_morf
564*8fb009dcSAndroid Build Coastguard Worker
// __ecp_nistz256_div_by_2: halves a field element. When a is odd the modulus
// is added first, and the resulting value (up to 257 bits, top bit in x1) is
// shifted right by one. The choice is made with csel, no branches.
//
// In:    x14-x17 = a, four 64-bit limbs with x14 the least significant
//        x12,x13 = modulus limbs 1 and 3 (the callers in this file load them
//                  from .Lpoly+8 and .Lpoly+24)
//        x0      = address the 32-byte result is stored to
// Out:   x14-x17 = result, also written to [x0]
// Clobb: x1, x8-x11, condition flags
565*8fb009dcSAndroid Build Coastguard Worker.type	__ecp_nistz256_div_by_2,%function
566*8fb009dcSAndroid Build Coastguard Worker.align	4
567*8fb009dcSAndroid Build Coastguard Worker__ecp_nistz256_div_by_2:
// Modulus limb 0 is all ones, so adding it is written as subtracting 1 (the
// carry flag comes out the same); limb 2 is zero, hence xzr below.
568*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,#1		// adds	x8,x14,#-1 // tmp = a+modulus
569*8fb009dcSAndroid Build Coastguard Worker	adcs	x9,x15,x12
570*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x16,xzr
571*8fb009dcSAndroid Build Coastguard Worker	adcs	x11,x17,x13
572*8fb009dcSAndroid Build Coastguard Worker	adc	x1,xzr,xzr		// x1 = carry out of a+modulus (bit 256)
573*8fb009dcSAndroid Build Coastguard Worker	tst	x14,#1		// is a even?
574*8fb009dcSAndroid Build Coastguard Worker
575*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,eq	// ret = even ? a : a+modulus
576*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,eq
577*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,eq
578*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,eq
579*8fb009dcSAndroid Build Coastguard Worker	csel	x1,xzr,x1,eq		// no extra top bit when a was kept as is
580*8fb009dcSAndroid Build Coastguard Worker
// Shift right by one across the limbs: each limb takes the low bit of the
// next higher limb as its new top bit, and x17 takes the carry bit from x1.
581*8fb009dcSAndroid Build Coastguard Worker	lsr	x14,x14,#1		// ret >>= 1
582*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x15,lsl#63
583*8fb009dcSAndroid Build Coastguard Worker	lsr	x15,x15,#1
584*8fb009dcSAndroid Build Coastguard Worker	orr	x15,x15,x16,lsl#63
585*8fb009dcSAndroid Build Coastguard Worker	lsr	x16,x16,#1
586*8fb009dcSAndroid Build Coastguard Worker	orr	x16,x16,x17,lsl#63
587*8fb009dcSAndroid Build Coastguard Worker	lsr	x17,x17,#1
588*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
589*8fb009dcSAndroid Build Coastguard Worker	orr	x17,x17,x1,lsl#63
590*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
591*8fb009dcSAndroid Build Coastguard Worker
592*8fb009dcSAndroid Build Coastguard Worker	ret
593*8fb009dcSAndroid Build Coastguard Worker.size	__ecp_nistz256_div_by_2,.-__ecp_nistz256_div_by_2
// ecp_nistz256_point_double: doubles a P-256 point. A point is accessed as
// three 32-byte coordinates: x at offset 0, y at offset 32, z at offset 64.
//
// In:    x0 = address of the output point, x1 = address of the input point
// Regs:  x21 = output pointer and x22 = input pointer, kept across calls
//        x12,x13 = .Lpoly limbs 1 and 3, consumed by the add/sub/div helpers
// Stack: 96-byte register save area, then four 32-byte temporaries:
//        sp+0 = S, sp+32 = M, sp+64 = Zsqr, sp+96 = tmp0
//
// The helpers visible in this section leave their result in x14-x17 as well
// as storing it through x0, which is why values are shuffled between
// x14-x17, x8-x11 and x4-x7 around each call.
// NOTE(review): __ecp_nistz256_mul_mont and the start of
// __ecp_nistz256_sqr_mont are outside this section. Judging by the "forward
// load" comments they take one operand in x4-x7, and mul_mont takes the other
// through x2 with its first limb preloaded in x3 - confirm against their
// definitions.
594*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_point_double
595*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_point_double
596*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_point_double,%function
597*8fb009dcSAndroid Build Coastguard Worker.align	5
598*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_point_double:
599*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER
// Only x19-x22 are saved here, yet the frame is 96 bytes: it has the same
// size as the frame of ecp_nistz256_point_add, which branches to
// .Ldouble_shortcut and then returns through the epilogue of this function.
// x19/x20 are used as scratch by __ecp_nistz256_sqr_mont.
600*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-96]!
601*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
602*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
603*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
604*8fb009dcSAndroid Build Coastguard Worker	sub	sp,sp,#32*4		// room for S, M, Zsqr, tmp0
605*8fb009dcSAndroid Build Coastguard Worker
// Also entered from .Ladd_double in ecp_nistz256_point_add, with x0/x1 set
// to the output/input points and sp already adjusted to this layout.
606*8fb009dcSAndroid Build Coastguard Worker.Ldouble_shortcut:
607*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x1,#32]	// in_y
608*8fb009dcSAndroid Build Coastguard Worker	mov	x21,x0
609*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x1,#48]
610*8fb009dcSAndroid Build Coastguard Worker	mov	x22,x1
611*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly
612*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly
613*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = .Lpoly limb 1
614*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14
615*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = .Lpoly limb 3
616*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
617*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x22,#64]	// forward load for p256_sqr_mont
618*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
619*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
620*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x22,#64+16]
621*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
622*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(S, in_y);
623*8fb009dcSAndroid Build Coastguard Worker
624*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
625*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Zsqr, in_z);
626*8fb009dcSAndroid Build Coastguard Worker
627*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x22]		// in_x
628*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x22,#16]
629*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14		// put Zsqr aside for p256_sub
630*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
631*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
632*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
633*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
634*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_add(M, Zsqr, in_x);
635*8fb009dcSAndroid Build Coastguard Worker
636*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#0
637*8fb009dcSAndroid Build Coastguard Worker	mov	x14,x4		// restore Zsqr
638*8fb009dcSAndroid Build Coastguard Worker	mov	x15,x5
639*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]	// forward load for p256_sqr_mont
640*8fb009dcSAndroid Build Coastguard Worker	mov	x16,x6
641*8fb009dcSAndroid Build Coastguard Worker	mov	x17,x7
642*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
643*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
644*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(Zsqr, in_x, Zsqr);
645*8fb009dcSAndroid Build Coastguard Worker
646*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
647*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(S, S);
648*8fb009dcSAndroid Build Coastguard Worker
649*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#32]
650*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x22,#64]
651*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x22,#64+16]
652*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#32
653*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
654*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(tmp0, in_z, in_y);
655*8fb009dcSAndroid Build Coastguard Worker
656*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14
657*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
658*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]	// forward load for p256_sqr_mont
659*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
660*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
661*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
662*8fb009dcSAndroid Build Coastguard Worker	add	x0,x21,#64
663*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(res_z, tmp0);
664*8fb009dcSAndroid Build Coastguard Worker
665*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
666*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(tmp0, S);
667*8fb009dcSAndroid Build Coastguard Worker
// div_by_2 works on the x14-x17 value left by the squaring above; x3-x7 are
// loaded here ahead of the multiplication that follows it.
668*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#64]		// forward load for p256_mul_mont
669*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#32]
670*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#32+16]
671*8fb009dcSAndroid Build Coastguard Worker	add	x0,x21,#32
672*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_div_by_2	// p256_div_by_2(res_y, tmp0);
673*8fb009dcSAndroid Build Coastguard Worker
674*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#64
675*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
676*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(M, M, Zsqr);
677*8fb009dcSAndroid Build Coastguard Worker
// Multiply by 3 as two additions: M+M first, then (2*M)+M with the original
// M restored from x4-x7. Both results are stored to sp+32.
678*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14		// duplicate M
679*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
680*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
681*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
682*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14		// put M aside
683*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
684*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
685*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
686*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
687*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// first half of p256_mul_by_3: 2*M
688*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x4			// restore M
689*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x5
690*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22]		// forward load for p256_mul_mont
691*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x6
692*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]
693*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x7
694*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
695*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_3(M, M);
696*8fb009dcSAndroid Build Coastguard Worker
697*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#0
698*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
699*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S, S, in_x);
700*8fb009dcSAndroid Build Coastguard Worker
701*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14
702*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
703*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#32]	// forward load for p256_sqr_mont
704*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
705*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
706*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#32+16]
707*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
708*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(tmp0, S);
709*8fb009dcSAndroid Build Coastguard Worker
710*8fb009dcSAndroid Build Coastguard Worker	add	x0,x21,#0
711*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(res_x, M);
712*8fb009dcSAndroid Build Coastguard Worker
713*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#96
714*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_x, res_x, tmp0);
715*8fb009dcSAndroid Build Coastguard Worker
// res_x is still in x14-x17; sub_morf computes [x2] - x14..x17 = S - res_x.
716*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#0
717*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
718*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(S, S, res_x);
719*8fb009dcSAndroid Build Coastguard Worker
720*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#32]
721*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14		// copy S
722*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
723*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
724*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
725*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#32
726*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S, S, M);
727*8fb009dcSAndroid Build Coastguard Worker
728*8fb009dcSAndroid Build Coastguard Worker	add	x2,x21,#32
729*8fb009dcSAndroid Build Coastguard Worker	add	x0,x21,#32
730*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_y, S, res_y);
731*8fb009dcSAndroid Build Coastguard Worker
732*8fb009dcSAndroid Build Coastguard Worker	add	sp,x29,#0		// destroy frame
733*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
734*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
735*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#96
736*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
737*8fb009dcSAndroid Build Coastguard Worker	ret
738*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
739*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_point_add
740*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_point_add
741*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_point_add,%function
742*8fb009dcSAndroid Build Coastguard Worker.align	5
743*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_point_add:
744*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER
745*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-96]!
746*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
747*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
748*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
749*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
750*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[sp,#64]
751*8fb009dcSAndroid Build Coastguard Worker	stp	x27,x28,[sp,#80]
752*8fb009dcSAndroid Build Coastguard Worker	sub	sp,sp,#32*12
753*8fb009dcSAndroid Build Coastguard Worker
754*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x2,#64]	// in2_z
755*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x2,#64+16]
756*8fb009dcSAndroid Build Coastguard Worker	mov	x21,x0
757*8fb009dcSAndroid Build Coastguard Worker	mov	x22,x1
758*8fb009dcSAndroid Build Coastguard Worker	mov	x23,x2
759*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly
760*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly
761*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]
762*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]
763*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x4,x5
764*8fb009dcSAndroid Build Coastguard Worker	orr	x10,x6,x7
765*8fb009dcSAndroid Build Coastguard Worker	orr	x25,x8,x10
766*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0
767*8fb009dcSAndroid Build Coastguard Worker	csetm	x25,ne		// ~in2infty
768*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#192
769*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z2sqr, in2_z);
770*8fb009dcSAndroid Build Coastguard Worker
771*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x22,#64]	// in1_z
772*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x22,#64+16]
773*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x4,x5
774*8fb009dcSAndroid Build Coastguard Worker	orr	x10,x6,x7
775*8fb009dcSAndroid Build Coastguard Worker	orr	x24,x8,x10
776*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0
777*8fb009dcSAndroid Build Coastguard Worker	csetm	x24,ne		// ~in1infty
778*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
779*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);
780*8fb009dcSAndroid Build Coastguard Worker
781*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23,#64]
782*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#192]
783*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#192+16]
784*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#64
785*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#320
786*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S1, Z2sqr, in2_z);
787*8fb009dcSAndroid Build Coastguard Worker
788*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#64]
789*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#128]
790*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#128+16]
791*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#64
792*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#352
793*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, Z1sqr, in1_z);
794*8fb009dcSAndroid Build Coastguard Worker
795*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#32]
796*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#320]
797*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#320+16]
798*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#32
799*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#320
800*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S1, S1, in1_y);
801*8fb009dcSAndroid Build Coastguard Worker
802*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23,#32]
803*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#352]
804*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#352+16]
805*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#32
806*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#352
807*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, S2, in2_y);
808*8fb009dcSAndroid Build Coastguard Worker
809*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#320
810*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#192]	// forward load for p256_mul_mont
811*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x22]
812*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x22,#16]
813*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#160
814*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(R, S2, S1);
815*8fb009dcSAndroid Build Coastguard Worker
816*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x15	// see if result is zero
817*8fb009dcSAndroid Build Coastguard Worker	orr	x16,x16,x17
818*8fb009dcSAndroid Build Coastguard Worker	orr	x26,x14,x16	// ~is_equal(S1,S2)
819*8fb009dcSAndroid Build Coastguard Worker
820*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#192
821*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#256
822*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U1, in1_x, Z2sqr);
823*8fb009dcSAndroid Build Coastguard Worker
824*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#128]
825*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x23]
826*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x23,#16]
827*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#128
828*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#288
829*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U2, in2_x, Z1sqr);
830*8fb009dcSAndroid Build Coastguard Worker
831*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#256
832*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#160]	// forward load for p256_sqr_mont
833*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#160+16]
834*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
835*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(H, U2, U1);
836*8fb009dcSAndroid Build Coastguard Worker
837*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x15	// see if result is zero
838*8fb009dcSAndroid Build Coastguard Worker	orr	x16,x16,x17
839*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x16	// ~is_equal(U1,U2)
840*8fb009dcSAndroid Build Coastguard Worker
841*8fb009dcSAndroid Build Coastguard Worker	mvn	x27,x24	// -1/0 -> 0/-1
842*8fb009dcSAndroid Build Coastguard Worker	mvn	x28,x25	// -1/0 -> 0/-1
843*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x27
844*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x28
845*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x26
846*8fb009dcSAndroid Build Coastguard Worker	cbnz	x14,.Ladd_proceed	// if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
847*8fb009dcSAndroid Build Coastguard Worker
848*8fb009dcSAndroid Build Coastguard Worker.Ladd_double:
849*8fb009dcSAndroid Build Coastguard Worker	mov	x1,x22
850*8fb009dcSAndroid Build Coastguard Worker	mov	x0,x21
851*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
852*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
853*8fb009dcSAndroid Build Coastguard Worker	ldp	x27,x28,[x29,#80]
854*8fb009dcSAndroid Build Coastguard Worker	add	sp,sp,#256	// #256 is from #32*(12-4). difference in stack frames
855*8fb009dcSAndroid Build Coastguard Worker	b	.Ldouble_shortcut
856*8fb009dcSAndroid Build Coastguard Worker
857*8fb009dcSAndroid Build Coastguard Worker.align	4
858*8fb009dcSAndroid Build Coastguard Worker.Ladd_proceed:
859*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#192
860*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Rsqr, R);
861*8fb009dcSAndroid Build Coastguard Worker
862*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#64]
863*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#96]
864*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#96+16]
865*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#64
866*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
867*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_z, H, in1_z);
868*8fb009dcSAndroid Build Coastguard Worker
869*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#96]
870*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#96+16]
871*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
872*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Hsqr, H);
873*8fb009dcSAndroid Build Coastguard Worker
874*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23,#64]
875*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#64]
876*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#64+16]
877*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#64
878*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
879*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_z, res_z, in2_z);
880*8fb009dcSAndroid Build Coastguard Worker
881*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#96]
882*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#128]
883*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#128+16]
884*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#96
885*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#224
886*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(Hcub, Hsqr, H);
887*8fb009dcSAndroid Build Coastguard Worker
888*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#128]
889*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#256]
890*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#256+16]
891*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#128
892*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#288
893*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U2, U1, Hsqr);
894*8fb009dcSAndroid Build Coastguard Worker
895*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14
896*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
897*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
898*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
899*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
900*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(Hsqr, U2);
901*8fb009dcSAndroid Build Coastguard Worker
902*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#192
903*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
904*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(res_x, Rsqr, Hsqr);
905*8fb009dcSAndroid Build Coastguard Worker
906*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#224
907*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	//  p256_sub(res_x, res_x, Hcub);
908*8fb009dcSAndroid Build Coastguard Worker
909*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#288
910*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#224]		// forward load for p256_mul_mont
911*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#320]
912*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#320+16]
913*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
914*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(res_y, U2, res_x);
915*8fb009dcSAndroid Build Coastguard Worker
916*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#224
917*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#352
918*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, S1, Hcub);
919*8fb009dcSAndroid Build Coastguard Worker
920*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#160]
921*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#32]
922*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#32+16]
923*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#160
924*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
925*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_y, res_y, R);
926*8fb009dcSAndroid Build Coastguard Worker
927*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#352
928*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_y, res_y, S2);
929*8fb009dcSAndroid Build Coastguard Worker
930*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]		// res
931*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
932*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23]		// in2
933*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#16]
934*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#0]	// in1
935*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
936*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#0+16]
937*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
938*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
939*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0+0+32]	// res
940*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
941*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
942*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
943*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+0+48]
944*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
945*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
946*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23,#0+32]	// in2
947*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
948*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
949*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#0+48]
950*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#0]
951*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#0+16]
952*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#32]	// in1
953*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
954*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#32+16]
955*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
956*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
957*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0+32+32]	// res
958*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
959*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
960*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
961*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+32+48]
962*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
963*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
964*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23,#32+32]	// in2
965*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
966*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
967*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#32+48]
968*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#32]
969*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#32+16]
970*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#64]	// in1
971*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
972*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#64+16]
973*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
974*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
975*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
976*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
977*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
978*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
979*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
980*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
981*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
982*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#64]
983*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#64+16]
984*8fb009dcSAndroid Build Coastguard Worker
985*8fb009dcSAndroid Build Coastguard Worker.Ladd_done:
986*8fb009dcSAndroid Build Coastguard Worker	add	sp,x29,#0		// destroy frame
987*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
988*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
989*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
990*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
991*8fb009dcSAndroid Build Coastguard Worker	ldp	x27,x28,[x29,#80]
992*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#96
993*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
994*8fb009dcSAndroid Build Coastguard Worker	ret
995*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
// ecp_nistz256_point_add_affine
//
// Adds the point at x1 (in1; coordinates are read at byte offsets 0, 32
// and 64, the last being in1_z) to the point at x2 (in2; only offsets 0
// and 32, in2_x and in2_y, are read) and stores three 32-byte coordinates
// at x0.
// NOTE(review): the matching C prototype is not visible in this part of
// the file -- confirm the argument types against the C header.
//
// Register roles for the whole function:
//   x21 = result pointer, x22 = in1 pointer, x23 = in2 pointer
//   x24 = all-ones if in1_z is non-zero, else 0               (~in1infty)
//   x25 = all-ones if any word of in2_x/in2_y is non-zero, else 0
//                                                              (~in2infty)
//   x12, x13 = words 1 and 3 of .Lpoly (presumably consumed by the
//              __ecp_nistz256_* helpers, which are defined elsewhere)
//
// Stack scratch (32*10 bytes below the 80-byte register save area), as
// named by the per-call comments:
//   sp+0   res_x   sp+32  res_y   sp+64  res_z   sp+96  U2
//   sp+128 Z1sqr, later reused for S2            sp+160 H
//   sp+192 R       sp+224 Hsqr    sp+256 Hcub    sp+288 Rsqr
//
// The output is chosen with csel instead of branches:
//   out = in2infty ? in1 : (in1infty ? in2 : res)
// and .Lone_mont stands in for in2_z, since no z coordinate is read
// from in2.
996*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_point_add_affine
997*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_point_add_affine
998*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_point_add_affine,%function
999*8fb009dcSAndroid Build Coastguard Worker.align	5
1000*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_point_add_affine:
	// Macro from openssl/arm_arch.h; presumably signs x30 when pointer
	// authentication is enabled -- see that header.
1001*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER
	// 80-byte save area: x29/x30 plus callee-saved x19-x26.
1002*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-80]!
1003*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
1004*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
1005*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
1006*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
1007*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[sp,#64]
1008*8fb009dcSAndroid Build Coastguard Worker	sub	sp,sp,#32*10		// ten 32-byte temporaries
1009*8fb009dcSAndroid Build Coastguard Worker
	// Keep the three pointer arguments in callee-saved registers so they
	// survive the helper calls below.
1010*8fb009dcSAndroid Build Coastguard Worker	mov	x21,x0
1011*8fb009dcSAndroid Build Coastguard Worker	mov	x22,x1
1012*8fb009dcSAndroid Build Coastguard Worker	mov	x23,x2
1013*8fb009dcSAndroid Build Coastguard Worker	adrp	x13,.Lpoly
1014*8fb009dcSAndroid Build Coastguard Worker	add	x13,x13,:lo12:.Lpoly
1015*8fb009dcSAndroid Build Coastguard Worker	ldr	x12,[x13,#8]		// x12 = .Lpoly word 1
1016*8fb009dcSAndroid Build Coastguard Worker	ldr	x13,[x13,#24]		// x13 = .Lpoly word 3
1017*8fb009dcSAndroid Build Coastguard Worker
	// x24 = (in1_z != 0) ? ~0 : 0. x4-x7 keep in1_z for the squaring
	// call further down.
1018*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1,#64]	// in1_z
1019*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#64+16]
1020*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x4,x5
1021*8fb009dcSAndroid Build Coastguard Worker	orr	x10,x6,x7
1022*8fb009dcSAndroid Build Coastguard Worker	orr	x24,x8,x10
1023*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0
1024*8fb009dcSAndroid Build Coastguard Worker	csetm	x24,ne		// ~in1infty
1025*8fb009dcSAndroid Build Coastguard Worker
	// x25 = (in2_x | in2_y != 0) ? ~0 : 0, i.e. in2 is treated as the
	// point at infinity when all eight words of x and y are zero.
1026*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x2]	// in2_x
1027*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x2,#16]
1028*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2,#32]	// in2_y
1029*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#48]
1030*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x15
1031*8fb009dcSAndroid Build Coastguard Worker	orr	x16,x16,x17
1032*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x8,x9
1033*8fb009dcSAndroid Build Coastguard Worker	orr	x10,x10,x11
1034*8fb009dcSAndroid Build Coastguard Worker	orr	x14,x14,x16
1035*8fb009dcSAndroid Build Coastguard Worker	orr	x8,x8,x10
1036*8fb009dcSAndroid Build Coastguard Worker	orr	x25,x14,x8
1037*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0
1038*8fb009dcSAndroid Build Coastguard Worker	csetm	x25,ne		// ~in2infty
1039*8fb009dcSAndroid Build Coastguard Worker
1040*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
1041*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);
1042*8fb009dcSAndroid Build Coastguard Worker
	// x14-x17 become the first operand (Z1sqr, per the call comment
	// below); presumably the helper leaves its result in x14-x17 --
	// confirm in __ecp_nistz256_sqr_mont.
1043*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x14
1044*8fb009dcSAndroid Build Coastguard Worker	mov	x5,x15
1045*8fb009dcSAndroid Build Coastguard Worker	mov	x6,x16
1046*8fb009dcSAndroid Build Coastguard Worker	mov	x7,x17
1047*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23]		// first word of in2_x
1048*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#0
1049*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
1050*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U2, Z1sqr, in2_x);
1051*8fb009dcSAndroid Build Coastguard Worker
	// x3 and x4-x7 are loaded here for the multiplication that follows
	// the subtraction (first word of in1_z, and Z1sqr from sp+128).
1052*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#0
1053*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#64]	// forward load for p256_mul_mont
1054*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#128]
1055*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#128+16]
1056*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#160
1057*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(H, U2, in1_x);
1058*8fb009dcSAndroid Build Coastguard Worker
1059*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#64
1060*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
1061*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, Z1sqr, in1_z);
1062*8fb009dcSAndroid Build Coastguard Worker
1063*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#64]
1064*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#160]
1065*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#160+16]
1066*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#64
1067*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#64
1068*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_z, H, in1_z);
1069*8fb009dcSAndroid Build Coastguard Worker
1070*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x23,#32]
1071*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#128]
1072*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#128+16]
1073*8fb009dcSAndroid Build Coastguard Worker	add	x2,x23,#32
1074*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
1075*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, S2, in2_y);
1076*8fb009dcSAndroid Build Coastguard Worker
1077*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#32
1078*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#160]	// forward load for p256_sqr_mont
1079*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#160+16]
1080*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#192
1081*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(R, S2, in1_y);
1082*8fb009dcSAndroid Build Coastguard Worker
1083*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#224
1084*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Hsqr, H);
1085*8fb009dcSAndroid Build Coastguard Worker
1086*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#192]
1087*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#192+16]
1088*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#288
1089*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Rsqr, R);
1090*8fb009dcSAndroid Build Coastguard Worker
1091*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#160]
1092*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#224]
1093*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#224+16]
1094*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#160
1095*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#256
1096*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(Hcub, Hsqr, H);
1097*8fb009dcSAndroid Build Coastguard Worker
1098*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22]
1099*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#224]
1100*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#224+16]
1101*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#0
1102*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#96
1103*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(U2, in1_x, Hsqr);
1104*8fb009dcSAndroid Build Coastguard Worker
	// Doubling is done as U2 + U2: the value in x14-x17 is copied into
	// x8-x11 as the second addend (presumably __ecp_nistz256_add_to adds
	// x8-x11 to x14-x17 -- confirm in the helper).
1105*8fb009dcSAndroid Build Coastguard Worker	mov	x8,x14
1106*8fb009dcSAndroid Build Coastguard Worker	mov	x9,x15
1107*8fb009dcSAndroid Build Coastguard Worker	mov	x10,x16
1108*8fb009dcSAndroid Build Coastguard Worker	mov	x11,x17
1109*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#224
1110*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_add_to	// p256_mul_by_2(Hsqr, U2);
1111*8fb009dcSAndroid Build Coastguard Worker
1112*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#288
1113*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#0
1114*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(res_x, Rsqr, Hsqr);
1115*8fb009dcSAndroid Build Coastguard Worker
1116*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#256
1117*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_x, res_x, Hcub);
1118*8fb009dcSAndroid Build Coastguard Worker
	// x3 and x4-x7 are loaded here for the multiplication that follows
	// the subtraction (first word of in1_y, and Hcub from sp+256).
1119*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#96
1120*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x22,#32]	// forward load for p256_mul_mont
1121*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#256]
1122*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#256+16]
1123*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
1124*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_morf	// p256_sub(res_y, U2, res_x);
1125*8fb009dcSAndroid Build Coastguard Worker
1126*8fb009dcSAndroid Build Coastguard Worker	add	x2,x22,#32
1127*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#128
1128*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(S2, in1_y, Hcub);
1129*8fb009dcSAndroid Build Coastguard Worker
1130*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[sp,#192]
1131*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#32]
1132*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#32+16]
1133*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#192
1134*8fb009dcSAndroid Build Coastguard Worker	add	x0,sp,#32
1135*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_mul_mont	// p256_mul_mont(res_y, res_y, R);
1136*8fb009dcSAndroid Build Coastguard Worker
1137*8fb009dcSAndroid Build Coastguard Worker	add	x2,sp,#128
1138*8fb009dcSAndroid Build Coastguard Worker	bl	__ecp_nistz256_sub_from	// p256_sub(res_y, res_y, S2);
1139*8fb009dcSAndroid Build Coastguard Worker
	// Branch-free output selection, one coordinate (four 64-bit words)
	// at a time: x4-x7 = computed result, x8-x11 = in2, x14-x17 = in1.
	//   csel group after "cmp x24": x8-x11  = (x24 != 0) ? res    : in2
	//   csel group after "cmp x25": x14-x17 = (x25 != 0) ? x8-x11 : in1
	// The loads for the next coordinate are interleaved with the selects;
	// ldp does not alter the flags, so each cmp stays valid across them.
	//
	// --- x coordinate -> [x21,#0] ---
1140*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0]		// res
1141*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+16]
1142*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23]		// in2
1143*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#16]
1144*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#0]	// in1
1145*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
1146*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#0+16]
1147*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
1148*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
1149*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0+0+32]	// res
1150*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
1151*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
1152*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
1153*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+0+48]
1154*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
1155*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
1156*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23,#0+32]	// in2
1157*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
1158*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
1159*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#0+48]
1160*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#0]
1161*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#0+16]
	// in2_x and in2_y have now been fully loaded, so x23 is re-pointed
	// such that [x23,#64] addresses .Lone_mont; the "in2" loads for the
	// z coordinate below therefore read .Lone_mont.
	//
	// --- y coordinate -> [x21,#32] ---
1162*8fb009dcSAndroid Build Coastguard Worker	adrp	x23,.Lone_mont-64
1163*8fb009dcSAndroid Build Coastguard Worker	add	x23,x23,:lo12:.Lone_mont-64
1164*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#32]	// in1
1165*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
1166*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#32+16]
1167*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
1168*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
1169*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[sp,#0+32+32]	// res
1170*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
1171*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
1172*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
1173*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[sp,#0+32+48]
1174*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
1175*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
1176*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x23,#32+32]	// in2
1177*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
1178*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
1179*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x23,#32+48]
1180*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#32]
1181*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#32+16]
	// --- z coordinate -> [x21,#64] ---
1182*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x22,#64]	// in1
1183*8fb009dcSAndroid Build Coastguard Worker	cmp	x24,#0			// ~, remember?
1184*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x22,#64+16]
1185*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x4,x8,ne
1186*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x5,x9,ne
1187*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x6,x10,ne
1188*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x7,x11,ne
1189*8fb009dcSAndroid Build Coastguard Worker	cmp	x25,#0			// ~, remember?
1190*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x8,x14,ne
1191*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x9,x15,ne
1192*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x10,x16,ne
1193*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x11,x17,ne
1194*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x21,#64]
1195*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x21,#64+16]
1196*8fb009dcSAndroid Build Coastguard Worker
	// Epilogue: drop the scratch area by resetting sp to the frame
	// pointer, reload the saved registers and pop the 80-byte save area.
1197*8fb009dcSAndroid Build Coastguard Worker	add	sp,x29,#0		// destroy frame
1198*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
1199*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
1200*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
1201*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
1202*8fb009dcSAndroid Build Coastguard Worker	ldp	x29,x30,[sp],#80
1203*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
1204*8fb009dcSAndroid Build Coastguard Worker	ret
1205*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
1206*8fb009dcSAndroid Build Coastguard Worker////////////////////////////////////////////////////////////////////////
1207*8fb009dcSAndroid Build Coastguard Worker// void ecp_nistz256_ord_mul_mont(uint64_t res[4], uint64_t a[4],
1208*8fb009dcSAndroid Build Coastguard Worker//                                uint64_t b[4]);
//
// In:   x0 = res, x1 = a, x2 = b (four 64-bit words each).
// Out:  four words stored at [x0]; the value is the accumulator after
//       four multiply-and-reduce rounds and one conditional subtraction
//       of the modulus.
//
// Register roles:
//   x4-x7             a[0..3]
//   x3                current word b[i]
//   x12,x13,x21,x22   the four words of .Lord (the modulus)
//   x23               the quad at .Lord+32, i.e. .LordK
//   x14-x17,x19,x20   running accumulator, least significant word first
//   x24               per-round reduction multiplier m = acc[0]*.LordK
//   x8-x11            scratch for partial products
//
// The words of .Lord show ord[2] = 2^64-1 and ord[3] = 2^64-2^32, so the
// products m*ord[2] and m*ord[3] are never multiplied out: each round
// subtracts m, m<<32 and m>>32 at the right positions and adds m back
// twice one word higher. Only ord[0] and ord[1] use mul/umulh.
1209*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_ord_mul_mont
1210*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_ord_mul_mont
1211*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_ord_mul_mont,%function
1212*8fb009dcSAndroid Build Coastguard Worker.align	4
1213*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_ord_mul_mont:
1214*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
1215*8fb009dcSAndroid Build Coastguard Worker	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	// (The function makes no calls, so x30 still holds the return
	// address at the final ret.)
1216*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-64]!
1217*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
1218*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
1219*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
1220*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
1221*8fb009dcSAndroid Build Coastguard Worker
1222*8fb009dcSAndroid Build Coastguard Worker	adrp	x23,.Lord
1223*8fb009dcSAndroid Build Coastguard Worker	add	x23,x23,:lo12:.Lord
1224*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2]		// bp[0]
1225*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1]		// a[0], a[1]
1226*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#16]		// a[2], a[3]
1227*8fb009dcSAndroid Build Coastguard Worker
1228*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x23,#0]	// ord[0], ord[1]
1229*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x23,#16]	// ord[2], ord[3]
1230*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x23,#32]		// .LordK; overwrites the table pointer
1231*8fb009dcSAndroid Build Coastguard Worker
	// Round 0: acc = a * b[0].
1232*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x4,x3		// a[0]*b[0]
1233*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
1234*8fb009dcSAndroid Build Coastguard Worker
1235*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x5,x3		// a[1]*b[0]
1236*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
1237*8fb009dcSAndroid Build Coastguard Worker
1238*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x6,x3		// a[2]*b[0]
1239*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
1240*8fb009dcSAndroid Build Coastguard Worker
1241*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x7,x3		// a[3]*b[0]
1242*8fb009dcSAndroid Build Coastguard Worker	umulh	x19,x7,x3
1243*8fb009dcSAndroid Build Coastguard Worker
1244*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23		// m = acc[0]*.LordK (low 64 bits)
1245*8fb009dcSAndroid Build Coastguard Worker
1246*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts of multiplication
1247*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1248*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1249*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
1250*8fb009dcSAndroid Build Coastguard Worker	mov	x20,xzr
1251*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*1]		// b[i]
1252*8fb009dcSAndroid Build Coastguard Worker
	// Reduction step (repeated after every round). Subtract the
	// shifted copies of m that make up the negative parts of
	// m*ord[2] and m*ord[3]; the matching "+m" terms are the two
	// "adcs ...,x24" below.
1253*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1254*8fb009dcSAndroid Build Coastguard Worker	subs	x16,x16,x24
1255*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1256*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x8
1257*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,x9
1258*8fb009dcSAndroid Build Coastguard Worker	sbc	x20,x20,xzr
1259*8fb009dcSAndroid Build Coastguard Worker
	// Sets C = (acc[0] != 0). NOTE(review): this presumably stands in
	// for the carry out of acc[0] + lo(m*ord[0]), whose low word is
	// zero by the choice of m -- confirm against the generator script.
1260*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1261*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24		// hi(ord[0]*m)
1262*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24		// lo(ord[1]*m)
1263*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24		// hi(ord[1]*m)
1264*8fb009dcSAndroid Build Coastguard Worker
	// From here on the low halves of a[j]*b[i] for the next round are
	// computed in between the carry-chain instructions; mul/umulh do
	// not touch the flags.
1265*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1266*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3
1267*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1268*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3
1269*8fb009dcSAndroid Build Coastguard Worker
	// The accumulator moves down one word here: the new x14 is built
	// from the old x15, and so on.
1270*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1271*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3
1272*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1273*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3
1274*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1275*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x24
1276*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
1277*8fb009dcSAndroid Build Coastguard Worker
	// Round 1: acc += a * b[1].
1278*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts
1279*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
1280*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
1281*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
1282*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
1283*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
1284*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
1285*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3
1286*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
1287*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23		// m for this round
1288*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts
1289*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1290*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1291*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
1292*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
1293*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*2]		// b[i]
1294*8fb009dcSAndroid Build Coastguard Worker
	// Reduction step, same sequence as after round 0.
1295*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1296*8fb009dcSAndroid Build Coastguard Worker	subs	x16,x16,x24
1297*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1298*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x8
1299*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,x9
1300*8fb009dcSAndroid Build Coastguard Worker	sbc	x20,x20,xzr
1301*8fb009dcSAndroid Build Coastguard Worker
1302*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1303*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1304*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1305*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1306*8fb009dcSAndroid Build Coastguard Worker
1307*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1308*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3
1309*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1310*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3
1311*8fb009dcSAndroid Build Coastguard Worker
1312*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1313*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3
1314*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1315*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3
1316*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1317*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x24
1318*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
1319*8fb009dcSAndroid Build Coastguard Worker
	// Round 2: acc += a * b[2].
1320*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts
1321*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
1322*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
1323*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
1324*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
1325*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
1326*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
1327*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3
1328*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
1329*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23		// m for this round
1330*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts
1331*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1332*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1333*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
1334*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
1335*8fb009dcSAndroid Build Coastguard Worker	ldr	x3,[x2,#8*3]		// b[i]
1336*8fb009dcSAndroid Build Coastguard Worker
	// Reduction step, same sequence as after round 0.
1337*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1338*8fb009dcSAndroid Build Coastguard Worker	subs	x16,x16,x24
1339*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1340*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x8
1341*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,x9
1342*8fb009dcSAndroid Build Coastguard Worker	sbc	x20,x20,xzr
1343*8fb009dcSAndroid Build Coastguard Worker
1344*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1345*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1346*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1347*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1348*8fb009dcSAndroid Build Coastguard Worker
1349*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1350*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x4,x3
1351*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1352*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x3
1353*8fb009dcSAndroid Build Coastguard Worker
1354*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1355*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x3
1356*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1357*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x3
1358*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1359*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x24
1360*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
1361*8fb009dcSAndroid Build Coastguard Worker
	// Round 3: acc += a * b[3].
1362*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x8		// accumulate low parts
1363*8fb009dcSAndroid Build Coastguard Worker	umulh	x8,x4,x3
1364*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x9
1365*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x3
1366*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x10
1367*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x3
1368*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x11
1369*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x3
1370*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr
1371*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23		// m for this round
1372*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x8		// accumulate high parts
1373*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1374*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1375*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x11
1376*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr
1377*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32		// last reduction
1378*8fb009dcSAndroid Build Coastguard Worker	subs	x16,x16,x24
1379*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1380*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x17,x8
1381*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,x9
1382*8fb009dcSAndroid Build Coastguard Worker	sbc	x20,x20,xzr
1383*8fb009dcSAndroid Build Coastguard Worker
1384*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1385*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1386*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1387*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1388*8fb009dcSAndroid Build Coastguard Worker
	// No further b[] word, so this last step has no interleaved
	// multiplications.
1389*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1390*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1391*8fb009dcSAndroid Build Coastguard Worker
1392*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1393*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1394*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1395*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x19,x24
1396*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x20,xzr
1397*8fb009dcSAndroid Build Coastguard Worker
	// Trial subtraction of the modulus across all five accumulator
	// words (x19 is the top word). "lo" (carry clear) means the
	// subtraction borrowed, in which case the unsubtracted value is kept.
1398*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,x12		// ret -= modulus
1399*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x13
1400*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,x21
1401*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x22
1402*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x19,xzr
1403*8fb009dcSAndroid Build Coastguard Worker
1404*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x14,x8,lo	// ret = borrow ? ret : ret-modulus
1405*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x15,x9,lo
1406*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x16,x10,lo
1407*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0]
1408*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x17,x11,lo
1409*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#16]
1410*8fb009dcSAndroid Build Coastguard Worker
	// Epilogue: restore x19-x24, then reload x29 only while popping
	// the 64-byte frame (x30 is left as it is).
1411*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#16]
1412*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#32]
1413*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[sp,#48]
1414*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#64
1415*8fb009dcSAndroid Build Coastguard Worker	ret
1416*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont
1417*8fb009dcSAndroid Build Coastguard Worker
1418*8fb009dcSAndroid Build Coastguard Worker////////////////////////////////////////////////////////////////////////
1419*8fb009dcSAndroid Build Coastguard Worker// void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4],
1420*8fb009dcSAndroid Build Coastguard Worker//                                uint64_t rep);
//
// Squares the 4-limb value a[] |rep| times in a row, reducing after each
// squaring with a Montgomery step against the modulus stored at .Lord
// (the P-256 group order, going by the symbol name), and stores the final
// 4 limbs to res[].
//   In:    x0 = res, x1 = a, x2 = rep
//   Out:   res[0..3] written; nothing is returned in registers
//   Clobb: x1-x17 and the flags; x19-x24 and x29 are saved and restored
//   Stack: one 64-byte frame
// The counter is decremented before it is tested, so rep is expected to
// be >= 1.  NOTE(review): rep == 0 would wrap x2 and keep looping --
// confirm that no caller passes 0.
1421*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_ord_sqr_mont
1422*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_ord_sqr_mont
1423*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_ord_sqr_mont,%function
1424*8fb009dcSAndroid Build Coastguard Worker.align	4
1425*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_ord_sqr_mont:
1426*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
1427*8fb009dcSAndroid Build Coastguard Worker	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
1428*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-64]!
1429*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
1430*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
1431*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
1432*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
1433*8fb009dcSAndroid Build Coastguard Worker
	// x23 = address of .Lord; x4-x7 = a[0..3] (x1 is free for reuse
	// as a scratch register once these loads are done).
1434*8fb009dcSAndroid Build Coastguard Worker	adrp	x23,.Lord
1435*8fb009dcSAndroid Build Coastguard Worker	add	x23,x23,:lo12:.Lord
1436*8fb009dcSAndroid Build Coastguard Worker	ldp	x4,x5,[x1]
1437*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#16]
1438*8fb009dcSAndroid Build Coastguard Worker
	// x12,x13 = ord[0..1]; x21,x22 = ord[2..3]; x23 is then replaced
	// by the quad at .Lord+32, i.e. .LordK, which directly follows the
	// four .Lord quads and serves as the per-limb reduction multiplier.
1439*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x23,#0]
1440*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x23,#16]
1441*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x23,#32]
1442*8fb009dcSAndroid Build Coastguard Worker	b	.Loop_ord_sqr		// jump over any padding emitted by .align
1443*8fb009dcSAndroid Build Coastguard Worker
1444*8fb009dcSAndroid Build Coastguard Worker.align	4
1445*8fb009dcSAndroid Build Coastguard Worker.Loop_ord_sqr:
1446*8fb009dcSAndroid Build Coastguard Worker	sub	x2,x2,#1		// x2 = squarings still to do after this one
1447*8fb009dcSAndroid Build Coastguard Worker	////////////////////////////////////////////////////////////////
1448*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |  |  |a1*a0|  |
1449*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |  |a2*a0|  |  |
1450*8fb009dcSAndroid Build Coastguard Worker	//  |  |a3*a2|a3*a0|  |  |  |
1451*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |  |a2*a1|  |  |  |
1452*8fb009dcSAndroid Build Coastguard Worker	//  |  |  |a3*a1|  |  |  |  |
1453*8fb009dcSAndroid Build Coastguard Worker	// *|  |  |  |  |  |  |  | 2|
1454*8fb009dcSAndroid Build Coastguard Worker	// +|a3*a3|a2*a2|a1*a1|a0*a0|
1455*8fb009dcSAndroid Build Coastguard Worker	//  |--+--+--+--+--+--+--+--|
1456*8fb009dcSAndroid Build Coastguard Worker	//  |A7|A6|A5|A4|A3|A2|A1|A0|, where A7..A0 live in x3,x1,x20,x19,x17,x16,x15,x14
1457*8fb009dcSAndroid Build Coastguard Worker	//
1458*8fb009dcSAndroid Build Coastguard Worker	//  "can't overflow" below marks a carry being added into the high
1459*8fb009dcSAndroid Build Coastguard Worker	//  part of a multiplication result, which can't overflow, because
1460*8fb009dcSAndroid Build Coastguard Worker	//  that high part can never be all ones.
1461*8fb009dcSAndroid Build Coastguard Worker
	// Cross products a[i]*a[j], i != j, each computed once; they are
	// doubled further down.  Independent mul/umulh instructions are
	// interleaved with the carry chains, so the order of the flag-setting
	// instructions must be kept as is.
1462*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x5,x4		// a[1]*a[0]
1463*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x5,x4
1464*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x6,x4		// a[2]*a[0]
1465*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x4
1466*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x7,x4		// a[3]*a[0]
1467*8fb009dcSAndroid Build Coastguard Worker	umulh	x19,x7,x4
1468*8fb009dcSAndroid Build Coastguard Worker
1469*8fb009dcSAndroid Build Coastguard Worker	adds	x16,x16,x9		// accumulate high parts of multiplication
1470*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x6,x5		// a[2]*a[1]
1471*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x6,x5
1472*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x10
1473*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x7,x5		// a[3]*a[1]
1474*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x5
1475*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr		// can't overflow
1476*8fb009dcSAndroid Build Coastguard Worker
1477*8fb009dcSAndroid Build Coastguard Worker	mul	x20,x7,x6		// a[3]*a[2]
1478*8fb009dcSAndroid Build Coastguard Worker	umulh	x1,x7,x6
1479*8fb009dcSAndroid Build Coastguard Worker
1480*8fb009dcSAndroid Build Coastguard Worker	adds	x9,x9,x10		// accumulate high parts of multiplication
1481*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x4,x4		// a[0]*a[0]
1482*8fb009dcSAndroid Build Coastguard Worker	adc	x10,x11,xzr		// can't overflow
1483*8fb009dcSAndroid Build Coastguard Worker
1484*8fb009dcSAndroid Build Coastguard Worker	adds	x17,x17,x8		// accumulate low parts of multiplication
1485*8fb009dcSAndroid Build Coastguard Worker	umulh	x4,x4,x4
1486*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x9
1487*8fb009dcSAndroid Build Coastguard Worker	mul	x9,x5,x5		// a[1]*a[1]
1488*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x10
1489*8fb009dcSAndroid Build Coastguard Worker	umulh	x5,x5,x5
1490*8fb009dcSAndroid Build Coastguard Worker	adc	x1,x1,xzr		// can't overflow
1491*8fb009dcSAndroid Build Coastguard Worker
1492*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x15	// acc[1-6]*=2
1493*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x6		// a[2]*a[2]
1494*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x16
1495*8fb009dcSAndroid Build Coastguard Worker	umulh	x6,x6,x6
1496*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x17
1497*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x7		// a[3]*a[3]
1498*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x19
1499*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x7
1500*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x20
1501*8fb009dcSAndroid Build Coastguard Worker	adcs	x1,x1,x1
1502*8fb009dcSAndroid Build Coastguard Worker	adc	x3,xzr,xzr		// x3 = bit shifted out of the doubling
1503*8fb009dcSAndroid Build Coastguard Worker
1504*8fb009dcSAndroid Build Coastguard Worker	adds	x15,x15,x4		// +a[i]*a[i]
1505*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23		// t = A0*x23 for reduction step 1 (x14 is already final)
1506*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x9
1507*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x5
1508*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x10
1509*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x6
1510*8fb009dcSAndroid Build Coastguard Worker	adcs	x1,x1,x11
1511*8fb009dcSAndroid Build Coastguard Worker	adc	x3,x3,x7
	// Montgomery reduction of the low half x14-x17: four steps, one
	// limb each, with x24 and x11 alternating as the multiplier t.
	// Per step: t = acc[0]*x23, then acc = (acc + t*ord) >> 64.
	//  - acc[0] + lo(t*ord[0]) is expected to be 0 mod 2^64 (this
	//    assumes x23 = -1/ord mod 2^64 -- confirm against .LordK), so
	//    only its carry matters; "subs xzr,x14,#1" sets C exactly
	//    when x14 != 0 and replaces that addition.
	//  - ord[2] = 2^64-1 and ord[3] = 2^64-2^32 (see .Lord), so the
	//    products t*ord[2..3] are assembled from t, t<<32 and t>>32
	//    with additions and subtractions instead of multiplies.
	// Reduction step 1 (t = x24).
1512*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1		// C = (acc[0] != 0)
1513*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24		// hi(t*ord[0])
1514*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24		// lo(t*ord[1])
1515*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24		// hi(t*ord[1])
1516*8fb009dcSAndroid Build Coastguard Worker
1517*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9		// consumes C from the subs above
1518*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1519*8fb009dcSAndroid Build Coastguard Worker
1520*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10		// add and move the window down one limb
1521*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1522*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24		// + t
1523*8fb009dcSAndroid Build Coastguard Worker	adc	x17,xzr,x24		// can't overflow
1524*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x14,x23		// t for step 2 (new acc[0] is final here)
1525*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1526*8fb009dcSAndroid Build Coastguard Worker	subs	x15,x15,x24		// - t
1527*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1528*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x8		// - (t << 32)
1529*8fb009dcSAndroid Build Coastguard Worker	sbc	x17,x17,x9		// can't borrow
	// Reduction step 2 (t = x11).
1530*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1531*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x11
1532*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x11
1533*8fb009dcSAndroid Build Coastguard Worker	umulh	x24,x13,x11
1534*8fb009dcSAndroid Build Coastguard Worker
1535*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1536*8fb009dcSAndroid Build Coastguard Worker	adc	x24,x24,xzr
1537*8fb009dcSAndroid Build Coastguard Worker
1538*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1539*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x24
1540*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x11
1541*8fb009dcSAndroid Build Coastguard Worker	adc	x17,xzr,x11		// can't overflow
1542*8fb009dcSAndroid Build Coastguard Worker	mul	x24,x14,x23		// t for step 3
1543*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x11,#32
1544*8fb009dcSAndroid Build Coastguard Worker	subs	x15,x15,x11
1545*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x11,#32
1546*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x8
1547*8fb009dcSAndroid Build Coastguard Worker	sbc	x17,x17,x9		// can't borrow
	// Reduction step 3 (t = x24).
1548*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1549*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x24
1550*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x24
1551*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x13,x24
1552*8fb009dcSAndroid Build Coastguard Worker
1553*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1554*8fb009dcSAndroid Build Coastguard Worker	adc	x11,x11,xzr
1555*8fb009dcSAndroid Build Coastguard Worker
1556*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1557*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x11
1558*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x24
1559*8fb009dcSAndroid Build Coastguard Worker	adc	x17,xzr,x24		// can't overflow
1560*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x14,x23		// t for step 4
1561*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x24,#32
1562*8fb009dcSAndroid Build Coastguard Worker	subs	x15,x15,x24
1563*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x24,#32
1564*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x8
1565*8fb009dcSAndroid Build Coastguard Worker	sbc	x17,x17,x9		// can't borrow
	// Reduction step 4 (t = x11); no further multiplier is needed.
1566*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x14,#1
1567*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x12,x11
1568*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x13,x11
1569*8fb009dcSAndroid Build Coastguard Worker	umulh	x24,x13,x11
1570*8fb009dcSAndroid Build Coastguard Worker
1571*8fb009dcSAndroid Build Coastguard Worker	adcs	x10,x10,x9
1572*8fb009dcSAndroid Build Coastguard Worker	adc	x24,x24,xzr
1573*8fb009dcSAndroid Build Coastguard Worker
1574*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x15,x10
1575*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x16,x24
1576*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x17,x11
1577*8fb009dcSAndroid Build Coastguard Worker	adc	x17,xzr,x11		// can't overflow
1578*8fb009dcSAndroid Build Coastguard Worker	lsl	x8,x11,#32
1579*8fb009dcSAndroid Build Coastguard Worker	subs	x15,x15,x11
1580*8fb009dcSAndroid Build Coastguard Worker	lsr	x9,x11,#32
1581*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x16,x8
1582*8fb009dcSAndroid Build Coastguard Worker	sbc	x17,x17,x9		// can't borrow
	// Add A4..A7 (x19,x20,x1,x3) of the square to the reduced low
	// half; x19 then receives the carry out of the top limb.
1583*8fb009dcSAndroid Build Coastguard Worker	adds	x14,x14,x19	// accumulate upper half
1584*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x15,x20
1585*8fb009dcSAndroid Build Coastguard Worker	adcs	x16,x16,x1
1586*8fb009dcSAndroid Build Coastguard Worker	adcs	x17,x17,x3
1587*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr
1588*8fb009dcSAndroid Build Coastguard Worker
1589*8fb009dcSAndroid Build Coastguard Worker	subs	x8,x14,x12		// ret -= modulus
1590*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x15,x13
1591*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x16,x21
1592*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x17,x22
1593*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x19,xzr		// include the carry limb; only the flags are kept
1594*8fb009dcSAndroid Build Coastguard Worker
	// Branch-free final selection; the result goes back into x4-x7 so
	// that the next iteration squares it.
1595*8fb009dcSAndroid Build Coastguard Worker	csel	x4,x14,x8,lo	// ret = borrow ? ret : ret-modulus
1596*8fb009dcSAndroid Build Coastguard Worker	csel	x5,x15,x9,lo
1597*8fb009dcSAndroid Build Coastguard Worker	csel	x6,x16,x10,lo
1598*8fb009dcSAndroid Build Coastguard Worker	csel	x7,x17,x11,lo
1599*8fb009dcSAndroid Build Coastguard Worker
1600*8fb009dcSAndroid Build Coastguard Worker	cbnz	x2,.Loop_ord_sqr	// repeat until the counter reaches zero
1601*8fb009dcSAndroid Build Coastguard Worker
1602*8fb009dcSAndroid Build Coastguard Worker	stp	x4,x5,[x0]		// res[0..1]
1603*8fb009dcSAndroid Build Coastguard Worker	stp	x6,x7,[x0,#16]		// res[2..3]
1604*8fb009dcSAndroid Build Coastguard Worker
	// x30 is never written in this function, so only x29 is reloaded
	// while the 64-byte frame is popped.
1605*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#16]
1606*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#32]
1607*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[sp,#48]
1608*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#64
1609*8fb009dcSAndroid Build Coastguard Worker	ret
1610*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
1611*8fb009dcSAndroid Build Coastguard Worker////////////////////////////////////////////////////////////////////////
1612*8fb009dcSAndroid Build Coastguard Worker// void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index);
//
// Scans 16 consecutive 96-byte table entries starting at in_t and writes
// the one matching |index| to val (96 bytes).  Every entry is loaded and
// merged under an all-ones/all-zeros mask; no branch or load address
// depends on index.
//   In:    x0 = val, x1 = in_t, w2 = index
//   Out:   val[0..11] written
//   Clobb: x1, x9-x11, v3, v16-v27 and the flags
// The running counter is 1 while the first entry is examined, so index i
// in 1..16 selects entry i-1; for any other index (including 0) no entry
// matches and val is written as all zeros.
1613*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_select_w5
1614*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_select_w5
1615*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_select_w5,%function
1616*8fb009dcSAndroid Build Coastguard Worker.align	4
1617*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_select_w5:
1618*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
1619*8fb009dcSAndroid Build Coastguard Worker
1620*8fb009dcSAndroid Build Coastguard Worker    // x10 := x0 (copy of the output pointer, advanced by the final stores)
1621*8fb009dcSAndroid Build Coastguard Worker    // w9 := 0; loop counter and incremented internal index
1622*8fb009dcSAndroid Build Coastguard Worker	mov	x10, x0
1623*8fb009dcSAndroid Build Coastguard Worker	mov	w9, #0
1624*8fb009dcSAndroid Build Coastguard Worker
1625*8fb009dcSAndroid Build Coastguard Worker    // [v16-v21] := 0 (result accumulator; stays zero if nothing matches)
1626*8fb009dcSAndroid Build Coastguard Worker	movi	v16.16b, #0
1627*8fb009dcSAndroid Build Coastguard Worker	movi	v17.16b, #0
1628*8fb009dcSAndroid Build Coastguard Worker	movi	v18.16b, #0
1629*8fb009dcSAndroid Build Coastguard Worker	movi	v19.16b, #0
1630*8fb009dcSAndroid Build Coastguard Worker	movi	v20.16b, #0
1631*8fb009dcSAndroid Build Coastguard Worker	movi	v21.16b, #0
1632*8fb009dcSAndroid Build Coastguard Worker
1633*8fb009dcSAndroid Build Coastguard Worker.Lselect_w5_loop:
1634*8fb009dcSAndroid Build Coastguard Worker    // Loop 16 times.
1635*8fb009dcSAndroid Build Coastguard Worker
1636*8fb009dcSAndroid Build Coastguard Worker    // Increment index (loop counter); tested at the end of the loop
1637*8fb009dcSAndroid Build Coastguard Worker	add	w9, w9, #1
1638*8fb009dcSAndroid Build Coastguard Worker
1639*8fb009dcSAndroid Build Coastguard Worker    // [v22-v27] := Load a (3*256-bit = 6*128-bit) table entry starting at x1
1640*8fb009dcSAndroid Build Coastguard Worker    //  and advance x1 to point to the next entry
1641*8fb009dcSAndroid Build Coastguard Worker	ld1	{v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64
1642*8fb009dcSAndroid Build Coastguard Worker
1643*8fb009dcSAndroid Build Coastguard Worker    // x11 := (w9 == w2)? All 1s : All 0s
1644*8fb009dcSAndroid Build Coastguard Worker	cmp	w9, w2
1645*8fb009dcSAndroid Build Coastguard Worker	csetm	x11, eq
1646*8fb009dcSAndroid Build Coastguard Worker
1647*8fb009dcSAndroid Build Coastguard Worker    // continue loading: the remaining 32 bytes of the entry go to [v26-v27]
1648*8fb009dcSAndroid Build Coastguard Worker	ld1	{v26.2d, v27.2d}, [x1],#32
1649*8fb009dcSAndroid Build Coastguard Worker
1650*8fb009dcSAndroid Build Coastguard Worker    // duplicate mask_64 into Mask (all 0s or all 1s)
1651*8fb009dcSAndroid Build Coastguard Worker	dup	v3.2d, x11
1652*8fb009dcSAndroid Build Coastguard Worker
1653*8fb009dcSAndroid Build Coastguard Worker    // [v16-v21] := (Mask == all 1s)? [v22-v27] : [v16-v21]
1654*8fb009dcSAndroid Build Coastguard Worker    // i.e., values in output registers will remain the same if w9 != w2
1655*8fb009dcSAndroid Build Coastguard Worker	bit	v16.16b, v22.16b, v3.16b
1656*8fb009dcSAndroid Build Coastguard Worker	bit	v17.16b, v23.16b, v3.16b
1657*8fb009dcSAndroid Build Coastguard Worker
1658*8fb009dcSAndroid Build Coastguard Worker	bit	v18.16b, v24.16b, v3.16b
1659*8fb009dcSAndroid Build Coastguard Worker	bit	v19.16b, v25.16b, v3.16b
1660*8fb009dcSAndroid Build Coastguard Worker
1661*8fb009dcSAndroid Build Coastguard Worker	bit	v20.16b, v26.16b, v3.16b
1662*8fb009dcSAndroid Build Coastguard Worker	bit	v21.16b, v27.16b, v3.16b
1663*8fb009dcSAndroid Build Coastguard Worker
1664*8fb009dcSAndroid Build Coastguard Worker    // If bit #4 of the counter is still 0 (i.e. idx_ctr < 16) loop back
1665*8fb009dcSAndroid Build Coastguard Worker	tbz	w9, #4, .Lselect_w5_loop
1666*8fb009dcSAndroid Build Coastguard Worker
1667*8fb009dcSAndroid Build Coastguard Worker    // Write [v16-v21] to memory at the output pointer
1668*8fb009dcSAndroid Build Coastguard Worker	st1	{v16.2d, v17.2d, v18.2d, v19.2d}, [x10],#64
1669*8fb009dcSAndroid Build Coastguard Worker	st1	{v20.2d, v21.2d}, [x10]
1670*8fb009dcSAndroid Build Coastguard Worker
1671*8fb009dcSAndroid Build Coastguard Worker	ret
1672*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
1673*8fb009dcSAndroid Build Coastguard Worker
1674*8fb009dcSAndroid Build Coastguard Worker
1675*8fb009dcSAndroid Build Coastguard Worker////////////////////////////////////////////////////////////////////////
1676*8fb009dcSAndroid Build Coastguard Worker// void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index);
//
// Scans 64 consecutive 64-byte table entries starting at in_t and writes
// the one matching |index| to val (64 bytes).  Every entry is loaded and
// merged under an all-ones/all-zeros mask; no branch or load address
// depends on index.
//   In:    x0 = val, x1 = in_t, w2 = index
//   Out:   val[0..7] written
//   Clobb: x1, x9, x11, v3, v16-v19, v22-v25 and the flags
// The running counter is 1 while the first entry is examined, so index i
// in 1..64 selects entry i-1; for any other index (including 0) no entry
// matches and val is written as all zeros.
1677*8fb009dcSAndroid Build Coastguard Worker.globl	ecp_nistz256_select_w7
1678*8fb009dcSAndroid Build Coastguard Worker.hidden	ecp_nistz256_select_w7
1679*8fb009dcSAndroid Build Coastguard Worker.type	ecp_nistz256_select_w7,%function
1680*8fb009dcSAndroid Build Coastguard Worker.align	4
1681*8fb009dcSAndroid Build Coastguard Workerecp_nistz256_select_w7:
1682*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
1683*8fb009dcSAndroid Build Coastguard Worker
1684*8fb009dcSAndroid Build Coastguard Worker    // w9 := 0; loop counter and incremented internal index
1685*8fb009dcSAndroid Build Coastguard Worker	mov	w9, #0
1686*8fb009dcSAndroid Build Coastguard Worker
1687*8fb009dcSAndroid Build Coastguard Worker    // [v16-v19] := 0 (result accumulator; stays zero if nothing matches)
1688*8fb009dcSAndroid Build Coastguard Worker	movi	v16.16b, #0
1689*8fb009dcSAndroid Build Coastguard Worker	movi	v17.16b, #0
1690*8fb009dcSAndroid Build Coastguard Worker	movi	v18.16b, #0
1691*8fb009dcSAndroid Build Coastguard Worker	movi	v19.16b, #0
1692*8fb009dcSAndroid Build Coastguard Worker
1693*8fb009dcSAndroid Build Coastguard Worker.Lselect_w7_loop:
1694*8fb009dcSAndroid Build Coastguard Worker    // Loop 64 times.
1695*8fb009dcSAndroid Build Coastguard Worker
1696*8fb009dcSAndroid Build Coastguard Worker    // Increment index (loop counter); tested at the end of the loop
1697*8fb009dcSAndroid Build Coastguard Worker	add	w9, w9, #1
1698*8fb009dcSAndroid Build Coastguard Worker
1699*8fb009dcSAndroid Build Coastguard Worker    // [v22-v25] := Load a (2*256-bit = 4*128-bit) table entry starting at x1
1700*8fb009dcSAndroid Build Coastguard Worker    //  and advance x1 to point to the next entry
1701*8fb009dcSAndroid Build Coastguard Worker	ld1	{v22.2d, v23.2d, v24.2d, v25.2d}, [x1],#64
1702*8fb009dcSAndroid Build Coastguard Worker
1703*8fb009dcSAndroid Build Coastguard Worker    // x11 := (w9 == w2)? All 1s : All 0s
1704*8fb009dcSAndroid Build Coastguard Worker	cmp	w9, w2
1705*8fb009dcSAndroid Build Coastguard Worker	csetm	x11, eq
1706*8fb009dcSAndroid Build Coastguard Worker
1707*8fb009dcSAndroid Build Coastguard Worker    // duplicate mask_64 into Mask (all 0s or all 1s)
1708*8fb009dcSAndroid Build Coastguard Worker	dup	v3.2d, x11
1709*8fb009dcSAndroid Build Coastguard Worker
1710*8fb009dcSAndroid Build Coastguard Worker    // [v16-v19] := (Mask == all 1s)? [v22-v25] : [v16-v19]
1711*8fb009dcSAndroid Build Coastguard Worker    // i.e., values in output registers will remain the same if w9 != w2
1712*8fb009dcSAndroid Build Coastguard Worker	bit	v16.16b, v22.16b, v3.16b
1713*8fb009dcSAndroid Build Coastguard Worker	bit	v17.16b, v23.16b, v3.16b
1714*8fb009dcSAndroid Build Coastguard Worker
1715*8fb009dcSAndroid Build Coastguard Worker	bit	v18.16b, v24.16b, v3.16b
1716*8fb009dcSAndroid Build Coastguard Worker	bit	v19.16b, v25.16b, v3.16b
1717*8fb009dcSAndroid Build Coastguard Worker
1718*8fb009dcSAndroid Build Coastguard Worker    // If bit #6 of the counter is still 0 (i.e. idx_ctr < 64) loop back
1719*8fb009dcSAndroid Build Coastguard Worker	tbz	w9, #6, .Lselect_w7_loop
1720*8fb009dcSAndroid Build Coastguard Worker
1721*8fb009dcSAndroid Build Coastguard Worker    // Write [v16-v19] to memory at the output pointer
1722*8fb009dcSAndroid Build Coastguard Worker	st1	{v16.2d, v17.2d, v18.2d, v19.2d}, [x0]
1723*8fb009dcSAndroid Build Coastguard Worker
1724*8fb009dcSAndroid Build Coastguard Worker	ret
1725*8fb009dcSAndroid Build Coastguard Worker.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
1726*8fb009dcSAndroid Build Coastguard Worker#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
1727