xref: /aosp_15_r20/external/boringssl/src/gen/bcm/armv8-mont-linux.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1*8fb009dcSAndroid Build Coastguard Worker// This file is generated from a similarly-named Perl script in the BoringSSL
2*8fb009dcSAndroid Build Coastguard Worker// source tree. Do not edit by hand.
3*8fb009dcSAndroid Build Coastguard Worker
4*8fb009dcSAndroid Build Coastguard Worker#include <openssl/asm_base.h>
5*8fb009dcSAndroid Build Coastguard Worker
6*8fb009dcSAndroid Build Coastguard Worker#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
7*8fb009dcSAndroid Build Coastguard Worker#include <openssl/arm_arch.h>
8*8fb009dcSAndroid Build Coastguard Worker
9*8fb009dcSAndroid Build Coastguard Worker.text
10*8fb009dcSAndroid Build Coastguard Worker
// bn_mul_mont -- word-by-word Montgomery multiplication, generic path.
//
// Register roles on entry, as used by the code below:
//   x0 = rp   result, num words (also used as scratch for tp-np)
//   x1 = ap   first operand
//   x2 = bp   second operand
//   x3 = np   modulus
//   x4 = pointer to n0 (dereferenced once, then x4 holds the value)
//   x5 = num  length in 64-bit words (converted to bytes early on)
// Returns 1 in x0.
//
// Dispatch: num % 8 == 0 branches to __bn_sqr8x_mont, otherwise
// num % 4 == 0 branches to __bn_mul4x_mont; every other num falls
// through into .Lmul_mont.
// NOTE(review): ap[0..1] and np[0..1] are loaded unconditionally and
// the loop counters start at num-2, so this path appears to assume
// num >= 2 -- confirm against callers.
//
// Stack: a 64-byte frame (x29/x30 and x19..x24) plus num*8 bytes of
// scratch tp[] below it, with sp rounded down to 16 bytes. Every word
// of tp[] is overwritten with zero before returning.
//
// Flag discipline: the carry chains below are kept alive across
// cbz/cbnz, sub (without s), csel, ldr and str, none of which write
// NZCV. Do not reorder flag-setting instructions.
11*8fb009dcSAndroid Build Coastguard Worker.globl	bn_mul_mont
12*8fb009dcSAndroid Build Coastguard Worker.hidden	bn_mul_mont
13*8fb009dcSAndroid Build Coastguard Worker.type	bn_mul_mont,%function
14*8fb009dcSAndroid Build Coastguard Worker.align	5
15*8fb009dcSAndroid Build Coastguard Workerbn_mul_mont:
16*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER
17*8fb009dcSAndroid Build Coastguard Worker	tst	x5,#7		// num % 8 == 0?
18*8fb009dcSAndroid Build Coastguard Worker	b.eq	__bn_sqr8x_mont	// yes; that entry re-routes to mul4x when ap != bp
19*8fb009dcSAndroid Build Coastguard Worker	tst	x5,#3		// num % 4 == 0?
20*8fb009dcSAndroid Build Coastguard Worker	b.eq	__bn_mul4x_mont
21*8fb009dcSAndroid Build Coastguard Worker.Lmul_mont:
22*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-64]!	// open 64-byte frame
23*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0		// x29 = frame base, used to restore sp at exit
24*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
25*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
26*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
27*8fb009dcSAndroid Build Coastguard Worker
28*8fb009dcSAndroid Build Coastguard Worker	ldr	x9,[x2],#8		// bp[0]
29*8fb009dcSAndroid Build Coastguard Worker	sub	x22,sp,x5,lsl#3	// x22 = sp - num*8, room for tp[]
30*8fb009dcSAndroid Build Coastguard Worker	ldp	x7,x8,[x1],#16	// ap[0..1]
31*8fb009dcSAndroid Build Coastguard Worker	lsl	x5,x5,#3		// x5 = num in bytes from here on
32*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x4]		// *n0
33*8fb009dcSAndroid Build Coastguard Worker	and	x22,x22,#-16		// ABI says so
34*8fb009dcSAndroid Build Coastguard Worker	ldp	x13,x14,[x3],#16	// np[0..1]
35*8fb009dcSAndroid Build Coastguard Worker
36*8fb009dcSAndroid Build Coastguard Worker	mul	x6,x7,x9		// ap[0]*bp[0]
37*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x5,#16		// j=num-2
38*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x9		// hi(ap[0]*bp[0])
39*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x8,x9		// ap[1]*bp[0]
40*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x8,x9		// hi(ap[1]*bp[0])
41*8fb009dcSAndroid Build Coastguard Worker
42*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x6,x4		// "tp[0]"*n0
43*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x22			// alloca
44*8fb009dcSAndroid Build Coastguard Worker
45*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x12,x13,x15	// np[0]*m1
46*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x13,x15		// hi(np[0]*m1)
47*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x14,x15		// np[1]*m1
48*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	x12,x12,x6	// discarded
49*8fb009dcSAndroid Build Coastguard Worker	// (*)	As for removal of first multiplication and addition
50*8fb009dcSAndroid Build Coastguard Worker	//	instructions. The outcome of first addition is
51*8fb009dcSAndroid Build Coastguard Worker	//	guaranteed to be zero, which leaves two computationally
52*8fb009dcSAndroid Build Coastguard Worker	//	significant outcomes: it either carries or not. Then
53*8fb009dcSAndroid Build Coastguard Worker	//	question is when does it carry? Is there alternative
54*8fb009dcSAndroid Build Coastguard Worker	//	way to deduce it? If you follow operations, you can
55*8fb009dcSAndroid Build Coastguard Worker	//	observe that condition for carry is quite simple:
56*8fb009dcSAndroid Build Coastguard Worker	//	x6 being non-zero. So that carry can be calculated
57*8fb009dcSAndroid Build Coastguard Worker	//	by adding -1 to x6. That's what next instruction does.
58*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x6,#1		// (*)
59*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x14,x15		// hi(np[1]*m1)
60*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x13,xzr		// fold the deduced carry into hi(np[0]*m1)
61*8fb009dcSAndroid Build Coastguard Worker	cbz	x21,.L1st_skip		// num == 2: no loop iterations needed
62*8fb009dcSAndroid Build Coastguard Worker
// First pass (bp[0]): x6/x7 accumulate the ap[j]*bp[0] column, x12/x13
// the np[j]*m1 column. Each iteration stores one word to tp[j-1], so
// the running sum lands in tp[] shifted down by one word.
63*8fb009dcSAndroid Build Coastguard Worker.L1st:
64*8fb009dcSAndroid Build Coastguard Worker	ldr	x8,[x1],#8		// ap[j]
65*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x10,x7
66*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x21,#8		// j--
67*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x11,xzr
68*8fb009dcSAndroid Build Coastguard Worker
69*8fb009dcSAndroid Build Coastguard Worker	ldr	x14,[x3],#8		// np[j]
70*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x16,x13
71*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x8,x9		// ap[j]*bp[0]
72*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x17,xzr
73*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x8,x9
74*8fb009dcSAndroid Build Coastguard Worker
75*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x12,x6
76*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x14,x15		// np[j]*m1
77*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x13,xzr
78*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x14,x15
79*8fb009dcSAndroid Build Coastguard Worker	str	x12,[x22],#8		// tp[j-1]
80*8fb009dcSAndroid Build Coastguard Worker	cbnz	x21,.L1st
81*8fb009dcSAndroid Build Coastguard Worker
// Tail of the first pass: combine the last column, produce the top two
// words of tp[] and the overflow bit, and rewind ap/np for the next pass.
82*8fb009dcSAndroid Build Coastguard Worker.L1st_skip:
83*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x10,x7
84*8fb009dcSAndroid Build Coastguard Worker	sub	x1,x1,x5		// rewind x1
85*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x11,xzr
86*8fb009dcSAndroid Build Coastguard Worker
87*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x16,x13
88*8fb009dcSAndroid Build Coastguard Worker	sub	x3,x3,x5		// rewind x3
89*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x17,xzr
90*8fb009dcSAndroid Build Coastguard Worker
91*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x12,x6
92*8fb009dcSAndroid Build Coastguard Worker	sub	x20,x5,#8		// i=num-1
93*8fb009dcSAndroid Build Coastguard Worker	adcs	x13,x13,x7
94*8fb009dcSAndroid Build Coastguard Worker
95*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr		// upmost overflow bit
96*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x22]		// tp[num-2], tp[num-1]
97*8fb009dcSAndroid Build Coastguard Worker
// Remaining passes (bp[1..num-1]): same structure as the first pass,
// but each column also adds the tp[] word left by the previous pass.
// x20 counts the passes still to run, in bytes.
98*8fb009dcSAndroid Build Coastguard Worker.Louter:
99*8fb009dcSAndroid Build Coastguard Worker	ldr	x9,[x2],#8		// bp[i]
100*8fb009dcSAndroid Build Coastguard Worker	ldp	x7,x8,[x1],#16		// ap[0..1]
101*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[sp]		// tp[0]
102*8fb009dcSAndroid Build Coastguard Worker	add	x22,sp,#8		// x22 -> tp[1]
103*8fb009dcSAndroid Build Coastguard Worker
104*8fb009dcSAndroid Build Coastguard Worker	mul	x6,x7,x9		// ap[0]*bp[i]
105*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x5,#16		// j=num-2
106*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x9
107*8fb009dcSAndroid Build Coastguard Worker	ldp	x13,x14,[x3],#16	// np[0..1]
108*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x8,x9		// ap[1]*bp[i]
109*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x6,x23		// lo(ap[0]*bp[i]) + tp[0]
110*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x8,x9
111*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x7,xzr
112*8fb009dcSAndroid Build Coastguard Worker
113*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x6,x4		// m1 = (that sum)*n0
114*8fb009dcSAndroid Build Coastguard Worker	sub	x20,x20,#8		// i--
115*8fb009dcSAndroid Build Coastguard Worker
116*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x12,x13,x15	// np[0]*m1
117*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x13,x15
118*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x14,x15		// np[1]*m1
119*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	x12,x12,x6
120*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x6,#1		// (*) carry is consumed by the first adc x13 in .Linner/.Linner_skip
121*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x14,x15
122*8fb009dcSAndroid Build Coastguard Worker	cbz	x21,.Linner_skip
123*8fb009dcSAndroid Build Coastguard Worker
124*8fb009dcSAndroid Build Coastguard Worker.Linner:
125*8fb009dcSAndroid Build Coastguard Worker	ldr	x8,[x1],#8		// ap[j]
126*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x13,xzr		// pending carry: from subs above on entry, else from adds x12 at the loop bottom
127*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x22],#8		// tp[j]
128*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x10,x7
129*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x21,#8		// j--
130*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x11,xzr
131*8fb009dcSAndroid Build Coastguard Worker
132*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x16,x13
133*8fb009dcSAndroid Build Coastguard Worker	ldr	x14,[x3],#8		// np[j]
134*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x17,xzr
135*8fb009dcSAndroid Build Coastguard Worker
136*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x8,x9		// ap[j]*bp[i]
137*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x6,x23		// add in tp[j]
138*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x8,x9
139*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x7,xzr
140*8fb009dcSAndroid Build Coastguard Worker
141*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x14,x15		// np[j]*m1
142*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x12,x6		// carry out is picked up by the next adc x13
143*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x14,x15
144*8fb009dcSAndroid Build Coastguard Worker	str	x12,[x22,#-16]		// tp[j-1]
145*8fb009dcSAndroid Build Coastguard Worker	cbnz	x21,.Linner
146*8fb009dcSAndroid Build Coastguard Worker
// Tail of a pass: as .L1st_skip, but the previous overflow bit (x19)
// is added into the top word and a new overflow bit is produced.
147*8fb009dcSAndroid Build Coastguard Worker.Linner_skip:
148*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x22],#8		// tp[j]
149*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x13,xzr		// pending carry, see .Linner
150*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x10,x7
151*8fb009dcSAndroid Build Coastguard Worker	sub	x1,x1,x5		// rewind x1
152*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x11,xzr
153*8fb009dcSAndroid Build Coastguard Worker
154*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x16,x13
155*8fb009dcSAndroid Build Coastguard Worker	sub	x3,x3,x5		// rewind x3
156*8fb009dcSAndroid Build Coastguard Worker	adcs	x13,x17,x19		// include previous upmost overflow bit
157*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr		// restart overflow bit from this carry
158*8fb009dcSAndroid Build Coastguard Worker
159*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x6,x23
160*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x7,xzr
161*8fb009dcSAndroid Build Coastguard Worker
162*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x12,x6
163*8fb009dcSAndroid Build Coastguard Worker	adcs	x13,x13,x7
164*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr		// upmost overflow bit
165*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x22,#-16]	// top two words of tp[]
166*8fb009dcSAndroid Build Coastguard Worker
167*8fb009dcSAndroid Build Coastguard Worker	cbnz	x20,.Louter
168*8fb009dcSAndroid Build Coastguard Worker
169*8fb009dcSAndroid Build Coastguard Worker	// Final step. We see if result is larger than modulus, and
170*8fb009dcSAndroid Build Coastguard Worker	// if it is, subtract the modulus. But comparison implies
171*8fb009dcSAndroid Build Coastguard Worker	// subtraction. So we subtract modulus, see if it borrowed,
172*8fb009dcSAndroid Build Coastguard Worker	// and conditionally copy original value.
173*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[sp]		// tp[0]
174*8fb009dcSAndroid Build Coastguard Worker	add	x22,sp,#8		// x22 -> tp[1]
175*8fb009dcSAndroid Build Coastguard Worker	ldr	x14,[x3],#8		// np[0]
176*8fb009dcSAndroid Build Coastguard Worker	subs	x21,x5,#8		// j=num-1 and clear borrow
177*8fb009dcSAndroid Build Coastguard Worker	mov	x1,x0			// x1 = write cursor into rp, x0 keeps rp base
178*8fb009dcSAndroid Build Coastguard Worker.Lsub:
179*8fb009dcSAndroid Build Coastguard Worker	sbcs	x8,x23,x14		// tp[j]-np[j]
180*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x22],#8		// next tp word
181*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x21,#8		// j--
182*8fb009dcSAndroid Build Coastguard Worker	ldr	x14,[x3],#8		// next np word
183*8fb009dcSAndroid Build Coastguard Worker	str	x8,[x1],#8		// rp[j]=tp[j]-np[j]
184*8fb009dcSAndroid Build Coastguard Worker	cbnz	x21,.Lsub
185*8fb009dcSAndroid Build Coastguard Worker
186*8fb009dcSAndroid Build Coastguard Worker	sbcs	x8,x23,x14		// top word: tp[num-1]-np[num-1]
187*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,xzr		// did it borrow?
188*8fb009dcSAndroid Build Coastguard Worker	str	x8,[x1],#8		// rp[num-1]
189*8fb009dcSAndroid Build Coastguard Worker
// Select the result without branching on the borrow: "lo" (carry
// clear) means the subtraction borrowed, so the original tp word is
// kept; otherwise the difference already stored in rp is kept. The
// flags from the sbcs above stay live through the whole loop.
190*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[sp]		// tp[0]
191*8fb009dcSAndroid Build Coastguard Worker	add	x22,sp,#8		// x22 -> tp[1]
192*8fb009dcSAndroid Build Coastguard Worker	ldr	x8,[x0],#8		// rp[0]
193*8fb009dcSAndroid Build Coastguard Worker	sub	x5,x5,#8		// num--
194*8fb009dcSAndroid Build Coastguard Worker	nop
195*8fb009dcSAndroid Build Coastguard Worker.Lcond_copy:
196*8fb009dcSAndroid Build Coastguard Worker	sub	x5,x5,#8		// num--
197*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x23,x8,lo		// did it borrow?
198*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x22],#8		// next tp word
199*8fb009dcSAndroid Build Coastguard Worker	ldr	x8,[x0],#8		// next rp word
200*8fb009dcSAndroid Build Coastguard Worker	str	xzr,[x22,#-16]		// wipe tp
201*8fb009dcSAndroid Build Coastguard Worker	str	x14,[x0,#-16]		// rp word = selected value
202*8fb009dcSAndroid Build Coastguard Worker	cbnz	x5,.Lcond_copy
203*8fb009dcSAndroid Build Coastguard Worker
204*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x23,x8,lo		// last word, same selection
205*8fb009dcSAndroid Build Coastguard Worker	str	xzr,[x22,#-8]		// wipe tp
206*8fb009dcSAndroid Build Coastguard Worker	str	x14,[x0,#-8]
207*8fb009dcSAndroid Build Coastguard Worker
208*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
209*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x29			// release tp[]: sp back to frame base
210*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
211*8fb009dcSAndroid Build Coastguard Worker	mov	x0,#1			// return value
212*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
213*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#64		// pop frame; x30 is not reloaded from it
214*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
215*8fb009dcSAndroid Build Coastguard Worker	ret
216*8fb009dcSAndroid Build Coastguard Worker.size	bn_mul_mont,.-bn_mul_mont
217*8fb009dcSAndroid Build Coastguard Worker.type	__bn_sqr8x_mont,%function
218*8fb009dcSAndroid Build Coastguard Worker.align	5
219*8fb009dcSAndroid Build Coastguard Worker__bn_sqr8x_mont:
220*8fb009dcSAndroid Build Coastguard Worker	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
221*8fb009dcSAndroid Build Coastguard Worker	// only from bn_mul_mont which has already signed the return address.
222*8fb009dcSAndroid Build Coastguard Worker	cmp	x1,x2
223*8fb009dcSAndroid Build Coastguard Worker	b.ne	__bn_mul4x_mont
224*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_mont:
225*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-128]!
226*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
227*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
228*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
229*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
230*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[sp,#64]
231*8fb009dcSAndroid Build Coastguard Worker	stp	x27,x28,[sp,#80]
232*8fb009dcSAndroid Build Coastguard Worker	stp	x0,x3,[sp,#96]	// offload rp and np
233*8fb009dcSAndroid Build Coastguard Worker
234*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
235*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
236*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
237*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
238*8fb009dcSAndroid Build Coastguard Worker
239*8fb009dcSAndroid Build Coastguard Worker	sub	x2,sp,x5,lsl#4
240*8fb009dcSAndroid Build Coastguard Worker	lsl	x5,x5,#3
241*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x4]		// *n0
242*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x2			// alloca
243*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x5,#8*8
244*8fb009dcSAndroid Build Coastguard Worker	b	.Lsqr8x_zero_start
245*8fb009dcSAndroid Build Coastguard Worker
246*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_zero:
247*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#8*8
248*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*0]
249*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*2]
250*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*4]
251*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*6]
252*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_zero_start:
253*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*8]
254*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*10]
255*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*12]
256*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*14]
257*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*16
258*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,.Lsqr8x_zero
259*8fb009dcSAndroid Build Coastguard Worker
260*8fb009dcSAndroid Build Coastguard Worker	add	x3,x1,x5
261*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
262*8fb009dcSAndroid Build Coastguard Worker	mov	x19,xzr
263*8fb009dcSAndroid Build Coastguard Worker	mov	x20,xzr
264*8fb009dcSAndroid Build Coastguard Worker	mov	x21,xzr
265*8fb009dcSAndroid Build Coastguard Worker	mov	x22,xzr
266*8fb009dcSAndroid Build Coastguard Worker	mov	x23,xzr
267*8fb009dcSAndroid Build Coastguard Worker	mov	x24,xzr
268*8fb009dcSAndroid Build Coastguard Worker	mov	x25,xzr
269*8fb009dcSAndroid Build Coastguard Worker	mov	x26,xzr
270*8fb009dcSAndroid Build Coastguard Worker	mov	x2,sp
271*8fb009dcSAndroid Build Coastguard Worker	str	x4,[x29,#112]		// offload n0
272*8fb009dcSAndroid Build Coastguard Worker
273*8fb009dcSAndroid Build Coastguard Worker	// Multiply everything but a[i]*a[i]
274*8fb009dcSAndroid Build Coastguard Worker.align	4
275*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_outer_loop:
276*8fb009dcSAndroid Build Coastguard Worker        //                                                 a[1]a[0]	(i)
277*8fb009dcSAndroid Build Coastguard Worker        //                                             a[2]a[0]
278*8fb009dcSAndroid Build Coastguard Worker        //                                         a[3]a[0]
279*8fb009dcSAndroid Build Coastguard Worker        //                                     a[4]a[0]
280*8fb009dcSAndroid Build Coastguard Worker        //                                 a[5]a[0]
281*8fb009dcSAndroid Build Coastguard Worker        //                             a[6]a[0]
282*8fb009dcSAndroid Build Coastguard Worker        //                         a[7]a[0]
283*8fb009dcSAndroid Build Coastguard Worker        //                                         a[2]a[1]		(ii)
284*8fb009dcSAndroid Build Coastguard Worker        //                                     a[3]a[1]
285*8fb009dcSAndroid Build Coastguard Worker        //                                 a[4]a[1]
286*8fb009dcSAndroid Build Coastguard Worker        //                             a[5]a[1]
287*8fb009dcSAndroid Build Coastguard Worker        //                         a[6]a[1]
288*8fb009dcSAndroid Build Coastguard Worker        //                     a[7]a[1]
289*8fb009dcSAndroid Build Coastguard Worker        //                                 a[3]a[2]			(iii)
290*8fb009dcSAndroid Build Coastguard Worker        //                             a[4]a[2]
291*8fb009dcSAndroid Build Coastguard Worker        //                         a[5]a[2]
292*8fb009dcSAndroid Build Coastguard Worker        //                     a[6]a[2]
293*8fb009dcSAndroid Build Coastguard Worker        //                 a[7]a[2]
294*8fb009dcSAndroid Build Coastguard Worker        //                         a[4]a[3]				(iv)
295*8fb009dcSAndroid Build Coastguard Worker        //                     a[5]a[3]
296*8fb009dcSAndroid Build Coastguard Worker        //                 a[6]a[3]
297*8fb009dcSAndroid Build Coastguard Worker        //             a[7]a[3]
298*8fb009dcSAndroid Build Coastguard Worker        //                 a[5]a[4]					(v)
299*8fb009dcSAndroid Build Coastguard Worker        //             a[6]a[4]
300*8fb009dcSAndroid Build Coastguard Worker        //         a[7]a[4]
301*8fb009dcSAndroid Build Coastguard Worker        //         a[6]a[5]						(vi)
302*8fb009dcSAndroid Build Coastguard Worker        //     a[7]a[5]
303*8fb009dcSAndroid Build Coastguard Worker        // a[7]a[6]							(vii)
304*8fb009dcSAndroid Build Coastguard Worker
305*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x7,x6		// lo(a[1..7]*a[0])		(i)
306*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x8,x6
307*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x9,x6
308*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x10,x6
309*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x14		// t[1]+lo(a[1]*a[0])
310*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x11,x6
311*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x15
312*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x12,x6
313*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x16
314*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x13,x6
315*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x17
316*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x7,x6		// hi(a[1..7]*a[0])
317*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x14
318*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x8,x6
319*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x15
320*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x9,x6
321*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x16
322*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x10,x6
323*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2],#8*2	// t[0..1]
324*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr		// t[8]
325*8fb009dcSAndroid Build Coastguard Worker	adds	x21,x21,x17		// t[2]+lo(a[1]*a[0])
326*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x11,x6
327*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x14
328*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x12,x6
329*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x15
330*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x13,x6
331*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x16
332*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x8,x7		// lo(a[2..7]*a[1])		(ii)
333*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x17
334*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x9,x7
335*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x14
336*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x7
337*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,x15
338*8fb009dcSAndroid Build Coastguard Worker
339*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x7
340*8fb009dcSAndroid Build Coastguard Worker	adds	x22,x22,x16
341*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x7
342*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x17
343*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x7
344*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x14
345*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x8,x7		// hi(a[2..7]*a[1])
346*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x15
347*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x9,x7
348*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x16
349*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x10,x7
350*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x17
351*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x11,x7
352*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2],#8*2	// t[2..3]
353*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr		// t[9]
354*8fb009dcSAndroid Build Coastguard Worker	adds	x23,x23,x14
355*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x12,x7
356*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
357*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x13,x7
358*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
359*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x9,x8		// lo(a[3..7]*a[2])		(iii)
360*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x17
361*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x10,x8
362*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x14
363*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x11,x8
364*8fb009dcSAndroid Build Coastguard Worker	adc	x20,x20,x15
365*8fb009dcSAndroid Build Coastguard Worker
366*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x12,x8
367*8fb009dcSAndroid Build Coastguard Worker	adds	x24,x24,x16
368*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x13,x8
369*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x17
370*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x9,x8		// hi(a[3..7]*a[2])
371*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x14
372*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x8
373*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x15
374*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x8
375*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x16
376*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x8
377*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2],#8*2	// t[4..5]
378*8fb009dcSAndroid Build Coastguard Worker	adc	x21,xzr,xzr		// t[10]
379*8fb009dcSAndroid Build Coastguard Worker	adds	x25,x25,x17
380*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x8
381*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x14
382*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x9		// lo(a[4..7]*a[3])		(iv)
383*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x15
384*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x9
385*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x16
386*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x9
387*8fb009dcSAndroid Build Coastguard Worker	adc	x21,x21,x17
388*8fb009dcSAndroid Build Coastguard Worker
389*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x9
390*8fb009dcSAndroid Build Coastguard Worker	adds	x26,x26,x14
391*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x9		// hi(a[4..7]*a[3])
392*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x15
393*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x9
394*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x16
395*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x9
396*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x17
397*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x9
398*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2],#8*2	// t[6..7]
399*8fb009dcSAndroid Build Coastguard Worker	adc	x22,xzr,xzr		// t[11]
400*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
401*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x11,x10		// lo(a[5..7]*a[4])		(v)
402*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
403*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x12,x10
404*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
405*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x13,x10
406*8fb009dcSAndroid Build Coastguard Worker	adc	x22,x22,x17
407*8fb009dcSAndroid Build Coastguard Worker
408*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x11,x10		// hi(a[5..7]*a[4])
409*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x14
410*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x12,x10
411*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x15
412*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x13,x10
413*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x16
414*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x11		// lo(a[6..7]*a[5])		(vi)
415*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr		// t[12]
416*8fb009dcSAndroid Build Coastguard Worker	adds	x21,x21,x17
417*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x11
418*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x14
419*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x12,x11		// hi(a[6..7]*a[5])
420*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x15
421*8fb009dcSAndroid Build Coastguard Worker
422*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x13,x11
423*8fb009dcSAndroid Build Coastguard Worker	adds	x22,x22,x16
424*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x13,x12		// lo(a[7]*a[6])		(vii)
425*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x17
426*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x12		// hi(a[7]*a[6])
427*8fb009dcSAndroid Build Coastguard Worker	adc	x24,xzr,xzr		// t[13]
428*8fb009dcSAndroid Build Coastguard Worker	adds	x23,x23,x14
429*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x3,x1	// done yet?
430*8fb009dcSAndroid Build Coastguard Worker	adc	x24,x24,x15
431*8fb009dcSAndroid Build Coastguard Worker
432*8fb009dcSAndroid Build Coastguard Worker	adds	x24,x24,x16
433*8fb009dcSAndroid Build Coastguard Worker	sub	x14,x3,x5	// rewinded ap
434*8fb009dcSAndroid Build Coastguard Worker	adc	x25,xzr,xzr		// t[14]
435*8fb009dcSAndroid Build Coastguard Worker	add	x25,x25,x17
436*8fb009dcSAndroid Build Coastguard Worker
437*8fb009dcSAndroid Build Coastguard Worker	cbz	x27,.Lsqr8x_outer_break
438*8fb009dcSAndroid Build Coastguard Worker
439*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x6
440*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x2,#8*0]
441*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2,#8*2]
442*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#8*4]
443*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x2,#8*6]
444*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x6
445*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x7
446*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
447*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x8
448*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x9
449*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
450*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x10
451*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x11
452*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
453*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x12
454*8fb009dcSAndroid Build Coastguard Worker	mov	x0,x1
455*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,xzr,x13
456*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
457*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
458*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved below
459*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#-8*8
460*8fb009dcSAndroid Build Coastguard Worker
461*8fb009dcSAndroid Build Coastguard Worker	//                                                         a[8]a[0]
462*8fb009dcSAndroid Build Coastguard Worker	//                                                     a[9]a[0]
463*8fb009dcSAndroid Build Coastguard Worker	//                                                 a[a]a[0]
464*8fb009dcSAndroid Build Coastguard Worker	//                                             a[b]a[0]
465*8fb009dcSAndroid Build Coastguard Worker	//                                         a[c]a[0]
466*8fb009dcSAndroid Build Coastguard Worker	//                                     a[d]a[0]
467*8fb009dcSAndroid Build Coastguard Worker	//                                 a[e]a[0]
468*8fb009dcSAndroid Build Coastguard Worker	//                             a[f]a[0]
469*8fb009dcSAndroid Build Coastguard Worker	//                                                     a[8]a[1]
470*8fb009dcSAndroid Build Coastguard Worker	//                         a[f]a[1]........................
471*8fb009dcSAndroid Build Coastguard Worker	//                                                 a[8]a[2]
472*8fb009dcSAndroid Build Coastguard Worker	//                     a[f]a[2]........................
473*8fb009dcSAndroid Build Coastguard Worker	//                                             a[8]a[3]
474*8fb009dcSAndroid Build Coastguard Worker	//                 a[f]a[3]........................
475*8fb009dcSAndroid Build Coastguard Worker	//                                         a[8]a[4]
476*8fb009dcSAndroid Build Coastguard Worker	//             a[f]a[4]........................
477*8fb009dcSAndroid Build Coastguard Worker	//                                     a[8]a[5]
478*8fb009dcSAndroid Build Coastguard Worker	//         a[f]a[5]........................
479*8fb009dcSAndroid Build Coastguard Worker	//                                 a[8]a[6]
480*8fb009dcSAndroid Build Coastguard Worker	//     a[f]a[6]........................
481*8fb009dcSAndroid Build Coastguard Worker	//                             a[8]a[7]
482*8fb009dcSAndroid Build Coastguard Worker	// a[f]a[7]........................
483*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_mul:
484*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x6,x4
485*8fb009dcSAndroid Build Coastguard Worker	adc	x28,xzr,xzr		// carry bit, modulo-scheduled
486*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x7,x4
487*8fb009dcSAndroid Build Coastguard Worker	add	x27,x27,#8
488*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x8,x4
489*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x9,x4
490*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
491*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x4
492*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
493*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x4
494*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
495*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x4
496*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x17
497*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x4
498*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x14
499*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x6,x4
500*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
501*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x7,x4
502*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
503*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x8,x4
504*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x17
505*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x9,x4
506*8fb009dcSAndroid Build Coastguard Worker	adc	x28,x28,xzr
507*8fb009dcSAndroid Build Coastguard Worker	str	x19,[x2],#8
508*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x20,x14
509*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x4
510*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x15
511*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x4
512*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x16
513*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x4
514*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x17
515*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x4
516*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x0,x27]
517*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x24,x14
518*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x25,x15
519*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x26,x16
520*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x28,x17
521*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved above
522*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,.Lsqr8x_mul
523*8fb009dcSAndroid Build Coastguard Worker					// note that carry flag is guaranteed
524*8fb009dcSAndroid Build Coastguard Worker					// to be zero at this point
525*8fb009dcSAndroid Build Coastguard Worker	cmp	x1,x3		// done yet?
526*8fb009dcSAndroid Build Coastguard Worker	b.eq	.Lsqr8x_break
527*8fb009dcSAndroid Build Coastguard Worker
528*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x2,#8*0]
529*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2,#8*2]
530*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#8*4]
531*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x2,#8*6]
532*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x6
533*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x0,#-8*8]
534*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x7
535*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
536*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x8
537*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x9
538*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
539*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x10
540*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x11
541*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
542*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x12
543*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#-8*8
544*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x13
545*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
546*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
547*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved above
548*8fb009dcSAndroid Build Coastguard Worker	b	.Lsqr8x_mul
549*8fb009dcSAndroid Build Coastguard Worker
550*8fb009dcSAndroid Build Coastguard Worker.align	4
551*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_break:
552*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x0,#8*0]
553*8fb009dcSAndroid Build Coastguard Worker	add	x1,x0,#8*8
554*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x0,#8*2]
555*8fb009dcSAndroid Build Coastguard Worker	sub	x14,x3,x1		// is it last iteration?
556*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x0,#8*4]
557*8fb009dcSAndroid Build Coastguard Worker	sub	x15,x2,x14
558*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x0,#8*6]
559*8fb009dcSAndroid Build Coastguard Worker	cbz	x14,.Lsqr8x_outer_loop
560*8fb009dcSAndroid Build Coastguard Worker
561*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2,#8*0]
562*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x15,#8*0]
563*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
564*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x15,#8*2]
565*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
566*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x15,#8*4]
567*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
568*8fb009dcSAndroid Build Coastguard Worker	mov	x2,x15
569*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x15,#8*6]
570*8fb009dcSAndroid Build Coastguard Worker	b	.Lsqr8x_outer_loop
571*8fb009dcSAndroid Build Coastguard Worker
572*8fb009dcSAndroid Build Coastguard Worker.align	4
573*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_outer_break:
574*8fb009dcSAndroid Build Coastguard Worker	// Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
575*8fb009dcSAndroid Build Coastguard Worker	ldp	x7,x9,[x14,#8*0]	// recall that x14 is &a[0]
576*8fb009dcSAndroid Build Coastguard Worker	ldp	x15,x16,[sp,#8*1]
577*8fb009dcSAndroid Build Coastguard Worker	ldp	x11,x13,[x14,#8*2]
578*8fb009dcSAndroid Build Coastguard Worker	add	x1,x14,#8*4
579*8fb009dcSAndroid Build Coastguard Worker	ldp	x17,x14,[sp,#8*3]
580*8fb009dcSAndroid Build Coastguard Worker
581*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2,#8*0]
582*8fb009dcSAndroid Build Coastguard Worker	mul	x19,x7,x7
583*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
584*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x7
585*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
586*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x9,x9
587*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
588*8fb009dcSAndroid Build Coastguard Worker	mov	x2,sp
589*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x9,x9
590*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x7,x15,lsl#1
591*8fb009dcSAndroid Build Coastguard Worker	extr	x15,x16,x15,#63
592*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x5,#8*4
593*8fb009dcSAndroid Build Coastguard Worker
594*8fb009dcSAndroid Build Coastguard Worker.Lsqr4x_shift_n_add:
595*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x8,x15
596*8fb009dcSAndroid Build Coastguard Worker	extr	x16,x17,x16,#63
597*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#8*4
598*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x9,x16
599*8fb009dcSAndroid Build Coastguard Worker	ldp	x15,x16,[x2,#8*5]
600*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x11,x11
601*8fb009dcSAndroid Build Coastguard Worker	ldp	x7,x9,[x1],#8*2
602*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x11,x11
603*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x13,x13
604*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x13,x13
605*8fb009dcSAndroid Build Coastguard Worker	extr	x17,x14,x17,#63
606*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2,#8*0]
607*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x10,x17
608*8fb009dcSAndroid Build Coastguard Worker	extr	x14,x15,x14,#63
609*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
610*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x11,x14
611*8fb009dcSAndroid Build Coastguard Worker	ldp	x17,x14,[x2,#8*7]
612*8fb009dcSAndroid Build Coastguard Worker	extr	x15,x16,x15,#63
613*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x12,x15
614*8fb009dcSAndroid Build Coastguard Worker	extr	x16,x17,x16,#63
615*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x13,x16
616*8fb009dcSAndroid Build Coastguard Worker	ldp	x15,x16,[x2,#8*9]
617*8fb009dcSAndroid Build Coastguard Worker	mul	x6,x7,x7
618*8fb009dcSAndroid Build Coastguard Worker	ldp	x11,x13,[x1],#8*2
619*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x7
620*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x9,x9
621*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x9,x9
622*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
623*8fb009dcSAndroid Build Coastguard Worker	extr	x17,x14,x17,#63
624*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
625*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*8
626*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x6,x17
627*8fb009dcSAndroid Build Coastguard Worker	extr	x14,x15,x14,#63
628*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x7,x14
629*8fb009dcSAndroid Build Coastguard Worker	ldp	x17,x14,[x2,#8*3]
630*8fb009dcSAndroid Build Coastguard Worker	extr	x15,x16,x15,#63
631*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,.Lsqr4x_shift_n_add
632*8fb009dcSAndroid Build Coastguard Worker	ldp	x1,x4,[x29,#104]	// pull np and n0
633*8fb009dcSAndroid Build Coastguard Worker
634*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x8,x15
635*8fb009dcSAndroid Build Coastguard Worker	extr	x16,x17,x16,#63
636*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x9,x16
637*8fb009dcSAndroid Build Coastguard Worker	ldp	x15,x16,[x2,#8*5]
638*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x11,x11
639*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x11,x11
640*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2,#8*0]
641*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x13,x13
642*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x13,x13
643*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
644*8fb009dcSAndroid Build Coastguard Worker	extr	x17,x14,x17,#63
645*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x10,x17
646*8fb009dcSAndroid Build Coastguard Worker	extr	x14,x15,x14,#63
647*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#8*0]
648*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x11,x14
649*8fb009dcSAndroid Build Coastguard Worker	extr	x15,x16,x15,#63
650*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
651*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x12,x15
652*8fb009dcSAndroid Build Coastguard Worker	extr	x16,xzr,x16,#63
653*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
654*8fb009dcSAndroid Build Coastguard Worker	adc	x26,x13,x16
655*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
656*8fb009dcSAndroid Build Coastguard Worker
657*8fb009dcSAndroid Build Coastguard Worker	// Reduce by 512 bits per iteration
658*8fb009dcSAndroid Build Coastguard Worker	mul	x28,x4,x19		// t[0]*n0
659*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
660*8fb009dcSAndroid Build Coastguard Worker	add	x3,x1,x5
661*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#8*2]
662*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
663*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[sp,#8*4]
664*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
665*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[sp,#8*6]
666*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
667*8fb009dcSAndroid Build Coastguard Worker	mov	x30,xzr		// initial top-most carry
668*8fb009dcSAndroid Build Coastguard Worker	mov	x2,sp
669*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#8
670*8fb009dcSAndroid Build Coastguard Worker
671*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_reduction:
672*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x14,x6,x28	// lo(n[0-7])*lo(t[0]*n0)
673*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x7,x28
674*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#1
675*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x8,x28
676*8fb009dcSAndroid Build Coastguard Worker	str	x28,[x2],#8		// put aside t[0]*n0 for tail processing
677*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x9,x28
678*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	xzr,x19,x14
679*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x19,#1		// (*)
680*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x28
681*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x20,x15
682*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x28
683*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x16
684*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x28
685*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x17
686*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x28
687*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x14
688*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x6,x28		// hi(n[0-7])*lo(t[0]*n0)
689*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x24,x15
690*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x7,x28
691*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x25,x16
692*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x8,x28
693*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x26,x17
694*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x9,x28
695*8fb009dcSAndroid Build Coastguard Worker	adc	x26,xzr,xzr
696*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
697*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x28
698*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
699*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x28
700*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
701*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x28
702*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x17
703*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x28
704*8fb009dcSAndroid Build Coastguard Worker	mul	x28,x4,x19		// next t[0]*n0
705*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x14
706*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
707*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
708*8fb009dcSAndroid Build Coastguard Worker	adc	x26,x26,x17
709*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,.Lsqr8x_reduction
710*8fb009dcSAndroid Build Coastguard Worker
711*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x2,#8*0]
712*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x2,#8*2]
713*8fb009dcSAndroid Build Coastguard Worker	mov	x0,x2
714*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x3,x1	// done yet?
715*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
716*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
717*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x2,#8*4]
718*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
719*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x17
720*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x2,#8*6]
721*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x14
722*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
723*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
724*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x17
725*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved below
726*8fb009dcSAndroid Build Coastguard Worker	cbz	x27,.Lsqr8x8_post_condition
727*8fb009dcSAndroid Build Coastguard Worker
728*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x2,#-8*8]
729*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
730*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
731*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
732*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#-8*8
733*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
734*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
735*8fb009dcSAndroid Build Coastguard Worker
736*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_tail:
737*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x6,x4
738*8fb009dcSAndroid Build Coastguard Worker	adc	x28,xzr,xzr		// carry bit, modulo-scheduled
739*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x7,x4
740*8fb009dcSAndroid Build Coastguard Worker	add	x27,x27,#8
741*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x8,x4
742*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x9,x4
743*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
744*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x4
745*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
746*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x4
747*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
748*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x4
749*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x17
750*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x4
751*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x14
752*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x6,x4
753*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
754*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x7,x4
755*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
756*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x8,x4
757*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x17
758*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x9,x4
759*8fb009dcSAndroid Build Coastguard Worker	adc	x28,x28,xzr
760*8fb009dcSAndroid Build Coastguard Worker	str	x19,[x2],#8
761*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x20,x14
762*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x4
763*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x15
764*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x4
765*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x16
766*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x4
767*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x17
768*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x4
769*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x0,x27]
770*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x24,x14
771*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x25,x15
772*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x26,x16
773*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x28,x17
774*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved above
775*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,.Lsqr8x_tail
776*8fb009dcSAndroid Build Coastguard Worker					// note that carry flag is guaranteed
777*8fb009dcSAndroid Build Coastguard Worker					// to be zero at this point
778*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x2,#8*0]
779*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x3,x1	// done yet?
780*8fb009dcSAndroid Build Coastguard Worker	sub	x16,x3,x5	// rewinded np
781*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2,#8*2]
782*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#8*4]
783*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x2,#8*6]
784*8fb009dcSAndroid Build Coastguard Worker	cbz	x27,.Lsqr8x_tail_break
785*8fb009dcSAndroid Build Coastguard Worker
786*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x0,#-8*8]
787*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x6
788*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x7
789*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
790*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x8
791*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x9
792*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
793*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x10
794*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x11
795*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
796*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x12
797*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#-8*8
798*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x13
799*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
800*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
801*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved above
802*8fb009dcSAndroid Build Coastguard Worker	b	.Lsqr8x_tail
803*8fb009dcSAndroid Build Coastguard Worker
804*8fb009dcSAndroid Build Coastguard Worker.align	4
805*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_tail_break:
806*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x29,#112]		// pull n0
807*8fb009dcSAndroid Build Coastguard Worker	add	x27,x2,#8*8		// end of current t[num] window
808*8fb009dcSAndroid Build Coastguard Worker
809*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x30,#1		// "move" top-most carry to carry bit
810*8fb009dcSAndroid Build Coastguard Worker	adcs	x14,x19,x6
811*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x20,x7
812*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x0,#8*0]
813*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x8
814*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x16,#8*0]	// recall that x16 is &n[0]
815*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x9
816*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x16,#8*2]
817*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x10
818*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x11
819*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x16,#8*4]
820*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x12
821*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x13
822*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x16,#8*6]
823*8fb009dcSAndroid Build Coastguard Worker	add	x1,x16,#8*8
824*8fb009dcSAndroid Build Coastguard Worker	adc	x30,xzr,xzr	// top-most carry
825*8fb009dcSAndroid Build Coastguard Worker	mul	x28,x4,x19
826*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x2,#8*0]
827*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
828*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x0,#8*2]
829*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
830*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x0,#8*4]
831*8fb009dcSAndroid Build Coastguard Worker	cmp	x27,x29		// did we hit the bottom?
832*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
833*8fb009dcSAndroid Build Coastguard Worker	mov	x2,x0			// slide the window
834*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x0,#8*6]
835*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#8
836*8fb009dcSAndroid Build Coastguard Worker	b.ne	.Lsqr8x_reduction
837*8fb009dcSAndroid Build Coastguard Worker
838*8fb009dcSAndroid Build Coastguard Worker	// Final step. We see if result is larger than modulus, and
839*8fb009dcSAndroid Build Coastguard Worker	// if it is, subtract the modulus. But comparison implies
840*8fb009dcSAndroid Build Coastguard Worker	// subtraction. So we subtract modulus, see if it borrowed,
841*8fb009dcSAndroid Build Coastguard Worker	// and conditionally copy original value.
842*8fb009dcSAndroid Build Coastguard Worker	ldr	x0,[x29,#96]		// pull rp
843*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*8
844*8fb009dcSAndroid Build Coastguard Worker	subs	x14,x19,x6
845*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x20,x7
846*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x5,#8*8
847*8fb009dcSAndroid Build Coastguard Worker	mov	x3,x0		// x0 copy
848*8fb009dcSAndroid Build Coastguard Worker
849*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_sub:
850*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x21,x8
851*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
852*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x22,x9
853*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0,#8*0]
854*8fb009dcSAndroid Build Coastguard Worker	sbcs	x14,x23,x10
855*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
856*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x24,x11
857*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#8*2]
858*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x25,x12
859*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
860*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x26,x13
861*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
862*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
863*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x2,#8*0]
864*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#8*8
865*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x2,#8*2]
866*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x2,#8*4]
867*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x2,#8*6]
868*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*8
869*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0,#8*4]
870*8fb009dcSAndroid Build Coastguard Worker	sbcs	x14,x19,x6
871*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#8*6]
872*8fb009dcSAndroid Build Coastguard Worker	add	x0,x0,#8*8
873*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x20,x7
874*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,.Lsqr8x_sub
875*8fb009dcSAndroid Build Coastguard Worker
876*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x21,x8
877*8fb009dcSAndroid Build Coastguard Worker	mov	x2,sp
878*8fb009dcSAndroid Build Coastguard Worker	add	x1,sp,x5
879*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x3,#8*0]
880*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x22,x9
881*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0,#8*0]
882*8fb009dcSAndroid Build Coastguard Worker	sbcs	x14,x23,x10
883*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x3,#8*2]
884*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x24,x11
885*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#8*2]
886*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x25,x12
887*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x1,#8*0]
888*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x26,x13
889*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x1,#8*2]
890*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x30,xzr	// did it borrow?
891*8fb009dcSAndroid Build Coastguard Worker	ldr	x30,[x29,#8]		// pull return address
892*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0,#8*4]
893*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#8*6]
894*8fb009dcSAndroid Build Coastguard Worker
895*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x5,#8*4
896*8fb009dcSAndroid Build Coastguard Worker.Lsqr4x_cond_copy:
897*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#8*4
898*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x19,x6,lo
899*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*0]
900*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x20,x7,lo
901*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x3,#8*4]
902*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x1,#8*4]
903*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x21,x8,lo
904*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*2]
905*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*4
906*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x22,x9,lo
907*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x3,#8*6]
908*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x1,#8*6]
909*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
910*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x3,#8*0]
911*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x3,#8*2]
912*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
913*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x1,#8*0]
914*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x1,#8*2]
915*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,.Lsqr4x_cond_copy
916*8fb009dcSAndroid Build Coastguard Worker
917*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x19,x6,lo
918*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*0]
919*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x20,x7,lo
920*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*2]
921*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x21,x8,lo
922*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x22,x9,lo
923*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x3,#8*0]
924*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x3,#8*2]
925*8fb009dcSAndroid Build Coastguard Worker
926*8fb009dcSAndroid Build Coastguard Worker	b	.Lsqr8x_done
927*8fb009dcSAndroid Build Coastguard Worker
928*8fb009dcSAndroid Build Coastguard Worker.align	4
929*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x8_post_condition:
930*8fb009dcSAndroid Build Coastguard Worker	adc	x28,xzr,xzr
931*8fb009dcSAndroid Build Coastguard Worker	ldr	x30,[x29,#8]		// pull return address
932*8fb009dcSAndroid Build Coastguard Worker	// x19-x26,x28 hold result, x6-x13 hold modulus
933*8fb009dcSAndroid Build Coastguard Worker	subs	x6,x19,x6
934*8fb009dcSAndroid Build Coastguard Worker	ldr	x1,[x29,#96]		// pull rp
935*8fb009dcSAndroid Build Coastguard Worker	sbcs	x7,x20,x7
936*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*0]
937*8fb009dcSAndroid Build Coastguard Worker	sbcs	x8,x21,x8
938*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*2]
939*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x22,x9
940*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*4]
941*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x23,x10
942*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*6]
943*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x24,x11
944*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*8]
945*8fb009dcSAndroid Build Coastguard Worker	sbcs	x12,x25,x12
946*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*10]
947*8fb009dcSAndroid Build Coastguard Worker	sbcs	x13,x26,x13
948*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*12]
949*8fb009dcSAndroid Build Coastguard Worker	sbcs	x28,x28,xzr	// did it borrow?
950*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*14]
951*8fb009dcSAndroid Build Coastguard Worker
952*8fb009dcSAndroid Build Coastguard Worker	// x6-x13 hold result-modulus
953*8fb009dcSAndroid Build Coastguard Worker	csel	x6,x19,x6,lo
954*8fb009dcSAndroid Build Coastguard Worker	csel	x7,x20,x7,lo
955*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x21,x8,lo
956*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x22,x9,lo
957*8fb009dcSAndroid Build Coastguard Worker	stp	x6,x7,[x1,#8*0]
958*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x23,x10,lo
959*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x24,x11,lo
960*8fb009dcSAndroid Build Coastguard Worker	stp	x8,x9,[x1,#8*2]
961*8fb009dcSAndroid Build Coastguard Worker	csel	x12,x25,x12,lo
962*8fb009dcSAndroid Build Coastguard Worker	csel	x13,x26,x13,lo
963*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x1,#8*4]
964*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x1,#8*6]
965*8fb009dcSAndroid Build Coastguard Worker
966*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_done:
967*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
968*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x29
969*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
970*8fb009dcSAndroid Build Coastguard Worker	mov	x0,#1
971*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
972*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
973*8fb009dcSAndroid Build Coastguard Worker	ldp	x27,x28,[x29,#80]
974*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#128
975*8fb009dcSAndroid Build Coastguard Worker	// x30 is popped earlier
976*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
977*8fb009dcSAndroid Build Coastguard Worker	ret
978*8fb009dcSAndroid Build Coastguard Worker.size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
979*8fb009dcSAndroid Build Coastguard Worker.type	__bn_mul4x_mont,%function
980*8fb009dcSAndroid Build Coastguard Worker.align	5
// __bn_mul4x_mont -- Montgomery multiplication, four limbs at a time.
//
// Reached by a branch (not a call) from bn_mul_mont when (x5 & 3) == 0, so
// the arguments are still in their entry registers:
//   x0 = rp    result pointer (parked at [x29,#96])
//   x1 = ap    first operand, read as a[]
//   x2 = bp    second operand, read as b[]
//   x3 = np    modulus, read as n[]
//   x4 = &n0   pointer to the n0 word (dereferenced below)
//   x5 = num   limb count; converted to a byte count below
// Returns 1 in x0. x19-x28 and x29 are saved/restored in a 128-byte frame.
//
// Working registers:
//   x6-x9    four limbs of a[]          x14-x17  four limbs of n[]
//   x19-x23  running accumulator        x10-x13  product temporaries
//   x24      current limb of b[]        x25      t[0]*n0 reduction multiplier
//   x26      scratch write pointer      x27      &a[num]
//   x28      byte index 0/8/16/24 cycling through a group of four b[] limbs
//   x0       deferred top carry         x30      carry kept between outer passes
// x30 can be used that way because the return address is stored at [x29,#8]
// and is reloaded from there before the function returns.
//
// Scratch (alloca'd below the frame, num+4 limbs): sp[0..3] hold the four
// t[0]*n0 multipliers of the current group, sp[4..] hold t[]. The whole
// scratch area is zeroed before returning.
981*8fb009dcSAndroid Build Coastguard Worker__bn_mul4x_mont:
982*8fb009dcSAndroid Build Coastguard Worker	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
983*8fb009dcSAndroid Build Coastguard Worker	// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
984*8fb009dcSAndroid Build Coastguard Worker	// return address.
	// NOTE(review): no __bn_mul8x_mont label is visible in this file excerpt;
	// presumably __bn_sqr8x_mont is meant -- confirm against the generator script.
985*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-128]!
986*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
987*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
988*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
989*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
990*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[sp,#64]
991*8fb009dcSAndroid Build Coastguard Worker	stp	x27,x28,[sp,#80]
992*8fb009dcSAndroid Build Coastguard Worker
	// Carve out num+4 limbs of scratch: sp = frame - num*8 - 32.
993*8fb009dcSAndroid Build Coastguard Worker	sub	x26,sp,x5,lsl#3
994*8fb009dcSAndroid Build Coastguard Worker	lsl	x5,x5,#3
995*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x4]		// *n0
996*8fb009dcSAndroid Build Coastguard Worker	sub	sp,x26,#8*4		// alloca
997*8fb009dcSAndroid Build Coastguard Worker
998*8fb009dcSAndroid Build Coastguard Worker	add	x10,x2,x5
999*8fb009dcSAndroid Build Coastguard Worker	add	x27,x1,x5
1000*8fb009dcSAndroid Build Coastguard Worker	stp	x0,x10,[x29,#96]	// offload rp and &b[num]
1001*8fb009dcSAndroid Build Coastguard Worker
1002*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,#8*0]		// b[0]
1003*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]	// a[0..3]
1004*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1005*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1006*8fb009dcSAndroid Build Coastguard Worker	mov	x19,xzr
1007*8fb009dcSAndroid Build Coastguard Worker	mov	x20,xzr
1008*8fb009dcSAndroid Build Coastguard Worker	mov	x21,xzr
1009*8fb009dcSAndroid Build Coastguard Worker	mov	x22,xzr
1010*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]	// n[0..3]
1011*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1012*8fb009dcSAndroid Build Coastguard Worker	adds	x3,x3,#8*4		// clear carry bit
1013*8fb009dcSAndroid Build Coastguard Worker	mov	x0,xzr
1014*8fb009dcSAndroid Build Coastguard Worker	mov	x28,#0
1015*8fb009dcSAndroid Build Coastguard Worker	mov	x26,sp
1016*8fb009dcSAndroid Build Coastguard Worker
	// First pass, head: a[0..3] times b[0..3] with the accumulator starting
	// at zero. Each of the four iterations (x28 = 8,16,24,0) folds in one b[]
	// limb, derives the multiplier x25 = x19*n0, parks it at sp[i] for the
	// tail loop, and shifts the accumulator down by one limb.
1017*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_1st_reduction:
1018*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x24		// lo(a[0..3]*b[0])
1019*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr	// modulo-scheduled
1020*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x24
1021*8fb009dcSAndroid Build Coastguard Worker	add	x28,x28,#8
1022*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x8,x24
1023*8fb009dcSAndroid Build Coastguard Worker	and	x28,x28,#31
1024*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x9,x24
1025*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1026*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x24		// hi(a[0..3]*b[0])
1027*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1028*8fb009dcSAndroid Build Coastguard Worker	mul	x25,x19,x4		// t[0]*n0
1029*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1030*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x24
1031*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1032*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x8,x24
1033*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr
1034*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x9,x24
1035*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,x28]		// next b[i] (or b[0])
1036*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x10
1037*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x10,x14,x25	// lo(n[0..3]*t[0]*n0)
1038*8fb009dcSAndroid Build Coastguard Worker	str	x25,[x26],#8		// put aside t[0]*n0 for tail processing
1039*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x11
1040*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x15,x25
1041*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x12
1042*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x16,x25
1043*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x13		// can't overflow
1044*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x17,x25
	// NOTE(review): the (*) mul/adds pair is replaced by the subs below.
	// Presumably n0 is chosen so that lo(n[0]*x25) == -x19 (mod 2^64), which
	// makes the carry of the commented-out adds equal to (x19 != 0) -- exactly
	// the C flag that subs xzr,x19,#1 produces. Confirm against n0's definition.
1045*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	xzr,x19,x10
1046*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x19,#1		// (*)
1047*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x14,x25		// hi(n[0..3]*t[0]*n0)
1048*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x20,x11
1049*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x15,x25
1050*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x12
1051*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x16,x25
1052*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x13
1053*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x17,x25
1054*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x0
1055*8fb009dcSAndroid Build Coastguard Worker	adc	x0,xzr,xzr
1056*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1057*8fb009dcSAndroid Build Coastguard Worker	sub	x10,x27,x1
1058*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1059*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1060*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1061*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1062*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,.Loop_mul4x_1st_reduction
1063*8fb009dcSAndroid Build Coastguard Worker
	// x10 = &a[num] - ap; zero here means num == 4, handled by the short path.
1064*8fb009dcSAndroid Build Coastguard Worker	cbz	x10,.Lmul4x4_post_condition
1065*8fb009dcSAndroid Build Coastguard Worker
1066*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]	// a[4..7]
1067*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1068*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1069*8fb009dcSAndroid Build Coastguard Worker	ldr	x25,[sp]		// first t[0]*n0 saved by the reduction loop
1070*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]	// n[4..7]
1071*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1072*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1073*8fb009dcSAndroid Build Coastguard Worker
	// First pass, tail: the remaining limbs of a[] and n[], four at a time,
	// against the same four b[] limbs, reusing the multipliers saved at
	// sp[0..3]. One finished limb of t[] is stored per iteration.
1074*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_1st_tail:
1075*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x24		// lo(a[4..7]*b[i])
1076*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr	// modulo-scheduled
1077*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x24
1078*8fb009dcSAndroid Build Coastguard Worker	add	x28,x28,#8
1079*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x8,x24
1080*8fb009dcSAndroid Build Coastguard Worker	and	x28,x28,#31
1081*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x9,x24
1082*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1083*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x24		// hi(a[4..7]*b[i])
1084*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1085*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x24
1086*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1087*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x8,x24
1088*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1089*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x9,x24
1090*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr
1091*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,x28]		// next b[i] (or b[0])
1092*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x10
1093*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x14,x25		// lo(n[4..7]*a[0]*n0)
1094*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x11
1095*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x15,x25
1096*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x12
1097*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x16,x25
1098*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x13		// can't overflow
1099*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x17,x25
1100*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1101*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x14,x25		// hi(n[4..7]*a[0]*n0)
1102*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1103*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x15,x25
1104*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1105*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x16,x25
1106*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1107*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x0
1108*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x17,x25
1109*8fb009dcSAndroid Build Coastguard Worker	adc	x0,xzr,xzr
1110*8fb009dcSAndroid Build Coastguard Worker	ldr	x25,[sp,x28]		// next t[0]*n0
1111*8fb009dcSAndroid Build Coastguard Worker	str	x19,[x26],#8		// result!!!
1112*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x20,x10
1113*8fb009dcSAndroid Build Coastguard Worker	sub	x10,x27,x1		// done yet?
1114*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x11
1115*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x12
1116*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x13
1117*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1118*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,.Loop_mul4x_1st_tail
1119*8fb009dcSAndroid Build Coastguard Worker
1120*8fb009dcSAndroid Build Coastguard Worker	sub	x11,x27,x5	// x11 = &a[num] - num*8, i.e. ap rewound to a[0]
1121*8fb009dcSAndroid Build Coastguard Worker	cbz	x10,.Lmul4x_proceed
1122*8fb009dcSAndroid Build Coastguard Worker
	// More limbs of a[]/n[] remain for this group of b[]: load the next four.
1123*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
1124*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1125*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1126*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]
1127*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1128*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1129*8fb009dcSAndroid Build Coastguard Worker	b	.Loop_mul4x_1st_tail
1130*8fb009dcSAndroid Build Coastguard Worker
1131*8fb009dcSAndroid Build Coastguard Worker.align	5
	// End of the first pass: advance bp by four limbs, move the top carry
	// into x30, rewind ap/np, flush the last four accumulator limbs to t[]
	// and reload t[0..3] as the starting accumulator for the next pass.
1132*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_proceed:
1133*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,#8*4]!		// *++b
1134*8fb009dcSAndroid Build Coastguard Worker	adc	x30,x0,xzr
1135*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x11,#8*0]	// a[0..3]
1136*8fb009dcSAndroid Build Coastguard Worker	sub	x3,x3,x5		// rewind np
1137*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x11,#8*2]
1138*8fb009dcSAndroid Build Coastguard Worker	add	x1,x11,#8*4
1139*8fb009dcSAndroid Build Coastguard Worker
1140*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x26,#8*0]	// result!!!
1141*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#8*4]	// t[0..3]
1142*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x26,#8*2]	// result!!!
1143*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#8*6]
1144*8fb009dcSAndroid Build Coastguard Worker
1145*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]	// n[0..3]
1146*8fb009dcSAndroid Build Coastguard Worker	mov	x26,sp
1147*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1148*8fb009dcSAndroid Build Coastguard Worker	adds	x3,x3,#8*4		// clear carry bit
1149*8fb009dcSAndroid Build Coastguard Worker	mov	x0,xzr
1150*8fb009dcSAndroid Build Coastguard Worker
1151*8fb009dcSAndroid Build Coastguard Worker.align	4
	// Subsequent passes, head: same shape as .Loop_mul4x_1st_reduction, but
	// the accumulator starts from t[0..3] left by the previous pass.
1152*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_reduction:
1153*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x24		// lo(a[0..3]*b[4])
1154*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr	// modulo-scheduled
1155*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x24
1156*8fb009dcSAndroid Build Coastguard Worker	add	x28,x28,#8
1157*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x8,x24
1158*8fb009dcSAndroid Build Coastguard Worker	and	x28,x28,#31
1159*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x9,x24
1160*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1161*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x24		// hi(a[0..3]*b[4])
1162*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1163*8fb009dcSAndroid Build Coastguard Worker	mul	x25,x19,x4		// t[0]*n0
1164*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1165*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x24
1166*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1167*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x8,x24
1168*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr
1169*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x9,x24
1170*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,x28]		// next b[i]
1171*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x10
1172*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x10,x14,x25
1173*8fb009dcSAndroid Build Coastguard Worker	str	x25,[x26],#8		// put aside t[0]*n0 for tail processing
1174*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x11
1175*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x15,x25		// lo(n[0..3]*t[0]*n0)
1176*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x12
1177*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x16,x25
1178*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x13		// can't overflow
1179*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x17,x25
1180*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	xzr,x19,x10
1181*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x19,#1		// (*) same carry substitution as in the first pass
1182*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x14,x25		// hi(n[0..3]*t[0]*n0)
1183*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x20,x11
1184*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x15,x25
1185*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x12
1186*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x16,x25
1187*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x13
1188*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x17,x25
1189*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x0
1190*8fb009dcSAndroid Build Coastguard Worker	adc	x0,xzr,xzr
1191*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1192*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1193*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1194*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1195*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1196*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,.Loop_mul4x_reduction
1197*8fb009dcSAndroid Build Coastguard Worker
	// x26 now points at sp+32 (four multipliers were stored), so
	// [x26,#8*4] is t[4..7]; add them into the accumulator.
1198*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr
1199*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x26,#8*4]	// t[4..7]
1200*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x26,#8*6]
1201*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]	// a[4..7]
1202*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1203*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1204*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1205*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1206*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1207*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1208*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1209*8fb009dcSAndroid Build Coastguard Worker
1210*8fb009dcSAndroid Build Coastguard Worker	ldr	x25,[sp]		// t[0]*n0
1211*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]	// n[4..7]
1212*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1213*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1214*8fb009dcSAndroid Build Coastguard Worker
1215*8fb009dcSAndroid Build Coastguard Worker.align	4
	// Subsequent passes, tail: same shape as .Loop_mul4x_1st_tail; the
	// existing t[] limbs are added in between groups of four iterations.
1216*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_tail:
1217*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x24		// lo(a[4..7]*b[4])
1218*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr	// modulo-scheduled
1219*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x24
1220*8fb009dcSAndroid Build Coastguard Worker	add	x28,x28,#8
1221*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x8,x24
1222*8fb009dcSAndroid Build Coastguard Worker	and	x28,x28,#31
1223*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x9,x24
1224*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1225*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x24		// hi(a[4..7]*b[4])
1226*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1227*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x24
1228*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1229*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x8,x24
1230*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1231*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x9,x24
1232*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr
1233*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,x28]		// next b[i]
1234*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x10
1235*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x14,x25		// lo(n[4..7]*t[0]*n0)
1236*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x11
1237*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x15,x25
1238*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x12
1239*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x16,x25
1240*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x13		// can't overflow
1241*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x17,x25
1242*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1243*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x14,x25		// hi(n[4..7]*t[0]*n0)
1244*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1245*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x15,x25
1246*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1247*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x16,x25
1248*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1249*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x17,x25
1250*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x0
1251*8fb009dcSAndroid Build Coastguard Worker	ldr	x25,[sp,x28]		// next t[0]*n0
1252*8fb009dcSAndroid Build Coastguard Worker	adc	x0,xzr,xzr
1253*8fb009dcSAndroid Build Coastguard Worker	str	x19,[x26],#8		// result!!!
1254*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x20,x10
1255*8fb009dcSAndroid Build Coastguard Worker	sub	x10,x27,x1		// done yet?
1256*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x11
1257*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x12
1258*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x13
1259*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1260*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,.Loop_mul4x_tail
1261*8fb009dcSAndroid Build Coastguard Worker
1262*8fb009dcSAndroid Build Coastguard Worker	sub	x11,x3,x5		// rewinded np?
1263*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr
1264*8fb009dcSAndroid Build Coastguard Worker	cbz	x10,.Loop_mul4x_break
1265*8fb009dcSAndroid Build Coastguard Worker
	// Not at the end of a[] yet: add in the next four t[] limbs and load the
	// next four limbs of a[] and n[].
1266*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x26,#8*4]
1267*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x26,#8*6]
1268*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
1269*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1270*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1271*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1272*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1273*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1274*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1275*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1276*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]
1277*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1278*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1279*8fb009dcSAndroid Build Coastguard Worker	b	.Loop_mul4x_tail
1280*8fb009dcSAndroid Build Coastguard Worker
1281*8fb009dcSAndroid Build Coastguard Worker.align	4
	// End of a pass over a[]: fold in the carry saved in x30 from the
	// previous pass, store the top four limbs of t[], keep the new top carry
	// in x30, step bp by four limbs and either start the next pass or finish.
1282*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_break:
1283*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x29,#96]	// pull rp and &b[num]
1284*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x30
1285*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*4		// bp++
1286*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,xzr
1287*8fb009dcSAndroid Build Coastguard Worker	sub	x1,x1,x5		// rewind ap
1288*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,xzr
1289*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x26,#8*0]	// result!!!
1290*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,xzr
1291*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#8*4]	// t[0..3]
1292*8fb009dcSAndroid Build Coastguard Worker	adc	x30,x0,xzr
1293*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x26,#8*2]	// result!!!
1294*8fb009dcSAndroid Build Coastguard Worker	cmp	x2,x13			// done yet?
1295*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#8*6]
1296*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x11,#8*0]	// n[0..3]
1297*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x11,#8*2]
1298*8fb009dcSAndroid Build Coastguard Worker	add	x3,x11,#8*4
1299*8fb009dcSAndroid Build Coastguard Worker	b.eq	.Lmul4x_post
1300*8fb009dcSAndroid Build Coastguard Worker
1301*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2]
1302*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]	// a[0..3]
1303*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1304*8fb009dcSAndroid Build Coastguard Worker	adds	x1,x1,#8*4		// clear carry bit
1305*8fb009dcSAndroid Build Coastguard Worker	mov	x0,xzr
1306*8fb009dcSAndroid Build Coastguard Worker	mov	x26,sp
1307*8fb009dcSAndroid Build Coastguard Worker	b	.Loop_mul4x_reduction
1308*8fb009dcSAndroid Build Coastguard Worker
1309*8fb009dcSAndroid Build Coastguard Worker.align	4
1310*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_post:
1311*8fb009dcSAndroid Build Coastguard Worker	// Final step. We see if result is larger than modulus, and
1312*8fb009dcSAndroid Build Coastguard Worker	// if it is, subtract the modulus. But comparison implies
1313*8fb009dcSAndroid Build Coastguard Worker	// subtraction. So we subtract modulus, see if it borrowed,
1314*8fb009dcSAndroid Build Coastguard Worker	// and conditionally copy original value.
1315*8fb009dcSAndroid Build Coastguard Worker	mov	x0,x12
1316*8fb009dcSAndroid Build Coastguard Worker	mov	x27,x12		// x0 copy
1317*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x19,x14
1318*8fb009dcSAndroid Build Coastguard Worker	add	x26,sp,#8*8
1319*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x20,x15
1320*8fb009dcSAndroid Build Coastguard Worker	sub	x28,x5,#8*4
1321*8fb009dcSAndroid Build Coastguard Worker
	// rp[] = t[] - n[], four limbs per iteration; the borrow is carried in
	// the C flag across iterations, so nothing in the loop may disturb flags.
1322*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_sub:
1323*8fb009dcSAndroid Build Coastguard Worker	sbcs	x12,x21,x16
1324*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]
1325*8fb009dcSAndroid Build Coastguard Worker	sub	x28,x28,#8*4
1326*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x26,#8*0]
1327*8fb009dcSAndroid Build Coastguard Worker	sbcs	x13,x22,x17
1328*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1329*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1330*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x26,#8*2]
1331*8fb009dcSAndroid Build Coastguard Worker	add	x26,x26,#8*4
1332*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x0,#8*0]
1333*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x19,x14
1334*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x0,#8*2]
1335*8fb009dcSAndroid Build Coastguard Worker	add	x0,x0,#8*4
1336*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x20,x15
1337*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,.Lmul4x_sub
1338*8fb009dcSAndroid Build Coastguard Worker
1339*8fb009dcSAndroid Build Coastguard Worker	sbcs	x12,x21,x16
1340*8fb009dcSAndroid Build Coastguard Worker	mov	x26,sp
1341*8fb009dcSAndroid Build Coastguard Worker	add	x1,sp,#8*4
1342*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x27,#8*0]
1343*8fb009dcSAndroid Build Coastguard Worker	sbcs	x13,x22,x17
1344*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x0,#8*0]
1345*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x27,#8*2]
1346*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x0,#8*2]
1347*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x1,#8*0]
1348*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x1,#8*2]
1349*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x30,xzr	// did it borrow?
1350*8fb009dcSAndroid Build Coastguard Worker	ldr	x30,[x29,#8]		// pull return address
1351*8fb009dcSAndroid Build Coastguard Worker
	// Select per limb without branching: "lo" (borrow) keeps the original
	// t[] limb (x19-x22, read via x1), otherwise the difference already in
	// rp[] (x6-x9, read via x27). x26 trails x1 by four limbs and zeroes
	// scratch that has already been consumed.
1352*8fb009dcSAndroid Build Coastguard Worker	sub	x28,x5,#8*4
1353*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_cond_copy:
1354*8fb009dcSAndroid Build Coastguard Worker	sub	x28,x28,#8*4
1355*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x19,x6,lo
1356*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*0]
1357*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x20,x7,lo
1358*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x27,#8*4]
1359*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x1,#8*4]
1360*8fb009dcSAndroid Build Coastguard Worker	csel	x12,x21,x8,lo
1361*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*2]
1362*8fb009dcSAndroid Build Coastguard Worker	add	x26,x26,#8*4
1363*8fb009dcSAndroid Build Coastguard Worker	csel	x13,x22,x9,lo
1364*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x27,#8*6]
1365*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x1,#8*6]
1366*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1367*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x27,#8*0]
1368*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x27,#8*2]
1369*8fb009dcSAndroid Build Coastguard Worker	add	x27,x27,#8*4
1370*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,.Lmul4x_cond_copy
1371*8fb009dcSAndroid Build Coastguard Worker
	// Last group. x26 = sp + num*8 - 32 here, so eight limbs of scratch
	// remain up to x29; the last t[] limbs are already in x19-x22. Wipe all
	// eight with non-overlapping pairs at limb offsets 0, 2, 4 and 6.
	// (Offsets 0, 2, 3, 4 would overlap and leave limbs 6 and 7 -- the top
	// two limbs of t[] -- behind on the stack.)
1372*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x19,x6,lo
1373*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*0]
1374*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x20,x7,lo
1375*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*2]
1376*8fb009dcSAndroid Build Coastguard Worker	csel	x12,x21,x8,lo
1377*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*4]
1378*8fb009dcSAndroid Build Coastguard Worker	csel	x13,x22,x9,lo
1379*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*6]
1380*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x27,#8*0]
1381*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x27,#8*2]
1382*8fb009dcSAndroid Build Coastguard Worker
1383*8fb009dcSAndroid Build Coastguard Worker	b	.Lmul4x_done
1384*8fb009dcSAndroid Build Coastguard Worker
1385*8fb009dcSAndroid Build Coastguard Worker.align	4
	// num == 4 short path: the whole result is still in registers, so the
	// subtract/select is done inline and the eight scratch limbs at sp are
	// zeroed directly.
1386*8fb009dcSAndroid Build Coastguard Worker.Lmul4x4_post_condition:
1387*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr
1388*8fb009dcSAndroid Build Coastguard Worker	ldr	x1,[x29,#96]		// pull rp
1389*8fb009dcSAndroid Build Coastguard Worker	// x19-x22,x0 hold result, x14-x17 hold modulus
1390*8fb009dcSAndroid Build Coastguard Worker	subs	x6,x19,x14
1391*8fb009dcSAndroid Build Coastguard Worker	ldr	x30,[x29,#8]		// pull return address
1392*8fb009dcSAndroid Build Coastguard Worker	sbcs	x7,x20,x15
1393*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*0]
1394*8fb009dcSAndroid Build Coastguard Worker	sbcs	x8,x21,x16
1395*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*2]
1396*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x22,x17
1397*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*4]
1398*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x0,xzr		// did it borrow?
1399*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*6]
1400*8fb009dcSAndroid Build Coastguard Worker
1401*8fb009dcSAndroid Build Coastguard Worker	// x6-x9 hold result-modulus
1402*8fb009dcSAndroid Build Coastguard Worker	csel	x6,x19,x6,lo
1403*8fb009dcSAndroid Build Coastguard Worker	csel	x7,x20,x7,lo
1404*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x21,x8,lo
1405*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x22,x9,lo
1406*8fb009dcSAndroid Build Coastguard Worker	stp	x6,x7,[x1,#8*0]
1407*8fb009dcSAndroid Build Coastguard Worker	stp	x8,x9,[x1,#8*2]
1408*8fb009dcSAndroid Build Coastguard Worker
	// Common exit: restore callee-saved registers from the frame, drop the
	// scratch area by resetting sp to x29, pop the frame and return 1.
1409*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_done:
1410*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
1411*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x29
1412*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
1413*8fb009dcSAndroid Build Coastguard Worker	mov	x0,#1
1414*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
1415*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
1416*8fb009dcSAndroid Build Coastguard Worker	ldp	x27,x28,[x29,#80]
1417*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#128
1418*8fb009dcSAndroid Build Coastguard Worker	// x30 is popped earlier
1419*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
1420*8fb009dcSAndroid Build Coastguard Worker	ret
1421*8fb009dcSAndroid Build Coastguard Worker.size	__bn_mul4x_mont,.-__bn_mul4x_mont
// NUL-terminated ASCII identification string embedded in the section:
// "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1422*8fb009dcSAndroid Build Coastguard Worker.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1423*8fb009dcSAndroid Build Coastguard Worker.align	2
1424*8fb009dcSAndroid Build Coastguard Worker.align	4
1425*8fb009dcSAndroid Build Coastguard Worker#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
1426