xref: /aosp_15_r20/external/boringssl/src/gen/bcm/armv8-mont-win.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1*8fb009dcSAndroid Build Coastguard Worker// This file is generated from a similarly-named Perl script in the BoringSSL
2*8fb009dcSAndroid Build Coastguard Worker// source tree. Do not edit by hand.
3*8fb009dcSAndroid Build Coastguard Worker
4*8fb009dcSAndroid Build Coastguard Worker#include <openssl/asm_base.h>
5*8fb009dcSAndroid Build Coastguard Worker
6*8fb009dcSAndroid Build Coastguard Worker#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
7*8fb009dcSAndroid Build Coastguard Worker#include <openssl/arm_arch.h>
8*8fb009dcSAndroid Build Coastguard Worker
9*8fb009dcSAndroid Build Coastguard Worker.text
10*8fb009dcSAndroid Build Coastguard Worker
// bn_mul_mont -- Montgomery multiplication entry point (AArch64, Windows/COFF
// flavour of the generated file).
//
// Register roles on entry, as used by the code below:
//   x0 = rp   result buffer (written by the final subtract / conditional copy)
//   x1 = ap   first operand
//   x2 = bp   second operand
//   x3 = np   modulus
//   x4 = pointer to n0 (dereferenced once with "ldr x4,[x4]")
//   x5 = num  operand length in 64-bit words (scaled to bytes with lsl #3)
// Returns 1 in x0.
//
// Dispatch: a num divisible by 8 branches to __bn_sqr8x_mont, otherwise a num
// divisible by 4 branches to __bn_mul4x_mont; every other length falls
// through to the generic word-by-word loop at Lmul_mont.
//
// The generic path keeps a 64-byte frame (x29/x30, x19-x24) and allocates
// num*8 bytes of scratch (tp) below it, rounded down to a 16-byte boundary.
// tp is overwritten with zeros while the result is copied out.
//
// NOTE(review): ap[0..1] and np[0..1] are loaded unconditionally and the
// inner counter starts at num-2, so this path presumably requires num >= 2
// -- confirm against the C caller.
11*8fb009dcSAndroid Build Coastguard Worker.globl	bn_mul_mont
12*8fb009dcSAndroid Build Coastguard Worker
13*8fb009dcSAndroid Build Coastguard Worker.def bn_mul_mont
14*8fb009dcSAndroid Build Coastguard Worker   .type 32
15*8fb009dcSAndroid Build Coastguard Worker.endef
16*8fb009dcSAndroid Build Coastguard Worker.align	5
17*8fb009dcSAndroid Build Coastguard Workerbn_mul_mont:
18*8fb009dcSAndroid Build Coastguard Worker	AARCH64_SIGN_LINK_REGISTER	// sign the return address in x30
19*8fb009dcSAndroid Build Coastguard Worker	tst	x5,#7	// num % 8 == 0?
20*8fb009dcSAndroid Build Coastguard Worker	b.eq	__bn_sqr8x_mont	// yes: 8x path (tail branch, no frame set up yet)
21*8fb009dcSAndroid Build Coastguard Worker	tst	x5,#3	// num % 4 == 0?
22*8fb009dcSAndroid Build Coastguard Worker	b.eq	__bn_mul4x_mont	// yes: 4x path (tail branch, no frame set up yet)
23*8fb009dcSAndroid Build Coastguard WorkerLmul_mont:
24*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-64]!	// open a 64-byte frame
25*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0	// x29 = frame base, used to unwind the alloca
26*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
27*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
28*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
29*8fb009dcSAndroid Build Coastguard Worker
30*8fb009dcSAndroid Build Coastguard Worker	ldr	x9,[x2],#8		// bp[0]
31*8fb009dcSAndroid Build Coastguard Worker	sub	x22,sp,x5,lsl#3	// tp = sp - num*8
32*8fb009dcSAndroid Build Coastguard Worker	ldp	x7,x8,[x1],#16	// ap[0..1]
33*8fb009dcSAndroid Build Coastguard Worker	lsl	x5,x5,#3	// x5 = num in bytes from here on
34*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x4]		// *n0
35*8fb009dcSAndroid Build Coastguard Worker	and	x22,x22,#-16		// ABI says so
36*8fb009dcSAndroid Build Coastguard Worker	ldp	x13,x14,[x3],#16	// np[0..1]
37*8fb009dcSAndroid Build Coastguard Worker
38*8fb009dcSAndroid Build Coastguard Worker	mul	x6,x7,x9		// ap[0]*bp[0]
39*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x5,#16		// j=num-2
40*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x9
41*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x8,x9		// ap[1]*bp[0]
42*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x8,x9
43*8fb009dcSAndroid Build Coastguard Worker
44*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x6,x4		// "tp[0]"*n0
45*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x22			// alloca
46*8fb009dcSAndroid Build Coastguard Worker
47*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x12,x13,x15	// np[0]*m1
48*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x13,x15
49*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x14,x15		// np[1]*m1
50*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	x12,x12,x6	// discarded
51*8fb009dcSAndroid Build Coastguard Worker	// (*)	As for removal of first multiplication and addition
52*8fb009dcSAndroid Build Coastguard Worker	//	instructions. The outcome of first addition is
53*8fb009dcSAndroid Build Coastguard Worker	//	guaranteed to be zero, which leaves two computationally
54*8fb009dcSAndroid Build Coastguard Worker	//	significant outcomes: it either carries or not. Then
55*8fb009dcSAndroid Build Coastguard Worker	//	question is when does it carry? Is there alternative
56*8fb009dcSAndroid Build Coastguard Worker	//	way to deduce it? If you follow operations, you can
57*8fb009dcSAndroid Build Coastguard Worker	//	observe that condition for carry is quite simple:
58*8fb009dcSAndroid Build Coastguard Worker	//	x6 being non-zero. So that carry can be calculated
59*8fb009dcSAndroid Build Coastguard Worker	//	by adding -1 to x6. That's what next instruction does.
60*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x6,#1		// (*)
61*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x14,x15
62*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x13,xzr	// fold in the carry of the omitted addition
63*8fb009dcSAndroid Build Coastguard Worker	cbz	x21,L1st_skip	// j == 0: no middle words to process
64*8fb009dcSAndroid Build Coastguard Worker
// First pass (bp[0]): tp is written for the first time, so there is no
// previous tp[] word to add in. x22 walks tp, x21 counts remaining bytes.
65*8fb009dcSAndroid Build Coastguard WorkerL1st:
66*8fb009dcSAndroid Build Coastguard Worker	ldr	x8,[x1],#8
67*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x10,x7
68*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x21,#8		// j--
69*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x11,xzr
70*8fb009dcSAndroid Build Coastguard Worker
71*8fb009dcSAndroid Build Coastguard Worker	ldr	x14,[x3],#8
72*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x16,x13
73*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x8,x9		// ap[j]*bp[0]
74*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x17,xzr
75*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x8,x9
76*8fb009dcSAndroid Build Coastguard Worker
77*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x12,x6
78*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x14,x15		// np[j]*m1
79*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x13,xzr
80*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x14,x15
81*8fb009dcSAndroid Build Coastguard Worker	str	x12,[x22],#8		// tp[j-1]
82*8fb009dcSAndroid Build Coastguard Worker	cbnz	x21,L1st
83*8fb009dcSAndroid Build Coastguard Worker
84*8fb009dcSAndroid Build Coastguard WorkerL1st_skip:
85*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x10,x7
86*8fb009dcSAndroid Build Coastguard Worker	sub	x1,x1,x5		// rewind x1
87*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x11,xzr
88*8fb009dcSAndroid Build Coastguard Worker
89*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x16,x13
90*8fb009dcSAndroid Build Coastguard Worker	sub	x3,x3,x5		// rewind x3
91*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x17,xzr
92*8fb009dcSAndroid Build Coastguard Worker
93*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x12,x6
94*8fb009dcSAndroid Build Coastguard Worker	sub	x20,x5,#8		// i=num-1
95*8fb009dcSAndroid Build Coastguard Worker	adcs	x13,x13,x7
96*8fb009dcSAndroid Build Coastguard Worker
97*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr		// upmost overflow bit
98*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x22]
99*8fb009dcSAndroid Build Coastguard Worker
// Remaining passes (bp[1..num-1]): same shape as the first pass, but each
// step also adds the tp[] word left by the previous pass, and x19 carries
// the top overflow bit from one pass to the next.
100*8fb009dcSAndroid Build Coastguard WorkerLouter:
101*8fb009dcSAndroid Build Coastguard Worker	ldr	x9,[x2],#8		// bp[i]
102*8fb009dcSAndroid Build Coastguard Worker	ldp	x7,x8,[x1],#16
103*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[sp]		// tp[0]
104*8fb009dcSAndroid Build Coastguard Worker	add	x22,sp,#8
105*8fb009dcSAndroid Build Coastguard Worker
106*8fb009dcSAndroid Build Coastguard Worker	mul	x6,x7,x9		// ap[0]*bp[i]
107*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x5,#16		// j=num-2
108*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x9
109*8fb009dcSAndroid Build Coastguard Worker	ldp	x13,x14,[x3],#16
110*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x8,x9		// ap[1]*bp[i]
111*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x6,x23
112*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x8,x9
113*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x7,xzr
114*8fb009dcSAndroid Build Coastguard Worker
115*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x6,x4
116*8fb009dcSAndroid Build Coastguard Worker	sub	x20,x20,#8		// i--
117*8fb009dcSAndroid Build Coastguard Worker
118*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x12,x13,x15	// np[0]*m1
119*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x13,x15
120*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x14,x15		// np[1]*m1
121*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	x12,x12,x6
122*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x6,#1		// (*)
123*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x14,x15
124*8fb009dcSAndroid Build Coastguard Worker	cbz	x21,Linner_skip
125*8fb009dcSAndroid Build Coastguard Worker
// The carry consumed by the first adc below is left pending across the loop
// back-edge: on the first trip it comes from the subs above, on later trips
// from the last adds of the previous trip (umulh/str/cbnz/ldr leave the
// flags alone). Do not reorder flag-setting instructions here.
126*8fb009dcSAndroid Build Coastguard WorkerLinner:
127*8fb009dcSAndroid Build Coastguard Worker	ldr	x8,[x1],#8
128*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x13,xzr
129*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x22],#8		// tp[j]
130*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x10,x7
131*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x21,#8		// j--
132*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x11,xzr
133*8fb009dcSAndroid Build Coastguard Worker
134*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x16,x13
135*8fb009dcSAndroid Build Coastguard Worker	ldr	x14,[x3],#8
136*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x17,xzr
137*8fb009dcSAndroid Build Coastguard Worker
138*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x8,x9		// ap[j]*bp[i]
139*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x6,x23
140*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x8,x9
141*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x7,xzr
142*8fb009dcSAndroid Build Coastguard Worker
143*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x14,x15		// np[j]*m1
144*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x12,x6
145*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x14,x15
146*8fb009dcSAndroid Build Coastguard Worker	str	x12,[x22,#-16]		// tp[j-1]
147*8fb009dcSAndroid Build Coastguard Worker	cbnz	x21,Linner
148*8fb009dcSAndroid Build Coastguard Worker
149*8fb009dcSAndroid Build Coastguard WorkerLinner_skip:
150*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x22],#8		// tp[j]
151*8fb009dcSAndroid Build Coastguard Worker	adc	x13,x13,xzr	// consume the carry still pending from above
152*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x10,x7
153*8fb009dcSAndroid Build Coastguard Worker	sub	x1,x1,x5		// rewind x1
154*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x11,xzr
155*8fb009dcSAndroid Build Coastguard Worker
156*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x16,x13
157*8fb009dcSAndroid Build Coastguard Worker	sub	x3,x3,x5		// rewind x3
158*8fb009dcSAndroid Build Coastguard Worker	adcs	x13,x17,x19	// add the overflow bit kept from the previous pass
159*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr
160*8fb009dcSAndroid Build Coastguard Worker
161*8fb009dcSAndroid Build Coastguard Worker	adds	x6,x6,x23
162*8fb009dcSAndroid Build Coastguard Worker	adc	x7,x7,xzr
163*8fb009dcSAndroid Build Coastguard Worker
164*8fb009dcSAndroid Build Coastguard Worker	adds	x12,x12,x6
165*8fb009dcSAndroid Build Coastguard Worker	adcs	x13,x13,x7
166*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,xzr		// upmost overflow bit
167*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x22,#-16]
168*8fb009dcSAndroid Build Coastguard Worker
169*8fb009dcSAndroid Build Coastguard Worker	cbnz	x20,Louter
170*8fb009dcSAndroid Build Coastguard Worker
171*8fb009dcSAndroid Build Coastguard Worker	// Final step. We see if result is larger than modulus, and
172*8fb009dcSAndroid Build Coastguard Worker	// if it is, subtract the modulus. But comparison implies
173*8fb009dcSAndroid Build Coastguard Worker	// subtraction. So we subtract modulus, see if it borrowed,
174*8fb009dcSAndroid Build Coastguard Worker	// and conditionally copy original value.
175*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[sp]		// tp[0]
176*8fb009dcSAndroid Build Coastguard Worker	add	x22,sp,#8
177*8fb009dcSAndroid Build Coastguard Worker	ldr	x14,[x3],#8		// np[0]
178*8fb009dcSAndroid Build Coastguard Worker	subs	x21,x5,#8		// j=num-1 and clear borrow
179*8fb009dcSAndroid Build Coastguard Worker	mov	x1,x0	// x1 = write cursor into rp; x0 stays at rp[0]
// sbcs is the only flag-setting instruction inside this loop, so the borrow
// chains from one word to the next across iterations.
180*8fb009dcSAndroid Build Coastguard WorkerLsub:
181*8fb009dcSAndroid Build Coastguard Worker	sbcs	x8,x23,x14		// tp[j]-np[j]
182*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x22],#8
183*8fb009dcSAndroid Build Coastguard Worker	sub	x21,x21,#8		// j--
184*8fb009dcSAndroid Build Coastguard Worker	ldr	x14,[x3],#8
185*8fb009dcSAndroid Build Coastguard Worker	str	x8,[x1],#8		// rp[j]=tp[j]-np[j]
186*8fb009dcSAndroid Build Coastguard Worker	cbnz	x21,Lsub
187*8fb009dcSAndroid Build Coastguard Worker
188*8fb009dcSAndroid Build Coastguard Worker	sbcs	x8,x23,x14
189*8fb009dcSAndroid Build Coastguard Worker	sbcs	x19,x19,xzr		// did it borrow?
190*8fb009dcSAndroid Build Coastguard Worker	str	x8,[x1],#8		// rp[num-1]
191*8fb009dcSAndroid Build Coastguard Worker
192*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[sp]		// tp[0]
193*8fb009dcSAndroid Build Coastguard Worker	add	x22,sp,#8
194*8fb009dcSAndroid Build Coastguard Worker	ldr	x8,[x0],#8		// rp[0]
195*8fb009dcSAndroid Build Coastguard Worker	sub	x5,x5,#8		// num--
196*8fb009dcSAndroid Build Coastguard Worker	nop
// The flags still hold the outcome of the "did it borrow?" sbcs: nothing
// from there through the end of this copy sets them. "lo" (carry clear,
// i.e. the subtraction borrowed) picks the unreduced tp word in x23;
// otherwise the already-stored difference in x8 is written back. Each tp
// word is zeroed as soon as it has been read.
197*8fb009dcSAndroid Build Coastguard WorkerLcond_copy:
198*8fb009dcSAndroid Build Coastguard Worker	sub	x5,x5,#8		// num--
199*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x23,x8,lo		// did it borrow?
200*8fb009dcSAndroid Build Coastguard Worker	ldr	x23,[x22],#8
201*8fb009dcSAndroid Build Coastguard Worker	ldr	x8,[x0],#8
202*8fb009dcSAndroid Build Coastguard Worker	str	xzr,[x22,#-16]		// wipe tp
203*8fb009dcSAndroid Build Coastguard Worker	str	x14,[x0,#-16]
204*8fb009dcSAndroid Build Coastguard Worker	cbnz	x5,Lcond_copy
205*8fb009dcSAndroid Build Coastguard Worker
206*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x23,x8,lo
207*8fb009dcSAndroid Build Coastguard Worker	str	xzr,[x22,#-8]		// wipe tp
208*8fb009dcSAndroid Build Coastguard Worker	str	x14,[x0,#-8]
209*8fb009dcSAndroid Build Coastguard Worker
210*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
211*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x29	// release the tp alloca
212*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
213*8fb009dcSAndroid Build Coastguard Worker	mov	x0,#1	// return value: 1
214*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
215*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#64	// restore x29 and pop the frame; x30 is never written on this path
216*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
217*8fb009dcSAndroid Build Coastguard Worker	ret
218*8fb009dcSAndroid Build Coastguard Worker
219*8fb009dcSAndroid Build Coastguard Worker.def __bn_sqr8x_mont
220*8fb009dcSAndroid Build Coastguard Worker   .type 32
221*8fb009dcSAndroid Build Coastguard Worker.endef
222*8fb009dcSAndroid Build Coastguard Worker.align	5
223*8fb009dcSAndroid Build Coastguard Worker__bn_sqr8x_mont:
224*8fb009dcSAndroid Build Coastguard Worker	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
225*8fb009dcSAndroid Build Coastguard Worker	// only from bn_mul_mont which has already signed the return address.
226*8fb009dcSAndroid Build Coastguard Worker	cmp	x1,x2
227*8fb009dcSAndroid Build Coastguard Worker	b.ne	__bn_mul4x_mont
228*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_mont:
229*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-128]!
230*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
231*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
232*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
233*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
234*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[sp,#64]
235*8fb009dcSAndroid Build Coastguard Worker	stp	x27,x28,[sp,#80]
236*8fb009dcSAndroid Build Coastguard Worker	stp	x0,x3,[sp,#96]	// offload rp and np
237*8fb009dcSAndroid Build Coastguard Worker
238*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
239*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
240*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
241*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
242*8fb009dcSAndroid Build Coastguard Worker
243*8fb009dcSAndroid Build Coastguard Worker	sub	x2,sp,x5,lsl#4
244*8fb009dcSAndroid Build Coastguard Worker	lsl	x5,x5,#3
245*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x4]		// *n0
246*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x2			// alloca
247*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x5,#8*8
248*8fb009dcSAndroid Build Coastguard Worker	b	Lsqr8x_zero_start
249*8fb009dcSAndroid Build Coastguard Worker
250*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_zero:
251*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#8*8
252*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*0]
253*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*2]
254*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*4]
255*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*6]
256*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_zero_start:
257*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*8]
258*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*10]
259*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*12]
260*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*14]
261*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*16
262*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,Lsqr8x_zero
263*8fb009dcSAndroid Build Coastguard Worker
264*8fb009dcSAndroid Build Coastguard Worker	add	x3,x1,x5
265*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
266*8fb009dcSAndroid Build Coastguard Worker	mov	x19,xzr
267*8fb009dcSAndroid Build Coastguard Worker	mov	x20,xzr
268*8fb009dcSAndroid Build Coastguard Worker	mov	x21,xzr
269*8fb009dcSAndroid Build Coastguard Worker	mov	x22,xzr
270*8fb009dcSAndroid Build Coastguard Worker	mov	x23,xzr
271*8fb009dcSAndroid Build Coastguard Worker	mov	x24,xzr
272*8fb009dcSAndroid Build Coastguard Worker	mov	x25,xzr
273*8fb009dcSAndroid Build Coastguard Worker	mov	x26,xzr
274*8fb009dcSAndroid Build Coastguard Worker	mov	x2,sp
275*8fb009dcSAndroid Build Coastguard Worker	str	x4,[x29,#112]		// offload n0
276*8fb009dcSAndroid Build Coastguard Worker
277*8fb009dcSAndroid Build Coastguard Worker	// Multiply everything but a[i]*a[i]
278*8fb009dcSAndroid Build Coastguard Worker.align	4
279*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_outer_loop:
280*8fb009dcSAndroid Build Coastguard Worker        //                                                 a[1]a[0]	(i)
281*8fb009dcSAndroid Build Coastguard Worker        //                                             a[2]a[0]
282*8fb009dcSAndroid Build Coastguard Worker        //                                         a[3]a[0]
283*8fb009dcSAndroid Build Coastguard Worker        //                                     a[4]a[0]
284*8fb009dcSAndroid Build Coastguard Worker        //                                 a[5]a[0]
285*8fb009dcSAndroid Build Coastguard Worker        //                             a[6]a[0]
286*8fb009dcSAndroid Build Coastguard Worker        //                         a[7]a[0]
287*8fb009dcSAndroid Build Coastguard Worker        //                                         a[2]a[1]		(ii)
288*8fb009dcSAndroid Build Coastguard Worker        //                                     a[3]a[1]
289*8fb009dcSAndroid Build Coastguard Worker        //                                 a[4]a[1]
290*8fb009dcSAndroid Build Coastguard Worker        //                             a[5]a[1]
291*8fb009dcSAndroid Build Coastguard Worker        //                         a[6]a[1]
292*8fb009dcSAndroid Build Coastguard Worker        //                     a[7]a[1]
293*8fb009dcSAndroid Build Coastguard Worker        //                                 a[3]a[2]			(iii)
294*8fb009dcSAndroid Build Coastguard Worker        //                             a[4]a[2]
295*8fb009dcSAndroid Build Coastguard Worker        //                         a[5]a[2]
296*8fb009dcSAndroid Build Coastguard Worker        //                     a[6]a[2]
297*8fb009dcSAndroid Build Coastguard Worker        //                 a[7]a[2]
298*8fb009dcSAndroid Build Coastguard Worker        //                         a[4]a[3]				(iv)
299*8fb009dcSAndroid Build Coastguard Worker        //                     a[5]a[3]
300*8fb009dcSAndroid Build Coastguard Worker        //                 a[6]a[3]
301*8fb009dcSAndroid Build Coastguard Worker        //             a[7]a[3]
302*8fb009dcSAndroid Build Coastguard Worker        //                 a[5]a[4]					(v)
303*8fb009dcSAndroid Build Coastguard Worker        //             a[6]a[4]
304*8fb009dcSAndroid Build Coastguard Worker        //         a[7]a[4]
305*8fb009dcSAndroid Build Coastguard Worker        //         a[6]a[5]						(vi)
306*8fb009dcSAndroid Build Coastguard Worker        //     a[7]a[5]
307*8fb009dcSAndroid Build Coastguard Worker        // a[7]a[6]							(vii)
308*8fb009dcSAndroid Build Coastguard Worker
309*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x7,x6		// lo(a[1..7]*a[0])		(i)
310*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x8,x6
311*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x9,x6
312*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x10,x6
313*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x14		// t[1]+lo(a[1]*a[0])
314*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x11,x6
315*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x15
316*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x12,x6
317*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x16
318*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x13,x6
319*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x17
320*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x7,x6		// hi(a[1..7]*a[0])
321*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x14
322*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x8,x6
323*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x15
324*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x9,x6
325*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x16
326*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x10,x6
327*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2],#8*2	// t[0..1]
328*8fb009dcSAndroid Build Coastguard Worker	adc	x19,xzr,xzr		// t[8]
329*8fb009dcSAndroid Build Coastguard Worker	adds	x21,x21,x17		// t[2]+lo(a[1]*a[0])
330*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x11,x6
331*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x14
332*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x12,x6
333*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x15
334*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x13,x6
335*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x16
336*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x8,x7		// lo(a[2..7]*a[1])		(ii)
337*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x17
338*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x9,x7
339*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x14
340*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x7
341*8fb009dcSAndroid Build Coastguard Worker	adc	x19,x19,x15
342*8fb009dcSAndroid Build Coastguard Worker
343*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x7
344*8fb009dcSAndroid Build Coastguard Worker	adds	x22,x22,x16
345*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x7
346*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x17
347*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x7
348*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x14
349*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x8,x7		// hi(a[2..7]*a[1])
350*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x15
351*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x9,x7
352*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x16
353*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x10,x7
354*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x17
355*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x11,x7
356*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2],#8*2	// t[2..3]
357*8fb009dcSAndroid Build Coastguard Worker	adc	x20,xzr,xzr		// t[9]
358*8fb009dcSAndroid Build Coastguard Worker	adds	x23,x23,x14
359*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x12,x7
360*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
361*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x13,x7
362*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
363*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x9,x8		// lo(a[3..7]*a[2])		(iii)
364*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x17
365*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x10,x8
366*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x14
367*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x11,x8
368*8fb009dcSAndroid Build Coastguard Worker	adc	x20,x20,x15
369*8fb009dcSAndroid Build Coastguard Worker
370*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x12,x8
371*8fb009dcSAndroid Build Coastguard Worker	adds	x24,x24,x16
372*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x13,x8
373*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x17
374*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x9,x8		// hi(a[3..7]*a[2])
375*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x14
376*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x8
377*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x15
378*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x8
379*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x16
380*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x8
381*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2],#8*2	// t[4..5]
382*8fb009dcSAndroid Build Coastguard Worker	adc	x21,xzr,xzr		// t[10]
383*8fb009dcSAndroid Build Coastguard Worker	adds	x25,x25,x17
384*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x8
385*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x14
386*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x9		// lo(a[4..7]*a[3])		(iv)
387*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x15
388*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x9
389*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x16
390*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x9
391*8fb009dcSAndroid Build Coastguard Worker	adc	x21,x21,x17
392*8fb009dcSAndroid Build Coastguard Worker
393*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x9
394*8fb009dcSAndroid Build Coastguard Worker	adds	x26,x26,x14
395*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x9		// hi(a[4..7]*a[3])
396*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x19,x15
397*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x9
398*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x16
399*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x9
400*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x17
401*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x9
402*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2],#8*2	// t[6..7]
403*8fb009dcSAndroid Build Coastguard Worker	adc	x22,xzr,xzr		// t[11]
404*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
405*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x11,x10		// lo(a[5..7]*a[4])		(v)
406*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
407*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x12,x10
408*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
409*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x13,x10
410*8fb009dcSAndroid Build Coastguard Worker	adc	x22,x22,x17
411*8fb009dcSAndroid Build Coastguard Worker
412*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x11,x10		// hi(a[5..7]*a[4])
413*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x14
414*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x12,x10
415*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x15
416*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x13,x10
417*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x16
418*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x11		// lo(a[6..7]*a[5])		(vi)
419*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr		// t[12]
420*8fb009dcSAndroid Build Coastguard Worker	adds	x21,x21,x17
421*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x11
422*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x14
423*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x12,x11		// hi(a[6..7]*a[5])
424*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x15
425*8fb009dcSAndroid Build Coastguard Worker
426*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x13,x11
427*8fb009dcSAndroid Build Coastguard Worker	adds	x22,x22,x16
428*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x13,x12		// lo(a[7]*a[6])		(vii)
429*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x17
430*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x12		// hi(a[7]*a[6])
431*8fb009dcSAndroid Build Coastguard Worker	adc	x24,xzr,xzr		// t[13]
432*8fb009dcSAndroid Build Coastguard Worker	adds	x23,x23,x14
433*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x3,x1	// done yet?
434*8fb009dcSAndroid Build Coastguard Worker	adc	x24,x24,x15
435*8fb009dcSAndroid Build Coastguard Worker
436*8fb009dcSAndroid Build Coastguard Worker	adds	x24,x24,x16
437*8fb009dcSAndroid Build Coastguard Worker	sub	x14,x3,x5	// rewinded ap
438*8fb009dcSAndroid Build Coastguard Worker	adc	x25,xzr,xzr		// t[14]
439*8fb009dcSAndroid Build Coastguard Worker	add	x25,x25,x17
440*8fb009dcSAndroid Build Coastguard Worker
441*8fb009dcSAndroid Build Coastguard Worker	cbz	x27,Lsqr8x_outer_break
442*8fb009dcSAndroid Build Coastguard Worker
443*8fb009dcSAndroid Build Coastguard Worker	mov	x4,x6
444*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x2,#8*0]
445*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2,#8*2]
446*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#8*4]
447*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x2,#8*6]
448*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x6
449*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x7
450*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
451*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x8
452*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x9
453*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
454*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x10
455*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x11
456*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
457*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x12
458*8fb009dcSAndroid Build Coastguard Worker	mov	x0,x1
459*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,xzr,x13
460*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
461*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
462*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved below
463*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#-8*8
464*8fb009dcSAndroid Build Coastguard Worker
465*8fb009dcSAndroid Build Coastguard Worker	//                                                         a[8]a[0]
466*8fb009dcSAndroid Build Coastguard Worker	//                                                     a[9]a[0]
467*8fb009dcSAndroid Build Coastguard Worker	//                                                 a[a]a[0]
468*8fb009dcSAndroid Build Coastguard Worker	//                                             a[b]a[0]
469*8fb009dcSAndroid Build Coastguard Worker	//                                         a[c]a[0]
470*8fb009dcSAndroid Build Coastguard Worker	//                                     a[d]a[0]
471*8fb009dcSAndroid Build Coastguard Worker	//                                 a[e]a[0]
472*8fb009dcSAndroid Build Coastguard Worker	//                             a[f]a[0]
473*8fb009dcSAndroid Build Coastguard Worker	//                                                     a[8]a[1]
474*8fb009dcSAndroid Build Coastguard Worker	//                         a[f]a[1]........................
475*8fb009dcSAndroid Build Coastguard Worker	//                                                 a[8]a[2]
476*8fb009dcSAndroid Build Coastguard Worker	//                     a[f]a[2]........................
477*8fb009dcSAndroid Build Coastguard Worker	//                                             a[8]a[3]
478*8fb009dcSAndroid Build Coastguard Worker	//                 a[f]a[3]........................
479*8fb009dcSAndroid Build Coastguard Worker	//                                         a[8]a[4]
480*8fb009dcSAndroid Build Coastguard Worker	//             a[f]a[4]........................
481*8fb009dcSAndroid Build Coastguard Worker	//                                     a[8]a[5]
482*8fb009dcSAndroid Build Coastguard Worker	//         a[f]a[5]........................
483*8fb009dcSAndroid Build Coastguard Worker	//                                 a[8]a[6]
484*8fb009dcSAndroid Build Coastguard Worker	//     a[f]a[6]........................
485*8fb009dcSAndroid Build Coastguard Worker	//                             a[8]a[7]
486*8fb009dcSAndroid Build Coastguard Worker	// a[f]a[7]........................
487*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_mul:
488*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x6,x4
489*8fb009dcSAndroid Build Coastguard Worker	adc	x28,xzr,xzr		// carry bit, modulo-scheduled
490*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x7,x4
491*8fb009dcSAndroid Build Coastguard Worker	add	x27,x27,#8
492*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x8,x4
493*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x9,x4
494*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
495*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x4
496*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
497*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x4
498*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
499*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x4
500*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x17
501*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x4
502*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x14
503*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x6,x4
504*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
505*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x7,x4
506*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
507*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x8,x4
508*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x17
509*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x9,x4
510*8fb009dcSAndroid Build Coastguard Worker	adc	x28,x28,xzr
511*8fb009dcSAndroid Build Coastguard Worker	str	x19,[x2],#8
512*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x20,x14
513*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x4
514*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x15
515*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x4
516*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x16
517*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x4
518*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x17
519*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x4
520*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x0,x27]
521*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x24,x14
522*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x25,x15
523*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x26,x16
524*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x28,x17
525*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved above
526*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,Lsqr8x_mul
527*8fb009dcSAndroid Build Coastguard Worker					// note that carry flag is guaranteed
528*8fb009dcSAndroid Build Coastguard Worker					// to be zero at this point
529*8fb009dcSAndroid Build Coastguard Worker	cmp	x1,x3		// done yet?
530*8fb009dcSAndroid Build Coastguard Worker	b.eq	Lsqr8x_break
531*8fb009dcSAndroid Build Coastguard Worker
532*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x2,#8*0]
533*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2,#8*2]
534*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#8*4]
535*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x2,#8*6]
536*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x6
537*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x0,#-8*8]
538*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x7
539*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
540*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x8
541*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x9
542*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
543*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x10
544*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x11
545*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
546*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x12
547*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#-8*8
548*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x13
549*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
550*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
551*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved above
552*8fb009dcSAndroid Build Coastguard Worker	b	Lsqr8x_mul
553*8fb009dcSAndroid Build Coastguard Worker
554*8fb009dcSAndroid Build Coastguard Worker.align	4
555*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_break:
556*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x0,#8*0]
557*8fb009dcSAndroid Build Coastguard Worker	add	x1,x0,#8*8
558*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x0,#8*2]
559*8fb009dcSAndroid Build Coastguard Worker	sub	x14,x3,x1		// is it last iteration?
560*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x0,#8*4]
561*8fb009dcSAndroid Build Coastguard Worker	sub	x15,x2,x14
562*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x0,#8*6]
563*8fb009dcSAndroid Build Coastguard Worker	cbz	x14,Lsqr8x_outer_loop
564*8fb009dcSAndroid Build Coastguard Worker
565*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2,#8*0]
566*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x15,#8*0]
567*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
568*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x15,#8*2]
569*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
570*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x15,#8*4]
571*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
572*8fb009dcSAndroid Build Coastguard Worker	mov	x2,x15
573*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x15,#8*6]
574*8fb009dcSAndroid Build Coastguard Worker	b	Lsqr8x_outer_loop
575*8fb009dcSAndroid Build Coastguard Worker
576*8fb009dcSAndroid Build Coastguard Worker.align	4
577*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_outer_break:
578*8fb009dcSAndroid Build Coastguard Worker	// Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
579*8fb009dcSAndroid Build Coastguard Worker	ldp	x7,x9,[x14,#8*0]	// recall that x14 is &a[0]
580*8fb009dcSAndroid Build Coastguard Worker	ldp	x15,x16,[sp,#8*1]
581*8fb009dcSAndroid Build Coastguard Worker	ldp	x11,x13,[x14,#8*2]
582*8fb009dcSAndroid Build Coastguard Worker	add	x1,x14,#8*4
583*8fb009dcSAndroid Build Coastguard Worker	ldp	x17,x14,[sp,#8*3]
584*8fb009dcSAndroid Build Coastguard Worker
585*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2,#8*0]
586*8fb009dcSAndroid Build Coastguard Worker	mul	x19,x7,x7
587*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
588*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x7
589*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
590*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x9,x9
591*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
592*8fb009dcSAndroid Build Coastguard Worker	mov	x2,sp
593*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x9,x9
594*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x7,x15,lsl#1
595*8fb009dcSAndroid Build Coastguard Worker	extr	x15,x16,x15,#63
596*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x5,#8*4
597*8fb009dcSAndroid Build Coastguard Worker
598*8fb009dcSAndroid Build Coastguard WorkerLsqr4x_shift_n_add:
599*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x8,x15
600*8fb009dcSAndroid Build Coastguard Worker	extr	x16,x17,x16,#63
601*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#8*4
602*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x9,x16
603*8fb009dcSAndroid Build Coastguard Worker	ldp	x15,x16,[x2,#8*5]
604*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x11,x11
605*8fb009dcSAndroid Build Coastguard Worker	ldp	x7,x9,[x1],#8*2
606*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x11,x11
607*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x13,x13
608*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x13,x13
609*8fb009dcSAndroid Build Coastguard Worker	extr	x17,x14,x17,#63
610*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2,#8*0]
611*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x10,x17
612*8fb009dcSAndroid Build Coastguard Worker	extr	x14,x15,x14,#63
613*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
614*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x11,x14
615*8fb009dcSAndroid Build Coastguard Worker	ldp	x17,x14,[x2,#8*7]
616*8fb009dcSAndroid Build Coastguard Worker	extr	x15,x16,x15,#63
617*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x12,x15
618*8fb009dcSAndroid Build Coastguard Worker	extr	x16,x17,x16,#63
619*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x13,x16
620*8fb009dcSAndroid Build Coastguard Worker	ldp	x15,x16,[x2,#8*9]
621*8fb009dcSAndroid Build Coastguard Worker	mul	x6,x7,x7
622*8fb009dcSAndroid Build Coastguard Worker	ldp	x11,x13,[x1],#8*2
623*8fb009dcSAndroid Build Coastguard Worker	umulh	x7,x7,x7
624*8fb009dcSAndroid Build Coastguard Worker	mul	x8,x9,x9
625*8fb009dcSAndroid Build Coastguard Worker	umulh	x9,x9,x9
626*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
627*8fb009dcSAndroid Build Coastguard Worker	extr	x17,x14,x17,#63
628*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
629*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*8
630*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x6,x17
631*8fb009dcSAndroid Build Coastguard Worker	extr	x14,x15,x14,#63
632*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x7,x14
633*8fb009dcSAndroid Build Coastguard Worker	ldp	x17,x14,[x2,#8*3]
634*8fb009dcSAndroid Build Coastguard Worker	extr	x15,x16,x15,#63
635*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,Lsqr4x_shift_n_add
636*8fb009dcSAndroid Build Coastguard Worker	ldp	x1,x4,[x29,#104]	// pull np and n0
637*8fb009dcSAndroid Build Coastguard Worker
638*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x8,x15
639*8fb009dcSAndroid Build Coastguard Worker	extr	x16,x17,x16,#63
640*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x9,x16
641*8fb009dcSAndroid Build Coastguard Worker	ldp	x15,x16,[x2,#8*5]
642*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x11,x11
643*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x11,x11
644*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x2,#8*0]
645*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x13,x13
646*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x13,x13
647*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
648*8fb009dcSAndroid Build Coastguard Worker	extr	x17,x14,x17,#63
649*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x10,x17
650*8fb009dcSAndroid Build Coastguard Worker	extr	x14,x15,x14,#63
651*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#8*0]
652*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x11,x14
653*8fb009dcSAndroid Build Coastguard Worker	extr	x15,x16,x15,#63
654*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
655*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x12,x15
656*8fb009dcSAndroid Build Coastguard Worker	extr	x16,xzr,x16,#63
657*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
658*8fb009dcSAndroid Build Coastguard Worker	adc	x26,x13,x16
659*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
660*8fb009dcSAndroid Build Coastguard Worker
661*8fb009dcSAndroid Build Coastguard Worker	// Reduce by 512 bits per iteration
662*8fb009dcSAndroid Build Coastguard Worker	mul	x28,x4,x19		// t[0]*n0
663*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
664*8fb009dcSAndroid Build Coastguard Worker	add	x3,x1,x5
665*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#8*2]
666*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
667*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[sp,#8*4]
668*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
669*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[sp,#8*6]
670*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
671*8fb009dcSAndroid Build Coastguard Worker	mov	x30,xzr		// initial top-most carry
672*8fb009dcSAndroid Build Coastguard Worker	mov	x2,sp
673*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#8
674*8fb009dcSAndroid Build Coastguard Worker
675*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_reduction:
676*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x14,x6,x28	// lo(n[0-7])*lo(t[0]*n0)
677*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x7,x28
678*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#1
679*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x8,x28
680*8fb009dcSAndroid Build Coastguard Worker	str	x28,[x2],#8		// put aside t[0]*n0 for tail processing
681*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x9,x28
682*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	xzr,x19,x14
683*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x19,#1		// (*)
684*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x28
685*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x20,x15
686*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x28
687*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x16
688*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x28
689*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x17
690*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x28
691*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x14
692*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x6,x28		// hi(n[0-7])*lo(t[0]*n0)
693*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x24,x15
694*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x7,x28
695*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x25,x16
696*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x8,x28
697*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x26,x17
698*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x9,x28
699*8fb009dcSAndroid Build Coastguard Worker	adc	x26,xzr,xzr
700*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
701*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x28
702*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
703*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x28
704*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
705*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x28
706*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x17
707*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x28
708*8fb009dcSAndroid Build Coastguard Worker	mul	x28,x4,x19		// next t[0]*n0
709*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x14
710*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
711*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
712*8fb009dcSAndroid Build Coastguard Worker	adc	x26,x26,x17
713*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,Lsqr8x_reduction
714*8fb009dcSAndroid Build Coastguard Worker
715*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x2,#8*0]
716*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x2,#8*2]
717*8fb009dcSAndroid Build Coastguard Worker	mov	x0,x2
718*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x3,x1	// done yet?
719*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
720*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
721*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x2,#8*4]
722*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
723*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x17
724*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x2,#8*6]
725*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x14
726*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
727*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
728*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x17
729*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved below
730*8fb009dcSAndroid Build Coastguard Worker	cbz	x27,Lsqr8x8_post_condition
731*8fb009dcSAndroid Build Coastguard Worker
732*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x2,#-8*8]
733*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
734*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
735*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
736*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#-8*8
737*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
738*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
739*8fb009dcSAndroid Build Coastguard Worker
740*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_tail:
741*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x6,x4
742*8fb009dcSAndroid Build Coastguard Worker	adc	x28,xzr,xzr		// carry bit, modulo-scheduled
743*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x7,x4
744*8fb009dcSAndroid Build Coastguard Worker	add	x27,x27,#8
745*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x8,x4
746*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x9,x4
747*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x14
748*8fb009dcSAndroid Build Coastguard Worker	mul	x14,x10,x4
749*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x15
750*8fb009dcSAndroid Build Coastguard Worker	mul	x15,x11,x4
751*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x16
752*8fb009dcSAndroid Build Coastguard Worker	mul	x16,x12,x4
753*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x17
754*8fb009dcSAndroid Build Coastguard Worker	mul	x17,x13,x4
755*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x14
756*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x6,x4
757*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x15
758*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x7,x4
759*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x16
760*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x8,x4
761*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x17
762*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x9,x4
763*8fb009dcSAndroid Build Coastguard Worker	adc	x28,x28,xzr
764*8fb009dcSAndroid Build Coastguard Worker	str	x19,[x2],#8
765*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x20,x14
766*8fb009dcSAndroid Build Coastguard Worker	umulh	x14,x10,x4
767*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x15
768*8fb009dcSAndroid Build Coastguard Worker	umulh	x15,x11,x4
769*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x16
770*8fb009dcSAndroid Build Coastguard Worker	umulh	x16,x12,x4
771*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x17
772*8fb009dcSAndroid Build Coastguard Worker	umulh	x17,x13,x4
773*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x0,x27]
774*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x24,x14
775*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x25,x15
776*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x26,x16
777*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x28,x17
778*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved above
779*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,Lsqr8x_tail
780*8fb009dcSAndroid Build Coastguard Worker					// note that carry flag is guaranteed
781*8fb009dcSAndroid Build Coastguard Worker					// to be zero at this point
782*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x2,#8*0]
783*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x3,x1	// done yet?
784*8fb009dcSAndroid Build Coastguard Worker	sub	x16,x3,x5	// rewinded np
785*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x2,#8*2]
786*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x2,#8*4]
787*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x2,#8*6]
788*8fb009dcSAndroid Build Coastguard Worker	cbz	x27,Lsqr8x_tail_break
789*8fb009dcSAndroid Build Coastguard Worker
790*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x0,#-8*8]
791*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x6
792*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x7
793*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
794*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x8
795*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x9
796*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
797*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x10
798*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x11
799*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
800*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x12
801*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#-8*8
802*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x13
803*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
804*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
805*8fb009dcSAndroid Build Coastguard Worker	//adc	x28,xzr,xzr		// moved above
806*8fb009dcSAndroid Build Coastguard Worker	b	Lsqr8x_tail
807*8fb009dcSAndroid Build Coastguard Worker
808*8fb009dcSAndroid Build Coastguard Worker.align	4
809*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_tail_break:
810*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x29,#112]		// pull n0
811*8fb009dcSAndroid Build Coastguard Worker	add	x27,x2,#8*8		// end of current t[num] window
812*8fb009dcSAndroid Build Coastguard Worker
813*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x30,#1		// "move" top-most carry to carry bit
814*8fb009dcSAndroid Build Coastguard Worker	adcs	x14,x19,x6
815*8fb009dcSAndroid Build Coastguard Worker	adcs	x15,x20,x7
816*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x0,#8*0]
817*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x8
818*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x16,#8*0]	// recall that x16 is &n[0]
819*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x9
820*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x16,#8*2]
821*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x10
822*8fb009dcSAndroid Build Coastguard Worker	adcs	x24,x24,x11
823*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x16,#8*4]
824*8fb009dcSAndroid Build Coastguard Worker	adcs	x25,x25,x12
825*8fb009dcSAndroid Build Coastguard Worker	adcs	x26,x26,x13
826*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x16,#8*6]
827*8fb009dcSAndroid Build Coastguard Worker	add	x1,x16,#8*8
828*8fb009dcSAndroid Build Coastguard Worker	adc	x30,xzr,xzr	// top-most carry
829*8fb009dcSAndroid Build Coastguard Worker	mul	x28,x4,x19
830*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x2,#8*0]
831*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x2,#8*2]
832*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x0,#8*2]
833*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[x2,#8*4]
834*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x0,#8*4]
835*8fb009dcSAndroid Build Coastguard Worker	cmp	x27,x29		// did we hit the bottom?
836*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[x2,#8*6]
837*8fb009dcSAndroid Build Coastguard Worker	mov	x2,x0			// slide the window
838*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x0,#8*6]
839*8fb009dcSAndroid Build Coastguard Worker	mov	x27,#8
840*8fb009dcSAndroid Build Coastguard Worker	b.ne	Lsqr8x_reduction
841*8fb009dcSAndroid Build Coastguard Worker
842*8fb009dcSAndroid Build Coastguard Worker	// Final step. We see if result is larger than modulus, and
843*8fb009dcSAndroid Build Coastguard Worker	// if it is, subtract the modulus. But comparison implies
844*8fb009dcSAndroid Build Coastguard Worker	// subtraction. So we subtract modulus, see if it borrowed,
845*8fb009dcSAndroid Build Coastguard Worker	// and conditionally copy original value.
846*8fb009dcSAndroid Build Coastguard Worker	ldr	x0,[x29,#96]		// pull rp
847*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*8
848*8fb009dcSAndroid Build Coastguard Worker	subs	x14,x19,x6
849*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x20,x7
850*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x5,#8*8
851*8fb009dcSAndroid Build Coastguard Worker	mov	x3,x0		// x0 copy
852*8fb009dcSAndroid Build Coastguard Worker
853*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_sub:
854*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x21,x8
855*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
856*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x22,x9
857*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0,#8*0]
858*8fb009dcSAndroid Build Coastguard Worker	sbcs	x14,x23,x10
859*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
860*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x24,x11
861*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#8*2]
862*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x25,x12
863*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x1,#8*4]
864*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x26,x13
865*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x1,#8*6]
866*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*8
867*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x2,#8*0]
868*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#8*8
869*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x2,#8*2]
870*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x2,#8*4]
871*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x2,#8*6]
872*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*8
873*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0,#8*4]
874*8fb009dcSAndroid Build Coastguard Worker	sbcs	x14,x19,x6
875*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#8*6]
876*8fb009dcSAndroid Build Coastguard Worker	add	x0,x0,#8*8
877*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x20,x7
878*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,Lsqr8x_sub
879*8fb009dcSAndroid Build Coastguard Worker
880*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x21,x8
881*8fb009dcSAndroid Build Coastguard Worker	mov	x2,sp
882*8fb009dcSAndroid Build Coastguard Worker	add	x1,sp,x5
883*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x3,#8*0]
884*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x22,x9
885*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0,#8*0]
886*8fb009dcSAndroid Build Coastguard Worker	sbcs	x14,x23,x10
887*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x3,#8*2]
888*8fb009dcSAndroid Build Coastguard Worker	sbcs	x15,x24,x11
889*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#8*2]
890*8fb009dcSAndroid Build Coastguard Worker	sbcs	x16,x25,x12
891*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x1,#8*0]
892*8fb009dcSAndroid Build Coastguard Worker	sbcs	x17,x26,x13
893*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x1,#8*2]
894*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x30,xzr	// did it borrow?
895*8fb009dcSAndroid Build Coastguard Worker	ldr	x30,[x29,#8]		// pull return address
896*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x0,#8*4]
897*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x0,#8*6]
898*8fb009dcSAndroid Build Coastguard Worker
899*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x5,#8*4
900*8fb009dcSAndroid Build Coastguard WorkerLsqr4x_cond_copy:
901*8fb009dcSAndroid Build Coastguard Worker	sub	x27,x27,#8*4
902*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x19,x6,lo
903*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*0]
904*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x20,x7,lo
905*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x3,#8*4]
906*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x1,#8*4]
907*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x21,x8,lo
908*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*2]
909*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*4
910*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x22,x9,lo
911*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x3,#8*6]
912*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x1,#8*6]
913*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
914*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x3,#8*0]
915*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x3,#8*2]
916*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
917*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x1,#8*0]
918*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x1,#8*2]
919*8fb009dcSAndroid Build Coastguard Worker	cbnz	x27,Lsqr4x_cond_copy
920*8fb009dcSAndroid Build Coastguard Worker
921*8fb009dcSAndroid Build Coastguard Worker	csel	x14,x19,x6,lo
922*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*0]
923*8fb009dcSAndroid Build Coastguard Worker	csel	x15,x20,x7,lo
924*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x2,#8*2]
925*8fb009dcSAndroid Build Coastguard Worker	csel	x16,x21,x8,lo
926*8fb009dcSAndroid Build Coastguard Worker	csel	x17,x22,x9,lo
927*8fb009dcSAndroid Build Coastguard Worker	stp	x14,x15,[x3,#8*0]
928*8fb009dcSAndroid Build Coastguard Worker	stp	x16,x17,[x3,#8*2]
929*8fb009dcSAndroid Build Coastguard Worker
930*8fb009dcSAndroid Build Coastguard Worker	b	Lsqr8x_done
931*8fb009dcSAndroid Build Coastguard Worker
932*8fb009dcSAndroid Build Coastguard Worker.align	4
933*8fb009dcSAndroid Build Coastguard WorkerLsqr8x8_post_condition:
934*8fb009dcSAndroid Build Coastguard Worker	adc	x28,xzr,xzr
935*8fb009dcSAndroid Build Coastguard Worker	ldr	x30,[x29,#8]		// pull return address
936*8fb009dcSAndroid Build Coastguard Worker	// x19-x26,x28 hold result, x6-x13 hold modulus
937*8fb009dcSAndroid Build Coastguard Worker	subs	x6,x19,x6
938*8fb009dcSAndroid Build Coastguard Worker	ldr	x1,[x29,#96]		// pull rp
939*8fb009dcSAndroid Build Coastguard Worker	sbcs	x7,x20,x7
940*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*0]
941*8fb009dcSAndroid Build Coastguard Worker	sbcs	x8,x21,x8
942*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*2]
943*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x22,x9
944*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*4]
945*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x23,x10
946*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*6]
947*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x24,x11
948*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*8]
949*8fb009dcSAndroid Build Coastguard Worker	sbcs	x12,x25,x12
950*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*10]
951*8fb009dcSAndroid Build Coastguard Worker	sbcs	x13,x26,x13
952*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*12]
953*8fb009dcSAndroid Build Coastguard Worker	sbcs	x28,x28,xzr	// did it borrow?
954*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*14]
955*8fb009dcSAndroid Build Coastguard Worker
956*8fb009dcSAndroid Build Coastguard Worker	// x6-x13 hold result-modulus
957*8fb009dcSAndroid Build Coastguard Worker	csel	x6,x19,x6,lo
958*8fb009dcSAndroid Build Coastguard Worker	csel	x7,x20,x7,lo
959*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x21,x8,lo
960*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x22,x9,lo
961*8fb009dcSAndroid Build Coastguard Worker	stp	x6,x7,[x1,#8*0]
962*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x23,x10,lo
963*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x24,x11,lo
964*8fb009dcSAndroid Build Coastguard Worker	stp	x8,x9,[x1,#8*2]
965*8fb009dcSAndroid Build Coastguard Worker	csel	x12,x25,x12,lo
966*8fb009dcSAndroid Build Coastguard Worker	csel	x13,x26,x13,lo
967*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x1,#8*4]
968*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x1,#8*6]
969*8fb009dcSAndroid Build Coastguard Worker
970*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_done:
971*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
972*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x29
973*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
974*8fb009dcSAndroid Build Coastguard Worker	mov	x0,#1
975*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
976*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
977*8fb009dcSAndroid Build Coastguard Worker	ldp	x27,x28,[x29,#80]
978*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#128
979*8fb009dcSAndroid Build Coastguard Worker	// x30 is popped earlier
980*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
981*8fb009dcSAndroid Build Coastguard Worker	ret
982*8fb009dcSAndroid Build Coastguard Worker
// __bn_mul4x_mont: Montgomery multiplication that walks the operands four
// 64-bit words at a time. bn_mul_mont branches here when the low two bits of
// x5 are clear (after first testing the low three bits for __bn_sqr8x_mont).
//
// Register use on entry, as annotated by the loads below:
//   x0 = rp (result)            x1 = ap (a[])       x2 = bp (b[])
//   x3 = np (n[], the modulus)  x4 = pointer to n0  x5 = num, in 64-bit words
// The routine returns with x0 = 1 (set at Lmul4x_done).
//
// Frame, 128 bytes addressed through x29:
//   [0] x29  [8] x30  [16..95] x19-x28  [96] rp  [104] &b[num]
// Below the frame, num+4 words of scratch are carved out of the stack:
// sp[0..3] receive the four t[0]*n0 factors of the current pass and the
// running result t[] is kept from sp[4] upwards.
983*8fb009dcSAndroid Build Coastguard Worker.def __bn_mul4x_mont
984*8fb009dcSAndroid Build Coastguard Worker   .type 32
985*8fb009dcSAndroid Build Coastguard Worker.endef
986*8fb009dcSAndroid Build Coastguard Worker.align	5
987*8fb009dcSAndroid Build Coastguard Worker__bn_mul4x_mont:
988*8fb009dcSAndroid Build Coastguard Worker	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
989*8fb009dcSAndroid Build Coastguard Worker	// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
990*8fb009dcSAndroid Build Coastguard Worker	// return address.
	// NOTE(review): "__bn_mul8x_mont" is presumably __bn_sqr8x_mont, the other
	// routine bn_mul_mont dispatches to; confirm against the generator script.
991*8fb009dcSAndroid Build Coastguard Worker	stp	x29,x30,[sp,#-128]!
992*8fb009dcSAndroid Build Coastguard Worker	add	x29,sp,#0
993*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[sp,#16]
994*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[sp,#32]
995*8fb009dcSAndroid Build Coastguard Worker	stp	x23,x24,[sp,#48]
996*8fb009dcSAndroid Build Coastguard Worker	stp	x25,x26,[sp,#64]
997*8fb009dcSAndroid Build Coastguard Worker	stp	x27,x28,[sp,#80]
998*8fb009dcSAndroid Build Coastguard Worker
999*8fb009dcSAndroid Build Coastguard Worker	sub	x26,sp,x5,lsl#3	// x26 = sp - num*8
1000*8fb009dcSAndroid Build Coastguard Worker	lsl	x5,x5,#3	// x5 = num*8; a byte count from here on
1001*8fb009dcSAndroid Build Coastguard Worker	ldr	x4,[x4]		// *n0
1002*8fb009dcSAndroid Build Coastguard Worker	sub	sp,x26,#8*4		// alloca
1003*8fb009dcSAndroid Build Coastguard Worker
1004*8fb009dcSAndroid Build Coastguard Worker	add	x10,x2,x5	// x10 = &b[num]
1005*8fb009dcSAndroid Build Coastguard Worker	add	x27,x1,x5	// x27 = &a[num], end marker for the a[] walk
1006*8fb009dcSAndroid Build Coastguard Worker	stp	x0,x10,[x29,#96]	// offload rp and &b[num]
1007*8fb009dcSAndroid Build Coastguard Worker
1008*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,#8*0]		// b[0]
1009*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]	// a[0..3]
1010*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1011*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1012*8fb009dcSAndroid Build Coastguard Worker	mov	x19,xzr	// x19-x22: accumulator words, start at zero
1013*8fb009dcSAndroid Build Coastguard Worker	mov	x20,xzr
1014*8fb009dcSAndroid Build Coastguard Worker	mov	x21,xzr
1015*8fb009dcSAndroid Build Coastguard Worker	mov	x22,xzr
1016*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]	// n[0..3]
1017*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1018*8fb009dcSAndroid Build Coastguard Worker	adds	x3,x3,#8*4		// clear carry bit
1019*8fb009dcSAndroid Build Coastguard Worker	mov	x0,xzr	// x0 = deferred top carry
1020*8fb009dcSAndroid Build Coastguard Worker	mov	x28,#0	// x28 = byte offset of the next b word within the group of four
1021*8fb009dcSAndroid Build Coastguard Worker	mov	x26,sp	// x26 = write cursor into the scratch area
1022*8fb009dcSAndroid Build Coastguard Worker
// First pass, lowest four words. Each iteration:
//   - adds a[0..3]*b[i] (held in x6-x9 and x24) into the accumulator x19-x23,
//   - derives the reduction factor x25 = x19*n0 and saves it through x26,
//   - adds n[0..3]*x25 (n[] held in x14-x17) and moves every accumulator word
//     down one position, so the lowest word is dropped.
// The top carry is parked in x0; the carry out of the last adcs is picked up
// by the `adc x0,x0,xzr` at the top of the next iteration (the instructions
// in between do not touch the flags).
// x28 steps 8,16,24,0 (add 8, and 31), so the body runs four times.
1023*8fb009dcSAndroid Build Coastguard WorkerLoop_mul4x_1st_reduction:
1024*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x24		// lo(a[0..3]*b[0])
1025*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr	// modulo-scheduled
1026*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x24
1027*8fb009dcSAndroid Build Coastguard Worker	add	x28,x28,#8
1028*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x8,x24
1029*8fb009dcSAndroid Build Coastguard Worker	and	x28,x28,#31
1030*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x9,x24
1031*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1032*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x24		// hi(a[0..3]*b[0])
1033*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1034*8fb009dcSAndroid Build Coastguard Worker	mul	x25,x19,x4		// t[0]*n0
1035*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1036*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x24
1037*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1038*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x8,x24
1039*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr
1040*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x9,x24
1041*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,x28]		// next b[i] (or b[0])
1042*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x10
1043*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x10,x14,x25	// lo(n[0..3]*t[0]*n0)
1044*8fb009dcSAndroid Build Coastguard Worker	str	x25,[x26],#8		// put aside t[0]*n0 for tail processing
1045*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x11
1046*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x15,x25
1047*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x12
1048*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x16,x25
1049*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x13		// can't overflow
1050*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x17,x25
1051*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	xzr,x19,x10
	// (*) The commented-out mul/adds pair would add lo(n[0]*x25) to x19 and
	// keep only the carry-out. The subs below produces that carry directly:
	// it sets C exactly when x19 is non-zero. Presumably this relies on n0
	// being chosen so that the low word of that sum is always zero; confirm.
1052*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x19,#1		// (*)
1053*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x14,x25		// hi(n[0..3]*t[0]*n0)
1054*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x20,x11
1055*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x15,x25
1056*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x12
1057*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x16,x25
1058*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x13
1059*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x17,x25
1060*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x0
1061*8fb009dcSAndroid Build Coastguard Worker	adc	x0,xzr,xzr
1062*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1063*8fb009dcSAndroid Build Coastguard Worker	sub	x10,x27,x1	// x10 = bytes of a[] left after this group (flags untouched)
1064*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1065*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1066*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1067*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1068*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,Loop_mul4x_1st_reduction
1069*8fb009dcSAndroid Build Coastguard Worker
1070*8fb009dcSAndroid Build Coastguard Worker	cbz	x10,Lmul4x4_post_condition	// nothing left of a[]: num == 4
1071*8fb009dcSAndroid Build Coastguard Worker
	// More words remain: load the next four words of a[] and n[] and the
	// first saved reduction factor for the tail loop.
1072*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]	// a[4..7]
1073*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1074*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1075*8fb009dcSAndroid Build Coastguard Worker	ldr	x25,[sp]		// first t[0]*n0 factor saved by the loop above
1076*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]	// n[4..7]
1077*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1078*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1079*8fb009dcSAndroid Build Coastguard Worker
// First pass, remaining words. For the current group of four words of a[]
// (x6-x9) and n[] (x14-x17), each of the four iterations adds a[..]*b[i]
// (b[i] in x24) and n[..]*factor_i (x25, read back from sp[0..3] via x28)
// into x19-x23, stores the finished low word through x26 and moves the
// accumulator down one word. x0 carries the deferred top carry between
// iterations; x28 wraps 8,16,24,0 so the body runs four times per group.
1080*8fb009dcSAndroid Build Coastguard WorkerLoop_mul4x_1st_tail:
1081*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x24		// lo(a[4..7]*b[i])
1082*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr	// modulo-scheduled
1083*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x24
1084*8fb009dcSAndroid Build Coastguard Worker	add	x28,x28,#8
1085*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x8,x24
1086*8fb009dcSAndroid Build Coastguard Worker	and	x28,x28,#31
1087*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x9,x24
1088*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1089*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x24		// hi(a[4..7]*b[i])
1090*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1091*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x24
1092*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1093*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x8,x24
1094*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1095*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x9,x24
1096*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr
1097*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,x28]		// next b[i] (or b[0])
1098*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x10
1099*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x14,x25		// lo(n[4..7]*a[0]*n0)
1100*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x11
1101*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x15,x25
1102*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x12
1103*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x16,x25
1104*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x13		// can't overflow
1105*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x17,x25
1106*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1107*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x14,x25		// hi(n[4..7]*a[0]*n0)
1108*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1109*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x15,x25
1110*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1111*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x16,x25
1112*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1113*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x0
1114*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x17,x25
1115*8fb009dcSAndroid Build Coastguard Worker	adc	x0,xzr,xzr
1116*8fb009dcSAndroid Build Coastguard Worker	ldr	x25,[sp,x28]		// next t[0]*n0
1117*8fb009dcSAndroid Build Coastguard Worker	str	x19,[x26],#8		// result!!!
1118*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x20,x10
1119*8fb009dcSAndroid Build Coastguard Worker	sub	x10,x27,x1		// done yet?
1120*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x11
1121*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x12
1122*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x13
1123*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1124*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,Loop_mul4x_1st_tail
1125*8fb009dcSAndroid Build Coastguard Worker
1126*8fb009dcSAndroid Build Coastguard Worker	sub	x11,x27,x5	// x11 = &a[num] - num*8, i.e. ap rewound to a[0]
1127*8fb009dcSAndroid Build Coastguard Worker	cbz	x10,Lmul4x_proceed	// x1 reached &a[num]: first pass complete
1128*8fb009dcSAndroid Build Coastguard Worker
	// Otherwise fetch the next four words of a[] and n[] and go round again.
1129*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
1130*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1131*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1132*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]
1133*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1134*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1135*8fb009dcSAndroid Build Coastguard Worker	b	Loop_mul4x_1st_tail
1136*8fb009dcSAndroid Build Coastguard Worker
// End of the first pass over a[]. Advance bp by four words and load the next
// b word, keep the top carry in x30, flush the last four accumulator words to
// scratch through x26, and reload a[0..3], n[0..3] and t[0..3] for the next
// pass. On entry x11 holds ap rewound to a[0].
// x30 is used as a data register from here on; the prologue saved the return
// address at [x29,#8] and it is reloaded from there before the routine returns.
1137*8fb009dcSAndroid Build Coastguard Worker.align	5
1138*8fb009dcSAndroid Build Coastguard WorkerLmul4x_proceed:
1139*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,#8*4]!		// *++b
1140*8fb009dcSAndroid Build Coastguard Worker	adc	x30,x0,xzr	// x30 = top carry word of this pass
1141*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x11,#8*0]	// a[0..3]
1142*8fb009dcSAndroid Build Coastguard Worker	sub	x3,x3,x5		// rewind np
1143*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x11,#8*2]
1144*8fb009dcSAndroid Build Coastguard Worker	add	x1,x11,#8*4
1145*8fb009dcSAndroid Build Coastguard Worker
1146*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x26,#8*0]	// result!!!
1147*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#8*4]	// t[0..3]
1148*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x26,#8*2]	// result!!!
1149*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#8*6]
1150*8fb009dcSAndroid Build Coastguard Worker
1151*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]	// n[0..3]
1152*8fb009dcSAndroid Build Coastguard Worker	mov	x26,sp	// restart the scratch write cursor
1153*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1154*8fb009dcSAndroid Build Coastguard Worker	adds	x3,x3,#8*4		// clear carry bit
1155*8fb009dcSAndroid Build Coastguard Worker	mov	x0,xzr	// reset the deferred top carry
1156*8fb009dcSAndroid Build Coastguard Worker
// Later passes, lowest four words. The accumulator x19-x22 enters holding
// t[0..3] from scratch. Each of the four iterations (x28 wraps 8,16,24,0):
//   - adds a[0..3]*b[i] (x6-x9 times x24),
//   - derives x25 = x19*n0 and saves it through x26 for the tail loop,
//   - adds n[0..3]*x25 (x14-x17) and moves the accumulator down one word.
// x0 holds the deferred top carry, folded in by the adc at the loop top.
1157*8fb009dcSAndroid Build Coastguard Worker.align	4
1158*8fb009dcSAndroid Build Coastguard WorkerLoop_mul4x_reduction:
1159*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x24		// lo(a[0..3]*b[4])
1160*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr	// modulo-scheduled
1161*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x24
1162*8fb009dcSAndroid Build Coastguard Worker	add	x28,x28,#8
1163*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x8,x24
1164*8fb009dcSAndroid Build Coastguard Worker	and	x28,x28,#31
1165*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x9,x24
1166*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1167*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x24		// hi(a[0..3]*b[4])
1168*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1169*8fb009dcSAndroid Build Coastguard Worker	mul	x25,x19,x4		// t[0]*n0
1170*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1171*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x24
1172*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1173*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x8,x24
1174*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr
1175*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x9,x24
1176*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,x28]		// next b[i]
1177*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x10
1178*8fb009dcSAndroid Build Coastguard Worker	// (*)	mul	x10,x14,x25
1179*8fb009dcSAndroid Build Coastguard Worker	str	x25,[x26],#8		// put aside t[0]*n0 for tail processing
1180*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x11
1181*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x15,x25		// lo(n[0..3]*t[0]*n0)
1182*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x12
1183*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x16,x25
1184*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x13		// can't overflow
1185*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x17,x25
1186*8fb009dcSAndroid Build Coastguard Worker	// (*)	adds	xzr,x19,x10
	// (*) Instead of computing lo(n[0]*x25) and adding it to x19 just for
	// the carry-out, the subs below sets C exactly when x19 is non-zero.
	// Presumably valid because n0 makes the low word of that sum zero; confirm.
1187*8fb009dcSAndroid Build Coastguard Worker	subs	xzr,x19,#1		// (*)
1188*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x14,x25		// hi(n[0..3]*t[0]*n0)
1189*8fb009dcSAndroid Build Coastguard Worker	adcs	x19,x20,x11
1190*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x15,x25
1191*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x12
1192*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x16,x25
1193*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x13
1194*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x17,x25
1195*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x0
1196*8fb009dcSAndroid Build Coastguard Worker	adc	x0,xzr,xzr
1197*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1198*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1199*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1200*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1201*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1202*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,Loop_mul4x_reduction
1203*8fb009dcSAndroid Build Coastguard Worker
	// Fold in the pending carry, add the next four words of t[] from scratch
	// to the accumulator, and load the next four words of a[] and n[] plus
	// the first saved factor for the tail loop.
1204*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr
1205*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x26,#8*4]	// t[4..7]
1206*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x26,#8*6]
1207*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]	// a[4..7]
1208*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1209*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1210*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1211*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1212*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1213*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1214*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1215*8fb009dcSAndroid Build Coastguard Worker
1216*8fb009dcSAndroid Build Coastguard Worker	ldr	x25,[sp]		// t[0]*n0
1217*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]	// n[4..7]
1218*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1219*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1220*8fb009dcSAndroid Build Coastguard Worker
// Later passes, remaining words. For the current group of four words of a[]
// (x6-x9) and n[] (x14-x17), each of the four iterations adds a[..]*b[i]
// (x24) and n[..]*factor_i (x25, read back from sp[0..3] via x28) into
// x19-x23, stores the finished low word through x26 and moves the accumulator
// down one word. x0 holds the deferred top carry; x28 wraps 8,16,24,0.
1221*8fb009dcSAndroid Build Coastguard Worker.align	4
1222*8fb009dcSAndroid Build Coastguard WorkerLoop_mul4x_tail:
1223*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x6,x24		// lo(a[4..7]*b[4])
1224*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr	// modulo-scheduled
1225*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x7,x24
1226*8fb009dcSAndroid Build Coastguard Worker	add	x28,x28,#8
1227*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x8,x24
1228*8fb009dcSAndroid Build Coastguard Worker	and	x28,x28,#31
1229*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x9,x24
1230*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1231*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x6,x24		// hi(a[4..7]*b[4])
1232*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1233*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x7,x24
1234*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1235*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x8,x24
1236*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1237*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x9,x24
1238*8fb009dcSAndroid Build Coastguard Worker	adc	x23,xzr,xzr
1239*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2,x28]		// next b[i]
1240*8fb009dcSAndroid Build Coastguard Worker	adds	x20,x20,x10
1241*8fb009dcSAndroid Build Coastguard Worker	mul	x10,x14,x25		// lo(n[4..7]*t[0]*n0)
1242*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x11
1243*8fb009dcSAndroid Build Coastguard Worker	mul	x11,x15,x25
1244*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x12
1245*8fb009dcSAndroid Build Coastguard Worker	mul	x12,x16,x25
1246*8fb009dcSAndroid Build Coastguard Worker	adc	x23,x23,x13		// can't overflow
1247*8fb009dcSAndroid Build Coastguard Worker	mul	x13,x17,x25
1248*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1249*8fb009dcSAndroid Build Coastguard Worker	umulh	x10,x14,x25		// hi(n[4..7]*t[0]*n0)
1250*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1251*8fb009dcSAndroid Build Coastguard Worker	umulh	x11,x15,x25
1252*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1253*8fb009dcSAndroid Build Coastguard Worker	umulh	x12,x16,x25
1254*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1255*8fb009dcSAndroid Build Coastguard Worker	umulh	x13,x17,x25
1256*8fb009dcSAndroid Build Coastguard Worker	adcs	x23,x23,x0
1257*8fb009dcSAndroid Build Coastguard Worker	ldr	x25,[sp,x28]		// next saved t[0]*n0 factor
1258*8fb009dcSAndroid Build Coastguard Worker	adc	x0,xzr,xzr
1259*8fb009dcSAndroid Build Coastguard Worker	str	x19,[x26],#8		// result!!!
1260*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x20,x10
1261*8fb009dcSAndroid Build Coastguard Worker	sub	x10,x27,x1		// done yet?
1262*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x21,x11
1263*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x22,x12
1264*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x23,x13
1265*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1266*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,Loop_mul4x_tail
1267*8fb009dcSAndroid Build Coastguard Worker
1268*8fb009dcSAndroid Build Coastguard Worker	sub	x11,x3,x5		// np rewound by num*8; only used when x10 == 0
1269*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr
1270*8fb009dcSAndroid Build Coastguard Worker	cbz	x10,Loop_mul4x_break	// x1 reached &a[num]: this pass is complete
1271*8fb009dcSAndroid Build Coastguard Worker
	// Otherwise add the next four words of t[] from scratch to the
	// accumulator, load the next four words of a[] and n[], and go round again.
1272*8fb009dcSAndroid Build Coastguard Worker	ldp	x10,x11,[x26,#8*4]
1273*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x26,#8*6]
1274*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]
1275*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1276*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1277*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x10
1278*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,x11
1279*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,x12
1280*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,x13
1281*8fb009dcSAndroid Build Coastguard Worker	//adc	x0,x0,xzr
1282*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]
1283*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1284*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1285*8fb009dcSAndroid Build Coastguard Worker	b	Loop_mul4x_tail
1286*8fb009dcSAndroid Build Coastguard Worker
// End of a later pass. Add the previous pass's top carry (x30) into the last
// four accumulator words and store them through x26, record this pass's top
// carry in x30, step bp on by four words and rewind ap by num*8. x11 holds np
// rewound to n[0] on entry. If bp has reached &b[num] (x13) the multiplication
// is finished and control goes to Lmul4x_post with rp in x12, t[0..3] in
// x19-x22 and n[0..3] in x14-x17; otherwise another pass is started.
1287*8fb009dcSAndroid Build Coastguard Worker.align	4
1288*8fb009dcSAndroid Build Coastguard WorkerLoop_mul4x_break:
1289*8fb009dcSAndroid Build Coastguard Worker	ldp	x12,x13,[x29,#96]	// pull rp and &b[num]
1290*8fb009dcSAndroid Build Coastguard Worker	adds	x19,x19,x30
1291*8fb009dcSAndroid Build Coastguard Worker	add	x2,x2,#8*4		// bp++
1292*8fb009dcSAndroid Build Coastguard Worker	adcs	x20,x20,xzr
1293*8fb009dcSAndroid Build Coastguard Worker	sub	x1,x1,x5		// rewind ap
1294*8fb009dcSAndroid Build Coastguard Worker	adcs	x21,x21,xzr
1295*8fb009dcSAndroid Build Coastguard Worker	stp	x19,x20,[x26,#8*0]	// result!!!
1296*8fb009dcSAndroid Build Coastguard Worker	adcs	x22,x22,xzr
1297*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[sp,#8*4]	// t[0..3]
1298*8fb009dcSAndroid Build Coastguard Worker	adc	x30,x0,xzr	// x30 = top carry word of this pass
1299*8fb009dcSAndroid Build Coastguard Worker	stp	x21,x22,[x26,#8*2]	// result!!!
1300*8fb009dcSAndroid Build Coastguard Worker	cmp	x2,x13			// done yet?
1301*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[sp,#8*6]
1302*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x11,#8*0]	// n[0..3]
1303*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x11,#8*2]
1304*8fb009dcSAndroid Build Coastguard Worker	add	x3,x11,#8*4
1305*8fb009dcSAndroid Build Coastguard Worker	b.eq	Lmul4x_post	// flags from the cmp above; the loads and add leave them alone
1306*8fb009dcSAndroid Build Coastguard Worker
1307*8fb009dcSAndroid Build Coastguard Worker	ldr	x24,[x2]	// first b word of the next group
1308*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x1,#8*0]	// a[0..3]
1309*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x1,#8*2]
1310*8fb009dcSAndroid Build Coastguard Worker	adds	x1,x1,#8*4		// clear carry bit
1311*8fb009dcSAndroid Build Coastguard Worker	mov	x0,xzr
1312*8fb009dcSAndroid Build Coastguard Worker	mov	x26,sp	// restart the scratch write cursor
1313*8fb009dcSAndroid Build Coastguard Worker	b	Loop_mul4x_reduction
1314*8fb009dcSAndroid Build Coastguard Worker
// Final reduction for num > 4. On entry: x12 = rp, x19-x22 = t[0..3],
// x14-x17 = n[0..3], x3 = &n[4], x30 = top carry word, x5 = num*8, and the
// full t[] sits in scratch from sp+8*4 upwards.
1315*8fb009dcSAndroid Build Coastguard Worker.align	4
1316*8fb009dcSAndroid Build Coastguard WorkerLmul4x_post:
1317*8fb009dcSAndroid Build Coastguard Worker	// Final step. We see if result is larger than modulus, and
1318*8fb009dcSAndroid Build Coastguard Worker	// if it is, subtract the modulus. But comparison implies
1319*8fb009dcSAndroid Build Coastguard Worker	// subtraction. So we subtract modulus, see if it borrowed,
1320*8fb009dcSAndroid Build Coastguard Worker	// and conditionally copy original value.
1321*8fb009dcSAndroid Build Coastguard Worker	mov	x0,x12
1322*8fb009dcSAndroid Build Coastguard Worker	mov	x27,x12		// x0 copy
1323*8fb009dcSAndroid Build Coastguard Worker	subs	x10,x19,x14
1324*8fb009dcSAndroid Build Coastguard Worker	add	x26,sp,#8*8	// x26 = &t[4] (t[0] lives at sp+8*4)
1325*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x20,x15
1326*8fb009dcSAndroid Build Coastguard Worker	sub	x28,x5,#8*4	// byte count for all but the last group of four
1327*8fb009dcSAndroid Build Coastguard Worker
	// Write t[] - n[] into rp (x0), four words per iteration. The borrow
	// chain runs through the sbcs instructions; nothing else in the loop
	// sets the flags. The last group is finished after the loop.
1328*8fb009dcSAndroid Build Coastguard WorkerLmul4x_sub:
1329*8fb009dcSAndroid Build Coastguard Worker	sbcs	x12,x21,x16
1330*8fb009dcSAndroid Build Coastguard Worker	ldp	x14,x15,[x3,#8*0]
1331*8fb009dcSAndroid Build Coastguard Worker	sub	x28,x28,#8*4
1332*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x26,#8*0]
1333*8fb009dcSAndroid Build Coastguard Worker	sbcs	x13,x22,x17
1334*8fb009dcSAndroid Build Coastguard Worker	ldp	x16,x17,[x3,#8*2]
1335*8fb009dcSAndroid Build Coastguard Worker	add	x3,x3,#8*4
1336*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x26,#8*2]
1337*8fb009dcSAndroid Build Coastguard Worker	add	x26,x26,#8*4
1338*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x0,#8*0]
1339*8fb009dcSAndroid Build Coastguard Worker	sbcs	x10,x19,x14
1340*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x0,#8*2]
1341*8fb009dcSAndroid Build Coastguard Worker	add	x0,x0,#8*4
1342*8fb009dcSAndroid Build Coastguard Worker	sbcs	x11,x20,x15
1343*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,Lmul4x_sub
1344*8fb009dcSAndroid Build Coastguard Worker
	// Finish the last group, then prime the copy loop: x6-x9 = the first
	// four difference words just written to rp (via x27), x19-x22 = t[0..3]
	// from scratch (via x1).
1345*8fb009dcSAndroid Build Coastguard Worker	sbcs	x12,x21,x16
1346*8fb009dcSAndroid Build Coastguard Worker	mov	x26,sp	// x26 = wipe cursor, starting at the bottom of scratch
1347*8fb009dcSAndroid Build Coastguard Worker	add	x1,sp,#8*4	// x1 = &t[0]
1348*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x27,#8*0]
1349*8fb009dcSAndroid Build Coastguard Worker	sbcs	x13,x22,x17
1350*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x0,#8*0]
1351*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x27,#8*2]
1352*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x0,#8*2]
1353*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x1,#8*0]
1354*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x1,#8*2]
1355*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x30,xzr	// did it borrow?
1356*8fb009dcSAndroid Build Coastguard Worker	ldr	x30,[x29,#8]		// pull return address
1357*8fb009dcSAndroid Build Coastguard Worker
1358*8fb009dcSAndroid Build Coastguard Worker	sub	x28,x5,#8*4
	// Conditional copy, four words per iteration: with `lo` (carry clear,
	// i.e. the subtraction borrowed) the original t[] words x19-x22 are
	// written to rp, otherwise the difference words x6-x9 already there are
	// written back. Scratch is zeroed through x26 along the way. No
	// instruction in the loop changes the flags set by the sbcs above.
1359*8fb009dcSAndroid Build Coastguard WorkerLmul4x_cond_copy:
1360*8fb009dcSAndroid Build Coastguard Worker	sub	x28,x28,#8*4
1361*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x19,x6,lo
1362*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*0]
1363*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x20,x7,lo
1364*8fb009dcSAndroid Build Coastguard Worker	ldp	x6,x7,[x27,#8*4]
1365*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x1,#8*4]
1366*8fb009dcSAndroid Build Coastguard Worker	csel	x12,x21,x8,lo
1367*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*2]
1368*8fb009dcSAndroid Build Coastguard Worker	add	x26,x26,#8*4
1369*8fb009dcSAndroid Build Coastguard Worker	csel	x13,x22,x9,lo
1370*8fb009dcSAndroid Build Coastguard Worker	ldp	x8,x9,[x27,#8*6]
1371*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x1,#8*6]
1372*8fb009dcSAndroid Build Coastguard Worker	add	x1,x1,#8*4
1373*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x27,#8*0]
1374*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x27,#8*2]
1375*8fb009dcSAndroid Build Coastguard Worker	add	x27,x27,#8*4
1376*8fb009dcSAndroid Build Coastguard Worker	cbnz	x28,Lmul4x_cond_copy
1377*8fb009dcSAndroid Build Coastguard Worker
	// Last group of four, plus the final scratch wipe.
	// NOTE(review): the four zeroing stores below use offsets 0, 16, 24 and
	// 32 from x26, so they overlap and cover 48 bytes. x26 appears to be
	// x29-64 here, which would leave the top 16 bytes of scratch (the top two
	// words of t[]) unzeroed. The 4-word path at Lmul4x4_post_condition uses
	// offsets 0/16/32/48. Confirm against the generating Perl script whether
	// #8*4 and #8*6 were intended for the last two stores.
1378*8fb009dcSAndroid Build Coastguard Worker	csel	x10,x19,x6,lo
1379*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*0]
1380*8fb009dcSAndroid Build Coastguard Worker	csel	x11,x20,x7,lo
1381*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*2]
1382*8fb009dcSAndroid Build Coastguard Worker	csel	x12,x21,x8,lo
1383*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*3]
1384*8fb009dcSAndroid Build Coastguard Worker	csel	x13,x22,x9,lo
1385*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[x26,#8*4]
1386*8fb009dcSAndroid Build Coastguard Worker	stp	x10,x11,[x27,#8*0]
1387*8fb009dcSAndroid Build Coastguard Worker	stp	x12,x13,[x27,#8*2]
1388*8fb009dcSAndroid Build Coastguard Worker
1389*8fb009dcSAndroid Build Coastguard Worker	b	Lmul4x_done
1390*8fb009dcSAndroid Build Coastguard Worker
// Final reduction for the num == 4 case, done entirely in registers: subtract
// the modulus from the 4-word result plus top carry, keep the original value
// if the subtraction borrowed, store to rp, and zero 64 bytes of stack
// scratch starting at sp.
1391*8fb009dcSAndroid Build Coastguard Worker.align	4
1392*8fb009dcSAndroid Build Coastguard WorkerLmul4x4_post_condition:
1393*8fb009dcSAndroid Build Coastguard Worker	adc	x0,x0,xzr
1394*8fb009dcSAndroid Build Coastguard Worker	ldr	x1,[x29,#96]		// pull rp
1395*8fb009dcSAndroid Build Coastguard Worker	// x19-x22,x0 hold result, x14-x17 hold modulus
1396*8fb009dcSAndroid Build Coastguard Worker	subs	x6,x19,x14
1397*8fb009dcSAndroid Build Coastguard Worker	ldr	x30,[x29,#8]		// pull return address
1398*8fb009dcSAndroid Build Coastguard Worker	sbcs	x7,x20,x15
1399*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*0]
1400*8fb009dcSAndroid Build Coastguard Worker	sbcs	x8,x21,x16
1401*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*2]
1402*8fb009dcSAndroid Build Coastguard Worker	sbcs	x9,x22,x17
1403*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*4]
1404*8fb009dcSAndroid Build Coastguard Worker	sbcs	xzr,x0,xzr		// did it borrow?
1405*8fb009dcSAndroid Build Coastguard Worker	stp	xzr,xzr,[sp,#8*6]
1406*8fb009dcSAndroid Build Coastguard Worker
1407*8fb009dcSAndroid Build Coastguard Worker	// x6-x9 hold result-modulus; `lo` (borrow) selects the unsubtracted result
1408*8fb009dcSAndroid Build Coastguard Worker	csel	x6,x19,x6,lo
1409*8fb009dcSAndroid Build Coastguard Worker	csel	x7,x20,x7,lo
1410*8fb009dcSAndroid Build Coastguard Worker	csel	x8,x21,x8,lo
1411*8fb009dcSAndroid Build Coastguard Worker	csel	x9,x22,x9,lo
1412*8fb009dcSAndroid Build Coastguard Worker	stp	x6,x7,[x1,#8*0]
1413*8fb009dcSAndroid Build Coastguard Worker	stp	x8,x9,[x1,#8*2]
1414*8fb009dcSAndroid Build Coastguard Worker
// Common epilogue: restore x19-x28 from the frame at x29, drop the scratch
// area by resetting sp to x29, set the return value x0 = 1, reload x29 and
// release the 128-byte frame, then return. x30 is not reloaded here; both
// paths that reach this label load it from [x29,#8] beforehand.
1415*8fb009dcSAndroid Build Coastguard WorkerLmul4x_done:
1416*8fb009dcSAndroid Build Coastguard Worker	ldp	x19,x20,[x29,#16]
1417*8fb009dcSAndroid Build Coastguard Worker	mov	sp,x29
1418*8fb009dcSAndroid Build Coastguard Worker	ldp	x21,x22,[x29,#32]
1419*8fb009dcSAndroid Build Coastguard Worker	mov	x0,#1
1420*8fb009dcSAndroid Build Coastguard Worker	ldp	x23,x24,[x29,#48]
1421*8fb009dcSAndroid Build Coastguard Worker	ldp	x25,x26,[x29,#64]
1422*8fb009dcSAndroid Build Coastguard Worker	ldp	x27,x28,[x29,#80]
1423*8fb009dcSAndroid Build Coastguard Worker	ldr	x29,[sp],#128
1424*8fb009dcSAndroid Build Coastguard Worker	// x30 is popped earlier
1425*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALIDATE_LINK_REGISTER
1426*8fb009dcSAndroid Build Coastguard Worker	ret
1427*8fb009dcSAndroid Build Coastguard Worker
// NUL-terminated ASCII identification string embedded in .text:
// "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1428*8fb009dcSAndroid Build Coastguard Worker.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1429*8fb009dcSAndroid Build Coastguard Worker.align	2
1430*8fb009dcSAndroid Build Coastguard Worker.align	4
1431*8fb009dcSAndroid Build Coastguard Worker#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32)
1432