xref: /aosp_15_r20/external/boringssl/src/gen/bcm/armv4-mont-linux.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1*8fb009dcSAndroid Build Coastguard Worker// This file is generated from a similarly-named Perl script in the BoringSSL
2*8fb009dcSAndroid Build Coastguard Worker// source tree. Do not edit by hand.
3*8fb009dcSAndroid Build Coastguard Worker
4*8fb009dcSAndroid Build Coastguard Worker#include <openssl/asm_base.h>
5*8fb009dcSAndroid Build Coastguard Worker
6*8fb009dcSAndroid Build Coastguard Worker#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
7*8fb009dcSAndroid Build Coastguard Worker#include <openssl/arm_arch.h>
8*8fb009dcSAndroid Build Coastguard Worker
9*8fb009dcSAndroid Build Coastguard Worker@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
10*8fb009dcSAndroid Build Coastguard Worker@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
11*8fb009dcSAndroid Build Coastguard Worker.arch	armv7-a
12*8fb009dcSAndroid Build Coastguard Worker
13*8fb009dcSAndroid Build Coastguard Worker.text
14*8fb009dcSAndroid Build Coastguard Worker#if defined(__thumb2__)
15*8fb009dcSAndroid Build Coastguard Worker.syntax	unified
16*8fb009dcSAndroid Build Coastguard Worker.thumb
17*8fb009dcSAndroid Build Coastguard Worker#else
18*8fb009dcSAndroid Build Coastguard Worker.code	32
19*8fb009dcSAndroid Build Coastguard Worker#endif
20*8fb009dcSAndroid Build Coastguard Worker
21*8fb009dcSAndroid Build Coastguard Worker.globl	bn_mul_mont_nohw
22*8fb009dcSAndroid Build Coastguard Worker.hidden	bn_mul_mont_nohw
23*8fb009dcSAndroid Build Coastguard Worker.type	bn_mul_mont_nohw,%function
24*8fb009dcSAndroid Build Coastguard Worker
25*8fb009dcSAndroid Build Coastguard Worker.align	5
@ ---------------------------------------------------------------------------
@ bn_mul_mont_nohw
@
@ Word-serial Montgomery multiplication (per the function name) that uses
@ only the integer multiplier (umull/umlal/mul); no NEON registers are used.
@
@ Arguments as consumed by the code below:
@   r0        rp   - result words (pushed, reloaded later via "pull rp")
@   r1        ap   - first operand words, read with post-increment
@   r2        bp   - second operand words, one word per outer iteration
@   r3        np   - modulus words, read with post-increment
@   [sp,#0]   pointer to n0 (dereferenced once, value then kept on the stack)
@   [sp,#4]   num  - number of 32-bit words
@
@ Return value in r0: 0 if num < 2 (early exit through .Labrt with sp
@ restored), otherwise 1 after rp has been written.
@
@ r4-r12 and lr are saved and restored; the scratch area tp[0..num] is
@ allocated on the stack and overwritten before returning.
@ ---------------------------------------------------------------------------
26*8fb009dcSAndroid Build Coastguard Workerbn_mul_mont_nohw:
27*8fb009dcSAndroid Build Coastguard Worker	ldr	ip,[sp,#4]		@ load num
@ Pushing r0 (rp) and r2 (bp) directly below the two stack arguments gives
@ one contiguous block: rp, bp, &n0, num.
28*8fb009dcSAndroid Build Coastguard Worker	stmdb	sp!,{r0,r2}		@ sp points at argument block
29*8fb009dcSAndroid Build Coastguard Worker	cmp	ip,#2
30*8fb009dcSAndroid Build Coastguard Worker	mov	r0,ip			@ load num
@ num < 2: undo the two-word push, set the return value to 0 and leave.
31*8fb009dcSAndroid Build Coastguard Worker#ifdef	__thumb2__
32*8fb009dcSAndroid Build Coastguard Worker	ittt	lt
33*8fb009dcSAndroid Build Coastguard Worker#endif
34*8fb009dcSAndroid Build Coastguard Worker	movlt	r0,#0
35*8fb009dcSAndroid Build Coastguard Worker	addlt	sp,sp,#2*4
36*8fb009dcSAndroid Build Coastguard Worker	blt	.Labrt
37*8fb009dcSAndroid Build Coastguard Worker
38*8fb009dcSAndroid Build Coastguard Worker	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ save 10 registers
39*8fb009dcSAndroid Build Coastguard Worker
@ Allocate tp[0..num] (num+1 words) below the saved registers.
40*8fb009dcSAndroid Build Coastguard Worker	mov	r0,r0,lsl#2		@ rescale r0 for byte count
41*8fb009dcSAndroid Build Coastguard Worker	sub	sp,sp,r0		@ alloca(4*num)
42*8fb009dcSAndroid Build Coastguard Worker	sub	sp,sp,#4		@ +extra dword
43*8fb009dcSAndroid Build Coastguard Worker	sub	r0,r0,#4		@ "num=num-1"
44*8fb009dcSAndroid Build Coastguard Worker	add	r4,r2,r0		@ &bp[num-1]
45*8fb009dcSAndroid Build Coastguard Worker
@ From here on r0 is the frame anchor. Word offsets relative to r0:
@   [r0]              tp[num-1]
@   [r0,#1*4]         tp[num]   (the extra word)
@   [r0,#2*4..#11*4]  saved r4-r12,lr
@   [r0,#12*4]        pushed r0 = rp
@   [r0,#13*4]        pushed r2 = bp, reused as the running bp pointer
@   [r0,#14*4]        incoming &n0, overwritten below with the n0 value
@   [r0,#15*4]        incoming num, overwritten below with the bp end marker
46*8fb009dcSAndroid Build Coastguard Worker	add	r0,sp,r0		@ r0 to point at &tp[num-1]
47*8fb009dcSAndroid Build Coastguard Worker	ldr	r8,[r0,#14*4]		@ &n0
48*8fb009dcSAndroid Build Coastguard Worker	ldr	r2,[r2]		@ bp[0]
49*8fb009dcSAndroid Build Coastguard Worker	ldr	r5,[r1],#4		@ ap[0],ap++
50*8fb009dcSAndroid Build Coastguard Worker	ldr	r6,[r3],#4		@ np[0],np++
51*8fb009dcSAndroid Build Coastguard Worker	ldr	r8,[r8]		@ *n0
@ NOTE: r4 was computed above as bp + 4*(num-1), i.e. the address of the
@ last word bp[num-1]. .Louter compares its pre-incremented bp pointer
@ against this value to detect the final iteration.
52*8fb009dcSAndroid Build Coastguard Worker	str	r4,[r0,#15*4]		@ save &bp[num]
53*8fb009dcSAndroid Build Coastguard Worker
@ Column 0 of the first pass. r8 becomes the per-row multiplier
@ m = low32(ap[0]*bp[0] * n0); r11 and r12 hold the high words of the
@ ap*bp and np*m products that carry into the next column.
54*8fb009dcSAndroid Build Coastguard Worker	umull	r10,r11,r5,r2	@ ap[0]*bp[0]
55*8fb009dcSAndroid Build Coastguard Worker	str	r8,[r0,#14*4]		@ save n0 value
56*8fb009dcSAndroid Build Coastguard Worker	mul	r8,r10,r8		@ "tp[0]"*n0
57*8fb009dcSAndroid Build Coastguard Worker	mov	r12,#0
58*8fb009dcSAndroid Build Coastguard Worker	umlal	r10,r12,r6,r8	@ np[0]*n0+"t[0]"
59*8fb009dcSAndroid Build Coastguard Worker	mov	r4,sp
60*8fb009dcSAndroid Build Coastguard Worker
@ First pass (bp[0]): tp is not read here, only written. r4 walks tp from
@ sp up to r0, so the loop body runs num-1 times (j = 1 .. num-1) and each
@ combined low word is stored one slot down, at tp[j-1].
61*8fb009dcSAndroid Build Coastguard Worker.L1st:
62*8fb009dcSAndroid Build Coastguard Worker	ldr	r5,[r1],#4		@ ap[j],ap++
63*8fb009dcSAndroid Build Coastguard Worker	mov	r10,r11
64*8fb009dcSAndroid Build Coastguard Worker	ldr	r6,[r3],#4		@ np[j],np++
65*8fb009dcSAndroid Build Coastguard Worker	mov	r11,#0
66*8fb009dcSAndroid Build Coastguard Worker	umlal	r10,r11,r5,r2	@ ap[j]*bp[0]
67*8fb009dcSAndroid Build Coastguard Worker	mov	r14,#0
68*8fb009dcSAndroid Build Coastguard Worker	umlal	r12,r14,r6,r8	@ np[j]*n0
69*8fb009dcSAndroid Build Coastguard Worker	adds	r12,r12,r10
70*8fb009dcSAndroid Build Coastguard Worker	str	r12,[r4],#4		@ tp[j-1]=,tp++
71*8fb009dcSAndroid Build Coastguard Worker	adc	r12,r14,#0
72*8fb009dcSAndroid Build Coastguard Worker	cmp	r4,r0
73*8fb009dcSAndroid Build Coastguard Worker	bne	.L1st
74*8fb009dcSAndroid Build Coastguard Worker
@ Fold the two pending high words into tp[num-1] and tp[num]. The carry
@ produced by adds survives the intervening ldr/mov instructions, which do
@ not set flags, until adc consumes it.
75*8fb009dcSAndroid Build Coastguard Worker	adds	r12,r12,r11
76*8fb009dcSAndroid Build Coastguard Worker	ldr	r4,[r0,#13*4]		@ restore bp
77*8fb009dcSAndroid Build Coastguard Worker	mov	r14,#0
78*8fb009dcSAndroid Build Coastguard Worker	ldr	r8,[r0,#14*4]		@ restore n0
79*8fb009dcSAndroid Build Coastguard Worker	adc	r14,r14,#0
80*8fb009dcSAndroid Build Coastguard Worker	str	r12,[r0]		@ tp[num-1]=
81*8fb009dcSAndroid Build Coastguard Worker	mov	r7,sp
82*8fb009dcSAndroid Build Coastguard Worker	str	r14,[r0,#4]		@ tp[num]=
83*8fb009dcSAndroid Build Coastguard Worker
@ Remaining passes, one per word bp[1] .. bp[num-1]. On entry r7 == sp, so
@ r0 - r7 is 4*(num-1), the distance ap and np advanced beyond index 1.
84*8fb009dcSAndroid Build Coastguard Worker.Louter:
85*8fb009dcSAndroid Build Coastguard Worker	sub	r7,r0,r7		@ "original" r0-1 value
86*8fb009dcSAndroid Build Coastguard Worker	sub	r1,r1,r7		@ "rewind" ap to &ap[1]
87*8fb009dcSAndroid Build Coastguard Worker	ldr	r2,[r4,#4]!		@ *(++bp)
88*8fb009dcSAndroid Build Coastguard Worker	sub	r3,r3,r7		@ "rewind" np to &np[1]
89*8fb009dcSAndroid Build Coastguard Worker	ldr	r5,[r1,#-4]		@ ap[0]
90*8fb009dcSAndroid Build Coastguard Worker	ldr	r10,[sp]		@ tp[0]
91*8fb009dcSAndroid Build Coastguard Worker	ldr	r6,[r3,#-4]		@ np[0]
92*8fb009dcSAndroid Build Coastguard Worker	ldr	r7,[sp,#4]		@ tp[1]
93*8fb009dcSAndroid Build Coastguard Worker
@ Column 0 of this pass: r8 enters holding n0 and leaves holding the
@ multiplier m = low32((tp[0] + ap[0]*bp[i]) * n0). r4 is spilled because
@ it is reused as the tp cursor inside .Linner.
94*8fb009dcSAndroid Build Coastguard Worker	mov	r11,#0
95*8fb009dcSAndroid Build Coastguard Worker	umlal	r10,r11,r5,r2	@ ap[0]*bp[i]+tp[0]
96*8fb009dcSAndroid Build Coastguard Worker	str	r4,[r0,#13*4]		@ save bp
97*8fb009dcSAndroid Build Coastguard Worker	mul	r8,r10,r8
98*8fb009dcSAndroid Build Coastguard Worker	mov	r12,#0
99*8fb009dcSAndroid Build Coastguard Worker	umlal	r10,r12,r6,r8	@ np[0]*n0+"tp[0]"
100*8fb009dcSAndroid Build Coastguard Worker	mov	r4,sp
101*8fb009dcSAndroid Build Coastguard Worker
@ Same shape as .L1st plus the previous tp[j] (held in r7, prefetched one
@ iteration ahead). The carry out of "adds r10,r11,r7" stays live across
@ the two umlal instructions and is added to r11 by the adc that follows.
102*8fb009dcSAndroid Build Coastguard Worker.Linner:
103*8fb009dcSAndroid Build Coastguard Worker	ldr	r5,[r1],#4		@ ap[j],ap++
104*8fb009dcSAndroid Build Coastguard Worker	adds	r10,r11,r7		@ +=tp[j]
105*8fb009dcSAndroid Build Coastguard Worker	ldr	r6,[r3],#4		@ np[j],np++
106*8fb009dcSAndroid Build Coastguard Worker	mov	r11,#0
107*8fb009dcSAndroid Build Coastguard Worker	umlal	r10,r11,r5,r2	@ ap[j]*bp[i]
108*8fb009dcSAndroid Build Coastguard Worker	mov	r14,#0
109*8fb009dcSAndroid Build Coastguard Worker	umlal	r12,r14,r6,r8	@ np[j]*n0
110*8fb009dcSAndroid Build Coastguard Worker	adc	r11,r11,#0
111*8fb009dcSAndroid Build Coastguard Worker	ldr	r7,[r4,#8]		@ tp[j+1]
112*8fb009dcSAndroid Build Coastguard Worker	adds	r12,r12,r10
113*8fb009dcSAndroid Build Coastguard Worker	str	r12,[r4],#4		@ tp[j-1]=,tp++
114*8fb009dcSAndroid Build Coastguard Worker	adc	r12,r14,#0
115*8fb009dcSAndroid Build Coastguard Worker	cmp	r4,r0
116*8fb009dcSAndroid Build Coastguard Worker	bne	.Linner
117*8fb009dcSAndroid Build Coastguard Worker
@ Tail of the pass: add both pending high words and the old tp[num] (the
@ last value prefetched into r7) to form the new tp[num-1] and tp[num].
@ r14 still holds the new tp[num] when the outer loop finally exits.
118*8fb009dcSAndroid Build Coastguard Worker	adds	r12,r12,r11
119*8fb009dcSAndroid Build Coastguard Worker	mov	r14,#0
120*8fb009dcSAndroid Build Coastguard Worker	ldr	r4,[r0,#13*4]		@ restore bp
121*8fb009dcSAndroid Build Coastguard Worker	adc	r14,r14,#0
122*8fb009dcSAndroid Build Coastguard Worker	ldr	r8,[r0,#14*4]		@ restore n0
123*8fb009dcSAndroid Build Coastguard Worker	adds	r12,r12,r7
124*8fb009dcSAndroid Build Coastguard Worker	ldr	r7,[r0,#15*4]		@ restore &bp[num]
125*8fb009dcSAndroid Build Coastguard Worker	adc	r14,r14,#0
126*8fb009dcSAndroid Build Coastguard Worker	str	r12,[r0]		@ tp[num-1]=
127*8fb009dcSAndroid Build Coastguard Worker	str	r14,[r0,#4]		@ tp[num]=
128*8fb009dcSAndroid Build Coastguard Worker
@ Loop again unless the word just processed was the one at the saved end
@ marker; r7 is reset to sp because .Louter expects that on entry.
129*8fb009dcSAndroid Build Coastguard Worker	cmp	r4,r7
130*8fb009dcSAndroid Build Coastguard Worker#ifdef	__thumb2__
131*8fb009dcSAndroid Build Coastguard Worker	itt	ne
132*8fb009dcSAndroid Build Coastguard Worker#endif
133*8fb009dcSAndroid Build Coastguard Worker	movne	r7,sp
134*8fb009dcSAndroid Build Coastguard Worker	bne	.Louter
135*8fb009dcSAndroid Build Coastguard Worker
@ Final reduction setup: r2 = rp, r4 = &tp[0], r0 = &tp[num] (loop end),
@ r3 = &np[0], r5 = 4*num (byte length used to rewind pointers).
136*8fb009dcSAndroid Build Coastguard Worker	ldr	r2,[r0,#12*4]		@ pull rp
137*8fb009dcSAndroid Build Coastguard Worker	mov	r5,sp
138*8fb009dcSAndroid Build Coastguard Worker	add	r0,r0,#4		@ r0 to point at &tp[num]
139*8fb009dcSAndroid Build Coastguard Worker	sub	r5,r0,r5		@ "original" num value
140*8fb009dcSAndroid Build Coastguard Worker	mov	r4,sp			@ "rewind" r4
141*8fb009dcSAndroid Build Coastguard Worker	mov	r1,r4			@ "borrow" r1
142*8fb009dcSAndroid Build Coastguard Worker	sub	r3,r3,r5		@ "rewind" r3 to &np[0]
143*8fb009dcSAndroid Build Coastguard Worker
@ rp = tp - np, word by word with borrow. On ARM the C flag set means
@ "no borrow", so subs r7,r7,r7 (result zero) leaves C set, which is the
@ starting state sbcs needs. teq is used for the loop test because it does
@ not disturb C for a register operand.
144*8fb009dcSAndroid Build Coastguard Worker	subs	r7,r7,r7		@ "clear" carry flag
145*8fb009dcSAndroid Build Coastguard Worker.Lsub:	ldr	r7,[r4],#4
146*8fb009dcSAndroid Build Coastguard Worker	ldr	r6,[r3],#4
147*8fb009dcSAndroid Build Coastguard Worker	sbcs	r7,r7,r6		@ tp[j]-np[j]
148*8fb009dcSAndroid Build Coastguard Worker	str	r7,[r2],#4		@ rp[j]=
149*8fb009dcSAndroid Build Coastguard Worker	teq	r4,r0		@ preserve carry
150*8fb009dcSAndroid Build Coastguard Worker	bne	.Lsub
@ r14 is tp[num]; subtracting the final borrow from it leaves C clear only
@ when the whole (num+1)-word value tp was smaller than np.
151*8fb009dcSAndroid Build Coastguard Worker	sbcs	r14,r14,#0		@ upmost carry
152*8fb009dcSAndroid Build Coastguard Worker	mov	r4,sp			@ "rewind" r4
153*8fb009dcSAndroid Build Coastguard Worker	sub	r2,r2,r5		@ "rewind" r2
154*8fb009dcSAndroid Build Coastguard Worker
@ Select the result: with C clear (the subtraction borrowed) each rp[j] is
@ replaced by tp[j]; with C set rp keeps tp-np. Both words are loaded on
@ every iteration regardless of C - presumably to keep the access pattern
@ independent of the comparison; confirm against upstream intent. Each tp
@ word is overwritten with the value of sp as it is consumed.
155*8fb009dcSAndroid Build Coastguard Worker.Lcopy:	ldr	r7,[r4]		@ conditional copy
156*8fb009dcSAndroid Build Coastguard Worker	ldr	r5,[r2]
157*8fb009dcSAndroid Build Coastguard Worker	str	sp,[r4],#4		@ zap tp
158*8fb009dcSAndroid Build Coastguard Worker#ifdef	__thumb2__
159*8fb009dcSAndroid Build Coastguard Worker	it	cc
160*8fb009dcSAndroid Build Coastguard Worker#endif
161*8fb009dcSAndroid Build Coastguard Worker	movcc	r5,r7
162*8fb009dcSAndroid Build Coastguard Worker	str	r5,[r2],#4
163*8fb009dcSAndroid Build Coastguard Worker	teq	r4,r0		@ preserve carry
164*8fb009dcSAndroid Build Coastguard Worker	bne	.Lcopy
165*8fb009dcSAndroid Build Coastguard Worker
@ Epilogue: r0 is &tp[num], so r0+4 is the first saved register. Pop the
@ ten registers, drop the two pushed argument words, and return 1.
166*8fb009dcSAndroid Build Coastguard Worker	mov	sp,r0
167*8fb009dcSAndroid Build Coastguard Worker	add	sp,sp,#4		@ skip over tp[num+1]
168*8fb009dcSAndroid Build Coastguard Worker	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ restore registers
169*8fb009dcSAndroid Build Coastguard Worker	add	sp,sp,#2*4		@ skip over {r0,r2}
170*8fb009dcSAndroid Build Coastguard Worker	mov	r0,#1
@ Shared exit; the num < 2 path arrives here with r0 = 0.
171*8fb009dcSAndroid Build Coastguard Worker.Labrt:
172*8fb009dcSAndroid Build Coastguard Worker#if __ARM_ARCH>=5
173*8fb009dcSAndroid Build Coastguard Worker	bx	lr				@ bx lr
174*8fb009dcSAndroid Build Coastguard Worker#else
@ Pre-v5 build: if bit 0 of lr is clear, return with a plain mov pc,lr;
@ otherwise fall through to 0xe12fff1e, the ARM encoding of bx lr, emitted
@ as a data word (presumably so assemblers targeting ARMv4 accept it).
175*8fb009dcSAndroid Build Coastguard Worker	tst	lr,#1
176*8fb009dcSAndroid Build Coastguard Worker	moveq	pc,lr			@ be binary compatible with V4, yet
177*8fb009dcSAndroid Build Coastguard Worker.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
178*8fb009dcSAndroid Build Coastguard Worker#endif
179*8fb009dcSAndroid Build Coastguard Worker.size	bn_mul_mont_nohw,.-bn_mul_mont_nohw
180*8fb009dcSAndroid Build Coastguard Worker#if __ARM_MAX_ARCH__>=7
181*8fb009dcSAndroid Build Coastguard Worker.arch	armv7-a
182*8fb009dcSAndroid Build Coastguard Worker.fpu	neon
183*8fb009dcSAndroid Build Coastguard Worker
184*8fb009dcSAndroid Build Coastguard Worker.globl	bn_mul8x_mont_neon
185*8fb009dcSAndroid Build Coastguard Worker.hidden	bn_mul8x_mont_neon
186*8fb009dcSAndroid Build Coastguard Worker.type	bn_mul8x_mont_neon,%function
187*8fb009dcSAndroid Build Coastguard Worker.align	5
188*8fb009dcSAndroid Build Coastguard Workerbn_mul8x_mont_neon:
189*8fb009dcSAndroid Build Coastguard Worker	mov	ip,sp
190*8fb009dcSAndroid Build Coastguard Worker	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
191*8fb009dcSAndroid Build Coastguard Worker	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ ABI specification says so
192*8fb009dcSAndroid Build Coastguard Worker	ldmia	ip,{r4,r5}		@ load rest of parameter block
193*8fb009dcSAndroid Build Coastguard Worker	mov	ip,sp
194*8fb009dcSAndroid Build Coastguard Worker
195*8fb009dcSAndroid Build Coastguard Worker	cmp	r5,#8
196*8fb009dcSAndroid Build Coastguard Worker	bhi	.LNEON_8n
197*8fb009dcSAndroid Build Coastguard Worker
198*8fb009dcSAndroid Build Coastguard Worker	@ special case for r5==8, everything is in register bank...
199*8fb009dcSAndroid Build Coastguard Worker
200*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]}, [r2,:32]!
201*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
202*8fb009dcSAndroid Build Coastguard Worker	sub	r7,sp,r5,lsl#4
203*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d0,d1,d2,d3},  [r1]!		@ can't specify :32 :-(
204*8fb009dcSAndroid Build Coastguard Worker	and	r7,r7,#-64
205*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d30[0]}, [r4,:32]
206*8fb009dcSAndroid Build Coastguard Worker	mov	sp,r7			@ alloca
207*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d8
208*8fb009dcSAndroid Build Coastguard Worker
209*8fb009dcSAndroid Build Coastguard Worker	vmull.u32	q6,d28,d0[0]
210*8fb009dcSAndroid Build Coastguard Worker	vmull.u32	q7,d28,d0[1]
211*8fb009dcSAndroid Build Coastguard Worker	vmull.u32	q8,d28,d1[0]
212*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d13,#16
213*8fb009dcSAndroid Build Coastguard Worker	vmull.u32	q9,d28,d1[1]
214*8fb009dcSAndroid Build Coastguard Worker
215*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d12
216*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
217*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
218*8fb009dcSAndroid Build Coastguard Worker
219*8fb009dcSAndroid Build Coastguard Worker	vmull.u32	q10,d28,d2[0]
220*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d4,d5,d6,d7}, [r3]!
221*8fb009dcSAndroid Build Coastguard Worker	vmull.u32	q11,d28,d2[1]
222*8fb009dcSAndroid Build Coastguard Worker	vmull.u32	q12,d28,d3[0]
223*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
224*8fb009dcSAndroid Build Coastguard Worker	vmull.u32	q13,d28,d3[1]
225*8fb009dcSAndroid Build Coastguard Worker
226*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d4[0]
227*8fb009dcSAndroid Build Coastguard Worker	sub	r9,r5,#1
228*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d4[1]
229*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d5[0]
230*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d5[1]
231*8fb009dcSAndroid Build Coastguard Worker
232*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d6[0]
233*8fb009dcSAndroid Build Coastguard Worker	vmov	q5,q6
234*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d6[1]
235*8fb009dcSAndroid Build Coastguard Worker	vmov	q6,q7
236*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d7[0]
237*8fb009dcSAndroid Build Coastguard Worker	vmov	q7,q8
238*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d7[1]
239*8fb009dcSAndroid Build Coastguard Worker	vmov	q8,q9
240*8fb009dcSAndroid Build Coastguard Worker	vmov	q9,q10
241*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d10,#16
242*8fb009dcSAndroid Build Coastguard Worker	vmov	q10,q11
243*8fb009dcSAndroid Build Coastguard Worker	vmov	q11,q12
244*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d10,d10,d11
245*8fb009dcSAndroid Build Coastguard Worker	vmov	q12,q13
246*8fb009dcSAndroid Build Coastguard Worker	veor	q13,q13
247*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d10,#16
248*8fb009dcSAndroid Build Coastguard Worker
249*8fb009dcSAndroid Build Coastguard Worker	b	.LNEON_outer8
250*8fb009dcSAndroid Build Coastguard Worker
251*8fb009dcSAndroid Build Coastguard Worker.align	4
252*8fb009dcSAndroid Build Coastguard Worker.LNEON_outer8:
253*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]}, [r2,:32]!
254*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
255*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d8
256*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d12,d12,d10
257*8fb009dcSAndroid Build Coastguard Worker
258*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d0[0]
259*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d0[1]
260*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d1[0]
261*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d13,#16
262*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d1[1]
263*8fb009dcSAndroid Build Coastguard Worker
264*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d12
265*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
266*8fb009dcSAndroid Build Coastguard Worker	subs	r9,r9,#1
267*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
268*8fb009dcSAndroid Build Coastguard Worker
269*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d2[0]
270*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d2[1]
271*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d3[0]
272*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
273*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d3[1]
274*8fb009dcSAndroid Build Coastguard Worker
275*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d4[0]
276*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d4[1]
277*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d5[0]
278*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d5[1]
279*8fb009dcSAndroid Build Coastguard Worker
280*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d6[0]
281*8fb009dcSAndroid Build Coastguard Worker	vmov	q5,q6
282*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d6[1]
283*8fb009dcSAndroid Build Coastguard Worker	vmov	q6,q7
284*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d7[0]
285*8fb009dcSAndroid Build Coastguard Worker	vmov	q7,q8
286*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d7[1]
287*8fb009dcSAndroid Build Coastguard Worker	vmov	q8,q9
288*8fb009dcSAndroid Build Coastguard Worker	vmov	q9,q10
289*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d10,#16
290*8fb009dcSAndroid Build Coastguard Worker	vmov	q10,q11
291*8fb009dcSAndroid Build Coastguard Worker	vmov	q11,q12
292*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d10,d10,d11
293*8fb009dcSAndroid Build Coastguard Worker	vmov	q12,q13
294*8fb009dcSAndroid Build Coastguard Worker	veor	q13,q13
295*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d10,#16
296*8fb009dcSAndroid Build Coastguard Worker
297*8fb009dcSAndroid Build Coastguard Worker	bne	.LNEON_outer8
298*8fb009dcSAndroid Build Coastguard Worker
299*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d12,d12,d10
300*8fb009dcSAndroid Build Coastguard Worker	mov	r7,sp
301*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d12,#16
302*8fb009dcSAndroid Build Coastguard Worker	mov	r8,r5
303*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d13,d13,d10
304*8fb009dcSAndroid Build Coastguard Worker	add	r6,sp,#96
305*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d13,#16
306*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d12,d13
307*8fb009dcSAndroid Build Coastguard Worker
308*8fb009dcSAndroid Build Coastguard Worker	b	.LNEON_tail_entry
309*8fb009dcSAndroid Build Coastguard Worker
310*8fb009dcSAndroid Build Coastguard Worker.align	4
311*8fb009dcSAndroid Build Coastguard Worker.LNEON_8n:
312*8fb009dcSAndroid Build Coastguard Worker	veor	q6,q6,q6
313*8fb009dcSAndroid Build Coastguard Worker	sub	r7,sp,#128
314*8fb009dcSAndroid Build Coastguard Worker	veor	q7,q7,q7
315*8fb009dcSAndroid Build Coastguard Worker	sub	r7,r7,r5,lsl#4
316*8fb009dcSAndroid Build Coastguard Worker	veor	q8,q8,q8
317*8fb009dcSAndroid Build Coastguard Worker	and	r7,r7,#-64
318*8fb009dcSAndroid Build Coastguard Worker	veor	q9,q9,q9
319*8fb009dcSAndroid Build Coastguard Worker	mov	sp,r7			@ alloca
320*8fb009dcSAndroid Build Coastguard Worker	veor	q10,q10,q10
321*8fb009dcSAndroid Build Coastguard Worker	add	r7,r7,#256
322*8fb009dcSAndroid Build Coastguard Worker	veor	q11,q11,q11
323*8fb009dcSAndroid Build Coastguard Worker	sub	r8,r5,#8
324*8fb009dcSAndroid Build Coastguard Worker	veor	q12,q12,q12
325*8fb009dcSAndroid Build Coastguard Worker	veor	q13,q13,q13
326*8fb009dcSAndroid Build Coastguard Worker
327*8fb009dcSAndroid Build Coastguard Worker.LNEON_8n_init:
328*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q6,q7},[r7,:256]!
329*8fb009dcSAndroid Build Coastguard Worker	subs	r8,r8,#8
330*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q8,q9},[r7,:256]!
331*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q10,q11},[r7,:256]!
332*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q12,q13},[r7,:256]!
333*8fb009dcSAndroid Build Coastguard Worker	bne	.LNEON_8n_init
334*8fb009dcSAndroid Build Coastguard Worker
335*8fb009dcSAndroid Build Coastguard Worker	add	r6,sp,#256
336*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d0,d1,d2,d3},[r1]!
337*8fb009dcSAndroid Build Coastguard Worker	add	r10,sp,#8
338*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d30[0]},[r4,:32]
339*8fb009dcSAndroid Build Coastguard Worker	mov	r9,r5
340*8fb009dcSAndroid Build Coastguard Worker	b	.LNEON_8n_outer
341*8fb009dcSAndroid Build Coastguard Worker
342*8fb009dcSAndroid Build Coastguard Worker.align	4
343*8fb009dcSAndroid Build Coastguard Worker.LNEON_8n_outer:
344*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]},[r2,:32]!	@ *b++
345*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
346*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d8
347*8fb009dcSAndroid Build Coastguard Worker	add	r7,sp,#128
348*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d4,d5,d6,d7},[r3]!
349*8fb009dcSAndroid Build Coastguard Worker
350*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d0[0]
351*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d0[1]
352*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
353*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d1[0]
354*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d13,#16
355*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d1[1]
356*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d12
357*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d2[0]
358*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
359*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d2[1]
360*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d28},[sp,:64]		@ put aside smashed b[8*i+0]
361*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d3[0]
362*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
363*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d3[1]
364*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]},[r2,:32]!	@ *b++
365*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d4[0]
366*8fb009dcSAndroid Build Coastguard Worker	veor	d10,d10,d10
367*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d4[1]
368*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d10
369*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d5[0]
370*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d12,d12,#16
371*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d5[1]
372*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d6[0]
373*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d12,d12,d13
374*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d6[1]
375*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d12,d12,#16
376*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d7[0]
377*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d7[1]
378*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d14,d14,d12
379*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+0]
380*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d0[0]
381*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q6},[r6,:128]!
382*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d0[1]
383*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
384*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d1[0]
385*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d15,#16
386*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d1[1]
387*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d14
388*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d2[0]
389*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
390*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d2[1]
391*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+1]
392*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d3[0]
393*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
394*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d3[1]
395*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]},[r2,:32]!	@ *b++
396*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d4[0]
397*8fb009dcSAndroid Build Coastguard Worker	veor	d10,d10,d10
398*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d4[1]
399*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d10
400*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d5[0]
401*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d14,d14,#16
402*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d5[1]
403*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d6[0]
404*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d14,d14,d15
405*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d6[1]
406*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d14,d14,#16
407*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d7[0]
408*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d7[1]
409*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d16,d16,d14
410*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+1]
411*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d0[0]
412*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q7},[r6,:128]!
413*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d0[1]
414*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
415*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d1[0]
416*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d17,#16
417*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d1[1]
418*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d16
419*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d2[0]
420*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
421*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d2[1]
422*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+2]
423*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d3[0]
424*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
425*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d3[1]
426*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]},[r2,:32]!	@ *b++
427*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d4[0]
428*8fb009dcSAndroid Build Coastguard Worker	veor	d10,d10,d10
429*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d4[1]
430*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d10
431*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d5[0]
432*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d16,d16,#16
433*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d5[1]
434*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d6[0]
435*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d16,d16,d17
436*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d6[1]
437*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d16,d16,#16
438*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d7[0]
439*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d7[1]
440*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d18,d18,d16
441*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+2]
442*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d0[0]
443*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q8},[r6,:128]!
444*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d0[1]
445*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
446*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d1[0]
447*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d19,#16
448*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d1[1]
449*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d18
450*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d2[0]
451*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
452*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d2[1]
453*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+3]
454*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d3[0]
455*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
456*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d3[1]
457*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]},[r2,:32]!	@ *b++
458*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d4[0]
459*8fb009dcSAndroid Build Coastguard Worker	veor	d10,d10,d10
460*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d4[1]
461*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d10
462*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d5[0]
463*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d18,d18,#16
464*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d5[1]
465*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d6[0]
466*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d18,d18,d19
467*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d6[1]
468*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d18,d18,#16
469*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d7[0]
470*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d7[1]
471*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d20,d20,d18
472*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+3]
473*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d0[0]
474*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q9},[r6,:128]!
475*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d0[1]
476*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
477*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d1[0]
478*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d21,#16
479*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d1[1]
480*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d20
481*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d2[0]
482*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
483*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d2[1]
484*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+4]
485*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d3[0]
486*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
487*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d3[1]
488*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]},[r2,:32]!	@ *b++
489*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d4[0]
490*8fb009dcSAndroid Build Coastguard Worker	veor	d10,d10,d10
491*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d4[1]
492*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d10
493*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d5[0]
494*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d20,d20,#16
495*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d5[1]
496*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d6[0]
497*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d20,d20,d21
498*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d6[1]
499*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d20,d20,#16
500*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d7[0]
501*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d7[1]
502*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d22,d22,d20
503*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+4]
504*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d0[0]
505*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q10},[r6,:128]!
506*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d0[1]
507*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
508*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d1[0]
509*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d23,#16
510*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d1[1]
511*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d22
512*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d2[0]
513*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
514*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d2[1]
515*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+5]
516*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d3[0]
517*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
518*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d3[1]
519*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]},[r2,:32]!	@ *b++
520*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d4[0]
521*8fb009dcSAndroid Build Coastguard Worker	veor	d10,d10,d10
522*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d4[1]
523*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d10
524*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d5[0]
525*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d22,d22,#16
526*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d5[1]
527*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d6[0]
528*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d22,d22,d23
529*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d6[1]
530*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d22,d22,#16
531*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d7[0]
532*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d7[1]
533*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d24,d24,d22
534*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+5]
535*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d0[0]
536*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q11},[r6,:128]!
537*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d0[1]
538*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
539*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d1[0]
540*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d25,#16
541*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d1[1]
542*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d24
543*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d2[0]
544*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
545*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d2[1]
546*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+6]
547*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d3[0]
548*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
549*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d3[1]
550*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28[0]},[r2,:32]!	@ *b++
551*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d4[0]
552*8fb009dcSAndroid Build Coastguard Worker	veor	d10,d10,d10
553*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d4[1]
554*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d28,d10
555*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d5[0]
556*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d24,d24,#16
557*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d5[1]
558*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d6[0]
559*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d24,d24,d25
560*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d6[1]
561*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d24,d24,#16
562*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d7[0]
563*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d7[1]
564*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d26,d26,d24
565*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+6]
566*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d0[0]
567*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q12},[r6,:128]!
568*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d0[1]
569*8fb009dcSAndroid Build Coastguard Worker	veor	d8,d8,d8
570*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d1[0]
571*8fb009dcSAndroid Build Coastguard Worker	vshl.i64	d29,d27,#16
572*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d1[1]
573*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d29,d29,d26
574*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d2[0]
575*8fb009dcSAndroid Build Coastguard Worker	vmul.u32	d29,d29,d30
576*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d2[1]
577*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+7]
578*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d3[0]
579*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d29,d8
580*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d3[1]
581*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
582*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d4[0]
583*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d0,d1,d2,d3},[r1]!
584*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d4[1]
585*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d5[0]
586*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d26,d26,#16
587*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d5[1]
588*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d6[0]
589*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d26,d26,d27
590*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d6[1]
591*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d26,d26,#16
592*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d7[0]
593*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d7[1]
594*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d12,d12,d26
595*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d29},[r10,:64]	@ put aside smashed m[8*i+7]
596*8fb009dcSAndroid Build Coastguard Worker	add	r10,sp,#8		@ rewind
597*8fb009dcSAndroid Build Coastguard Worker	sub	r8,r5,#8
598*8fb009dcSAndroid Build Coastguard Worker	b	.LNEON_8n_inner
599*8fb009dcSAndroid Build Coastguard Worker
600*8fb009dcSAndroid Build Coastguard Worker.align	4
@ .LNEON_8n_inner: inner loop of the 8x-unrolled NEON Montgomery multiply.
@ One pass runs eight stages. A stage multiply-accumulates d28 (a smashed b
@ word) by the eight 32-bit lanes of d0-d3 and d29 (a smashed m word) by the
@ eight lanes of d4-d7 into eight accumulators held in q6-q13. After each
@ stage the lowest accumulator is stored through r7 and its register is
@ reloaded from [r6] to serve as the top accumulator of the next stage, so
@ the register assignment rotates q6 -> q7 -> ... -> q13 over the pass.
@ Register roles visible in this loop:
@   r8      remaining count, decremented by 8 once per pass
@   r10     read pointer over the stacked smashed m/b values
@   r6, r7  accumulator load pointer / accumulator store pointer
@   r1, r3  sources of d0-d3 / d4-d7 (presumably the a[] and n[] limbs,
@           TODO confirm against the function prologue)
601*8fb009dcSAndroid Build Coastguard Worker.LNEON_8n_inner:
602*8fb009dcSAndroid Build Coastguard Worker	subs	r8,r8,#8	@ flags set here drive every it ne/eq and the closing bne
603*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d0[0]
604*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q13},[r6,:128]	@ fetch the top accumulator for this stage
605*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d0[1]
606*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+0]
607*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d1[0]
608*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d4,d5,d6,d7},[r3]!	@ d4-d7 = next eight 32-bit words from [r3]
609*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d1[1]
610*8fb009dcSAndroid Build Coastguard Worker	it	ne
611*8fb009dcSAndroid Build Coastguard Worker	addne	r6,r6,#16	@ don't advance in last iteration
612*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d2[0]
613*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d2[1]
614*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d3[0]
615*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d3[1]
616*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+1]
617*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d4[0]
618*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d4[1]
619*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d5[0]
620*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d5[1]
621*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d6[0]
622*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d6[1]
623*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d7[0]
624*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d7[1]
625*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q6},[r7,:128]!	@ lowest accumulator is done for this pass: store it
626*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d0[0]
627*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q6},[r6,:128]	@ q6 becomes the top accumulator of the next stage
628*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d0[1]
629*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+1]
630*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d1[0]
631*8fb009dcSAndroid Build Coastguard Worker	it	ne
632*8fb009dcSAndroid Build Coastguard Worker	addne	r6,r6,#16	@ don't advance in last iteration
633*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d1[1]
634*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d2[0]
635*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d2[1]
636*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d3[0]
637*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d3[1]
638*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+2]
639*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d4[0]
640*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d4[1]
641*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d5[0]
642*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d5[1]
643*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d6[0]
644*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d6[1]
645*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d7[0]
646*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d7[1]
647*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q7},[r7,:128]!
648*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d0[0]
649*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q7},[r6,:128]
650*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d0[1]
651*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+2]
652*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d1[0]
653*8fb009dcSAndroid Build Coastguard Worker	it	ne
654*8fb009dcSAndroid Build Coastguard Worker	addne	r6,r6,#16	@ don't advance in last iteration
655*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d1[1]
656*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d2[0]
657*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d2[1]
658*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d3[0]
659*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d3[1]
660*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+3]
661*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d4[0]
662*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d4[1]
663*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d5[0]
664*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d5[1]
665*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d6[0]
666*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d6[1]
667*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d7[0]
668*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d7[1]
669*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q8},[r7,:128]!
670*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d0[0]
671*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q8},[r6,:128]
672*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d0[1]
673*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+3]
674*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d1[0]
675*8fb009dcSAndroid Build Coastguard Worker	it	ne
676*8fb009dcSAndroid Build Coastguard Worker	addne	r6,r6,#16	@ don't advance in last iteration
677*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d1[1]
678*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d2[0]
679*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d2[1]
680*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d3[0]
681*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d3[1]
682*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+4]
683*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d4[0]
684*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d4[1]
685*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d5[0]
686*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d5[1]
687*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d6[0]
688*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d6[1]
689*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d7[0]
690*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d7[1]
691*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q9},[r7,:128]!
692*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d0[0]
693*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q9},[r6,:128]
694*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d0[1]
695*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+4]
696*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d1[0]
697*8fb009dcSAndroid Build Coastguard Worker	it	ne
698*8fb009dcSAndroid Build Coastguard Worker	addne	r6,r6,#16	@ don't advance in last iteration
699*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d1[1]
700*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d2[0]
701*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d2[1]
702*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d3[0]
703*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d3[1]
704*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+5]
705*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d4[0]
706*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d4[1]
707*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d5[0]
708*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d5[1]
709*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d6[0]
710*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d6[1]
711*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d7[0]
712*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d7[1]
713*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q10},[r7,:128]!
714*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d0[0]
715*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q10},[r6,:128]
716*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d0[1]
717*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+5]
718*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d1[0]
719*8fb009dcSAndroid Build Coastguard Worker	it	ne
720*8fb009dcSAndroid Build Coastguard Worker	addne	r6,r6,#16	@ don't advance in last iteration
721*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d1[1]
722*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d2[0]
723*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d2[1]
724*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d3[0]
725*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d3[1]
726*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+6]
727*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d4[0]
728*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d4[1]
729*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d5[0]
730*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d5[1]
731*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d6[0]
732*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d6[1]
733*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d7[0]
734*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d7[1]
735*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q11},[r7,:128]!
736*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d0[0]
737*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q11},[r6,:128]
738*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d0[1]
739*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+6]
740*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d1[0]
741*8fb009dcSAndroid Build Coastguard Worker	it	ne
742*8fb009dcSAndroid Build Coastguard Worker	addne	r6,r6,#16	@ don't advance in last iteration
743*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d1[1]
744*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d2[0]
745*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d2[1]
746*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d3[0]
747*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d3[1]
748*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+7]
749*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d4[0]
750*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d4[1]
751*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d5[0]
752*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d5[1]
753*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d6[0]
754*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d6[1]
755*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d7[0]
756*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d7[1]
757*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q12},[r7,:128]!
758*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d28,d0[0]
759*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q12},[r6,:128]
760*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d28,d0[1]
761*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+7]
762*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d28,d1[0]
763*8fb009dcSAndroid Build Coastguard Worker	it	ne
764*8fb009dcSAndroid Build Coastguard Worker	addne	r6,r6,#16	@ don't advance in last iteration
765*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d28,d1[1]
766*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d28,d2[0]
767*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d28,d2[1]
768*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d28,d3[0]
769*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d28,d3[1]
770*8fb009dcSAndroid Build Coastguard Worker	it	eq
771*8fb009dcSAndroid Build Coastguard Worker	subeq	r1,r1,r5,lsl#2	@ rewind
772*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q13,d29,d4[0]
773*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
774*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q6,d29,d4[1]
775*8fb009dcSAndroid Build Coastguard Worker	vld1.32	{d0,d1,d2,d3},[r1]!	@ d0-d3 = next eight 32-bit words from [r1]
776*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q7,d29,d5[0]
777*8fb009dcSAndroid Build Coastguard Worker	add	r10,sp,#8		@ rewind
778*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q8,d29,d5[1]
779*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q9,d29,d6[0]
780*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q10,d29,d6[1]
781*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q11,d29,d7[0]
782*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q13},[r7,:128]!	@ eighth accumulator stored in this pass
783*8fb009dcSAndroid Build Coastguard Worker	vmlal.u32	q12,d29,d7[1]
784*8fb009dcSAndroid Build Coastguard Worker
785*8fb009dcSAndroid Build Coastguard Worker	bne	.LNEON_8n_inner	@ still testing the flags from the subs at loop top
786*8fb009dcSAndroid Build Coastguard Worker	add	r6,sp,#128
787*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q6,q7},[r7,:256]!
788*8fb009dcSAndroid Build Coastguard Worker	veor	q2,q2,q2		@ d4-d5
789*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q8,q9},[r7,:256]!
790*8fb009dcSAndroid Build Coastguard Worker	veor	q3,q3,q3		@ d6-d7
791*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q10,q11},[r7,:256]!
792*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q12},[r7,:128]
793*8fb009dcSAndroid Build Coastguard Worker
794*8fb009dcSAndroid Build Coastguard Worker	subs	r9,r9,#8
795*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q6,q7},[r6,:256]!
796*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q8,q9},[r6,:256]!
797*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q10,q11},[r6,:256]!
798*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q12,q13},[r6,:256]!
799*8fb009dcSAndroid Build Coastguard Worker
800*8fb009dcSAndroid Build Coastguard Worker	itt	ne
801*8fb009dcSAndroid Build Coastguard Worker	subne	r3,r3,r5,lsl#2	@ rewind
802*8fb009dcSAndroid Build Coastguard Worker	bne	.LNEON_8n_outer
803*8fb009dcSAndroid Build Coastguard Worker
@ Tail: carry propagation over the accumulators and packing into 32-bit words.
@ Each 64-bit d lane is treated as one 16-bit digit plus overflow bits above
@ bit 15. d10 carries the overflow (lane >> 16) into the next lane, and
@ vzip.16 of a lane pair leaves low16(first) | low16(second) << 16 in the
@ low word of the first register, which is then stored through r7.
@ The set-up below also zero-fills 4 x 32 = 128 bytes at sp using q2/q3
@ (zeroed right after the inner loop) and post-increments sp over them, so
@ r7 = old sp + 128 equals the new sp once the four stores are done.
804*8fb009dcSAndroid Build Coastguard Worker	add	r7,sp,#128
805*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q2,q3}, [sp,:256]!	@ start wiping stack frame
806*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d12,#16	@ d10 = carry out of the first lane
807*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q2,q3},[sp,:256]!
808*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d13,d13,d10
809*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q2,q3}, [sp,:256]!
810*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d13,#16
811*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q2,q3}, [sp,:256]!
812*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d12,d13	@ low word of d12 = packed 32-bit result
813*8fb009dcSAndroid Build Coastguard Worker
814*8fb009dcSAndroid Build Coastguard Worker	mov	r8,r5	@ r8 = word count, consumed 8 per pass below
815*8fb009dcSAndroid Build Coastguard Worker	b	.LNEON_tail_entry	@ q6 is already processed, skip the loop head
816*8fb009dcSAndroid Build Coastguard Worker
817*8fb009dcSAndroid Build Coastguard Worker.align	4
818*8fb009dcSAndroid Build Coastguard Worker.LNEON_tail:
819*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d12,d12,d10	@ carry in from the previous pass
820*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d12,#16
821*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q8,q9}, [r6, :256]!	@ fetch the remaining accumulators for this pass
822*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d13,d13,d10
823*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q10,q11}, [r6, :256]!
824*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d13,#16
825*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q12,q13}, [r6, :256]!
826*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d12,d13
827*8fb009dcSAndroid Build Coastguard Worker
828*8fb009dcSAndroid Build Coastguard Worker.LNEON_tail_entry:
829*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d14,d14,d10
830*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d12[0]}, [r7, :32]!	@ emit word 0 of this group
831*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d14,#16
832*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d15,d15,d10
833*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d15,#16
834*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d14,d15
835*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d16,d16,d10
836*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d14[0]}, [r7, :32]!	@ word 1
837*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d16,#16
838*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d17,d17,d10
839*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d17,#16
840*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d16,d17
841*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d18,d18,d10
842*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d16[0]}, [r7, :32]!	@ word 2
843*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d18,#16
844*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d19,d19,d10
845*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d19,#16
846*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d18,d19
847*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d20,d20,d10
848*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d18[0]}, [r7, :32]!	@ word 3
849*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d20,#16
850*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d21,d21,d10
851*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d21,#16
852*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d20,d21
853*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d22,d22,d10
854*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d20[0]}, [r7, :32]!	@ word 4
855*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d22,#16
856*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d23,d23,d10
857*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d23,#16
858*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d22,d23
859*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d24,d24,d10
860*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d22[0]}, [r7, :32]!	@ word 5
861*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d24,#16
862*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d25,d25,d10
863*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d25,#16
864*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d24,d25
865*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d26,d26,d10
866*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d24[0]}, [r7, :32]!	@ word 6
867*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d26,#16
868*8fb009dcSAndroid Build Coastguard Worker	vadd.u64	d27,d27,d10
869*8fb009dcSAndroid Build Coastguard Worker	vshr.u64	d10,d27,#16	@ d10 = carry handed to the next pass
870*8fb009dcSAndroid Build Coastguard Worker	vzip.16	d26,d27
871*8fb009dcSAndroid Build Coastguard Worker	vld1.64	{q6,q7}, [r6, :256]!	@ preload q6/q7 for the next pass
872*8fb009dcSAndroid Build Coastguard Worker	subs	r8,r8,#8	@ eight words emitted per pass
873*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d26[0]},   [r7, :32]!	@ word 7
874*8fb009dcSAndroid Build Coastguard Worker	bne	.LNEON_tail
875*8fb009dcSAndroid Build Coastguard Worker
@ Final subtraction: walk the temporary result at sp (via r1) and the words at
@ r3 (presumably the modulus n[], TODO confirm against the prologue) four
@ words at a time, storing temporary - [r3] with borrow through r0.
@ ARM keeps the borrow inverted in C: C=1 means no borrow.
876*8fb009dcSAndroid Build Coastguard Worker	vst1.32	{d10[0]}, [r7, :32]		@ top-most bit
877*8fb009dcSAndroid Build Coastguard Worker	sub	r3,r3,r5,lsl#2			@ rewind r3
878*8fb009dcSAndroid Build Coastguard Worker	subs	r1,sp,#0				@ r1 = sp, and sets C=1 (no borrow pending) for the first sbcs
879*8fb009dcSAndroid Build Coastguard Worker	add	r2,sp,r5,lsl#2	@ r2 = end of temporary, taken now because r5 is overwritten in the loop
880*8fb009dcSAndroid Build Coastguard Worker
881*8fb009dcSAndroid Build Coastguard Worker.LNEON_sub:
882*8fb009dcSAndroid Build Coastguard Worker	ldmia	r1!, {r4,r5,r6,r7}	@ four words of the temporary
883*8fb009dcSAndroid Build Coastguard Worker	ldmia	r3!, {r8,r9,r10,r11}	@ four words from [r3]
884*8fb009dcSAndroid Build Coastguard Worker	sbcs	r8, r4,r8
885*8fb009dcSAndroid Build Coastguard Worker	sbcs	r9, r5,r9
886*8fb009dcSAndroid Build Coastguard Worker	sbcs	r10,r6,r10
887*8fb009dcSAndroid Build Coastguard Worker	sbcs	r11,r7,r11
888*8fb009dcSAndroid Build Coastguard Worker	teq	r1,r2				@ preserves carry
889*8fb009dcSAndroid Build Coastguard Worker	stmia	r0!, {r8,r9,r10,r11}	@ store the difference
890*8fb009dcSAndroid Build Coastguard Worker	bne	.LNEON_sub
891*8fb009dcSAndroid Build Coastguard Worker
892*8fb009dcSAndroid Build Coastguard Worker	ldr	r10, [r1]				@ load top-most bit
893*8fb009dcSAndroid Build Coastguard Worker	mov	r11,sp
894*8fb009dcSAndroid Build Coastguard Worker	veor	q0,q0,q0	@ q0 = 0, used by the wipe stores that follow
895*8fb009dcSAndroid Build Coastguard Worker	sub	r11,r2,r11				@ this is num*4
896*8fb009dcSAndroid Build Coastguard Worker	veor	q1,q1,q1	@ q1 = 0
897*8fb009dcSAndroid Build Coastguard Worker	mov	r1,sp	@ r1 back to the start of the temporary
898*8fb009dcSAndroid Build Coastguard Worker	sub	r0,r0,r11				@ rewind r0
899*8fb009dcSAndroid Build Coastguard Worker	mov	r3,r2				@ second 3/4th of frame
900*8fb009dcSAndroid Build Coastguard Worker	sbcs	r10,r10,#0				@ result is carry flag
901*8fb009dcSAndroid Build Coastguard Worker
@ Select the final result and wipe the scratch frame, eight words per pass.
@ C is still the outcome of the last sbcs before this loop: when C is clear
@ (the subtraction borrowed) the words of the temporary at r1 replace the
@ difference already stored at r0, otherwise the stored difference is kept.
@ The choice is made with conditional moves and the words are written back
@ in both cases. Per pass, 32 bytes of the temporary are zeroed through r1
@ and 3 x 32 bytes of the area starting at r2 are zeroed through r3.
902*8fb009dcSAndroid Build Coastguard Worker.LNEON_copy_n_zap:
903*8fb009dcSAndroid Build Coastguard Worker	ldmia	r1!, {r4,r5,r6,r7}	@ temporary, words 0-3
904*8fb009dcSAndroid Build Coastguard Worker	ldmia	r0,  {r8,r9,r10,r11}	@ stored difference, words 0-3
905*8fb009dcSAndroid Build Coastguard Worker	it	cc
906*8fb009dcSAndroid Build Coastguard Worker	movcc	r8, r4
907*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
908*8fb009dcSAndroid Build Coastguard Worker	itt	cc
909*8fb009dcSAndroid Build Coastguard Worker	movcc	r9, r5
910*8fb009dcSAndroid Build Coastguard Worker	movcc	r10,r6
911*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
912*8fb009dcSAndroid Build Coastguard Worker	it	cc
913*8fb009dcSAndroid Build Coastguard Worker	movcc	r11,r7
914*8fb009dcSAndroid Build Coastguard Worker	ldmia	r1, {r4,r5,r6,r7}	@ temporary, words 4-7 (no writeback)
915*8fb009dcSAndroid Build Coastguard Worker	stmia	r0!, {r8,r9,r10,r11}
916*8fb009dcSAndroid Build Coastguard Worker	sub	r1,r1,#16	@ back to word 0 so the wipe below covers all 32 bytes
917*8fb009dcSAndroid Build Coastguard Worker	ldmia	r0, {r8,r9,r10,r11}	@ stored difference, words 4-7
918*8fb009dcSAndroid Build Coastguard Worker	it	cc
919*8fb009dcSAndroid Build Coastguard Worker	movcc	r8, r4
920*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q0,q1}, [r1,:256]!			@ wipe
921*8fb009dcSAndroid Build Coastguard Worker	itt	cc
922*8fb009dcSAndroid Build Coastguard Worker	movcc	r9, r5
923*8fb009dcSAndroid Build Coastguard Worker	movcc	r10,r6
924*8fb009dcSAndroid Build Coastguard Worker	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
925*8fb009dcSAndroid Build Coastguard Worker	it	cc
926*8fb009dcSAndroid Build Coastguard Worker	movcc	r11,r7
927*8fb009dcSAndroid Build Coastguard Worker	teq	r1,r2				@ preserves carry
928*8fb009dcSAndroid Build Coastguard Worker	stmia	r0!, {r8,r9,r10,r11}
929*8fb009dcSAndroid Build Coastguard Worker	bne	.LNEON_copy_n_zap
930*8fb009dcSAndroid Build Coastguard Worker
931*8fb009dcSAndroid Build Coastguard Worker	mov	sp,ip	@ NOTE(review): ip presumably holds the stack pointer saved by the prologue (outside this view)
932*8fb009dcSAndroid Build Coastguard Worker	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ pop d8-d15
933*8fb009dcSAndroid Build Coastguard Worker	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}	@ pop r4-r11
934*8fb009dcSAndroid Build Coastguard Worker	bx	lr						@ bx lr
935*8fb009dcSAndroid Build Coastguard Worker.size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
936*8fb009dcSAndroid Build Coastguard Worker#endif
@ NUL-terminated ASCII identification string:
@ Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>
937*8fb009dcSAndroid Build Coastguard Worker.byte	77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
938*8fb009dcSAndroid Build Coastguard Worker.align	2
939*8fb009dcSAndroid Build Coastguard Worker#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
940