xref: /aosp_15_r20/external/boringssl/src/gen/bcm/ghashv8-armv8-win.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1*8fb009dcSAndroid Build Coastguard Worker// This file is generated from a similarly-named Perl script in the BoringSSL
2*8fb009dcSAndroid Build Coastguard Worker// source tree. Do not edit by hand.
3*8fb009dcSAndroid Build Coastguard Worker
4*8fb009dcSAndroid Build Coastguard Worker#include <openssl/asm_base.h>
5*8fb009dcSAndroid Build Coastguard Worker
6*8fb009dcSAndroid Build Coastguard Worker#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
7*8fb009dcSAndroid Build Coastguard Worker#include <openssl/arm_arch.h>
8*8fb009dcSAndroid Build Coastguard Worker
9*8fb009dcSAndroid Build Coastguard Worker#if __ARM_MAX_ARCH__>=7
10*8fb009dcSAndroid Build Coastguard Worker.text
11*8fb009dcSAndroid Build Coastguard Worker.arch	armv8-a+crypto
12*8fb009dcSAndroid Build Coastguard Worker.globl	gcm_init_v8
13*8fb009dcSAndroid Build Coastguard Worker
14*8fb009dcSAndroid Build Coastguard Worker.def gcm_init_v8
15*8fb009dcSAndroid Build Coastguard Worker   .type 32
16*8fb009dcSAndroid Build Coastguard Worker.endef
17*8fb009dcSAndroid Build Coastguard Worker.align	4
// gcm_init_v8: expand the hash key H into the six-entry table Htable.
//   x0 = Htable (output; 6 x 16 bytes are stored, x0 itself ends up advanced by 48)
//   x1 = H      (input; one 16-byte value is loaded)
// Entries as stored by this routine:
//   Htable[0] = twisted H
//   Htable[1] = packed Karatsuba terms: H.lo^H.hi (low half), H^2.lo^H^2.hi (high half)
//   Htable[2] = H^2
//   Htable[3] = H^3
//   Htable[4] = packed Karatsuba terms for H^3 (low half) and H^4 (high half)
//   Htable[5] = H^4
// Registers written: x0, v0-v7, v16-v22. No stack access and no flag-setting instructions.
18*8fb009dcSAndroid Build Coastguard Workergcm_init_v8:
19*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
20*8fb009dcSAndroid Build Coastguard Worker	ld1	{v17.2d},[x1]		//load input H
21*8fb009dcSAndroid Build Coastguard Worker	movi	v19.16b,#0xe1		//every byte = 0xe1
22*8fb009dcSAndroid Build Coastguard Worker	shl	v19.2d,v19.2d,#57		//0xc2.0: each 64-bit lane = 0xc2<<56
23*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v17.16b,v17.16b,#8		//swap the two 64-bit halves of H
24*8fb009dcSAndroid Build Coastguard Worker	ushr	v18.2d,v19.2d,#63		//each 64-bit lane = 1
25*8fb009dcSAndroid Build Coastguard Worker	dup	v17.4s,v17.s[1]		//replicate 32-bit element 1 of H
26*8fb009dcSAndroid Build Coastguard Worker	ext	v16.16b,v18.16b,v19.16b,#8		//t0=0xc2....01 (high lane 0xc2<<56, low lane 1)
27*8fb009dcSAndroid Build Coastguard Worker	ushr	v18.2d,v3.2d,#63		//top bit of each 64-bit half
28*8fb009dcSAndroid Build Coastguard Worker	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
29*8fb009dcSAndroid Build Coastguard Worker	and	v18.16b,v18.16b,v16.16b
30*8fb009dcSAndroid Build Coastguard Worker	shl	v3.2d,v3.2d,#1
31*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v18.16b,v18.16b,#8
32*8fb009dcSAndroid Build Coastguard Worker	and	v16.16b,v16.16b,v17.16b		//keep t0 only where the carry mask is set
33*8fb009dcSAndroid Build Coastguard Worker	orr	v3.16b,v3.16b,v18.16b		//H<<<=1
34*8fb009dcSAndroid Build Coastguard Worker	eor	v20.16b,v3.16b,v16.16b		//twisted H
35*8fb009dcSAndroid Build Coastguard Worker	st1	{v20.2d},[x0],#16		//store Htable[0]
36*8fb009dcSAndroid Build Coastguard Worker
37*8fb009dcSAndroid Build Coastguard Worker	//calculate H^2
38*8fb009dcSAndroid Build Coastguard Worker	ext	v16.16b,v20.16b,v20.16b,#8		//Karatsuba pre-processing
39*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v20.1d,v20.1d		//H.lo·H.lo
40*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v16.16b,v20.16b		//H.lo^H.hi in both halves
41*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v20.2d,v20.2d		//H.hi·H.hi
42*8fb009dcSAndroid Build Coastguard Worker	pmull	v1.1q,v16.1d,v16.1d		//(H.lo+H.hi)·(H.lo+H.hi)
43*8fb009dcSAndroid Build Coastguard Worker
44*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
45*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b
46*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b
47*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b
48*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase
49*8fb009dcSAndroid Build Coastguard Worker
50*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]
51*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]
52*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
53*8fb009dcSAndroid Build Coastguard Worker
54*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase
55*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
56*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
57*8fb009dcSAndroid Build Coastguard Worker	eor	v22.16b,v0.16b,v18.16b		//H^2
58*8fb009dcSAndroid Build Coastguard Worker
59*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v22.16b,v22.16b,#8		//Karatsuba pre-processing
60*8fb009dcSAndroid Build Coastguard Worker	eor	v17.16b,v17.16b,v22.16b		//H^2.lo^H^2.hi in both halves
61*8fb009dcSAndroid Build Coastguard Worker	ext	v21.16b,v16.16b,v17.16b,#8		//pack Karatsuba pre-processed
62*8fb009dcSAndroid Build Coastguard Worker	st1	{v21.2d,v22.2d},[x0],#32	//store Htable[1..2]
63*8fb009dcSAndroid Build Coastguard Worker	//calculate H^3 and H^4
	//two products are interleaved below:
	//v0/v1/v2 <- H·H^2 and v5/v6/v7 <- H^2·H^2
64*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v20.1d, v22.1d
65*8fb009dcSAndroid Build Coastguard Worker	pmull	v5.1q,v22.1d,v22.1d
66*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v20.2d, v22.2d
67*8fb009dcSAndroid Build Coastguard Worker	pmull2	v7.1q,v22.2d,v22.2d
68*8fb009dcSAndroid Build Coastguard Worker	pmull	v1.1q,v16.1d,v17.1d
69*8fb009dcSAndroid Build Coastguard Worker	pmull	v6.1q,v17.1d,v17.1d
70*8fb009dcSAndroid Build Coastguard Worker
71*8fb009dcSAndroid Build Coastguard Worker	ext	v16.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
72*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v5.16b,v7.16b,#8
73*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b
74*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v16.16b
75*8fb009dcSAndroid Build Coastguard Worker	eor	v4.16b,v5.16b,v7.16b
76*8fb009dcSAndroid Build Coastguard Worker	eor	v6.16b,v6.16b,v17.16b
77*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b
78*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase
79*8fb009dcSAndroid Build Coastguard Worker	eor	v6.16b,v6.16b,v4.16b
80*8fb009dcSAndroid Build Coastguard Worker	pmull	v4.1q,v5.1d,v19.1d
81*8fb009dcSAndroid Build Coastguard Worker
82*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]
83*8fb009dcSAndroid Build Coastguard Worker	ins	v7.d[0],v6.d[1]
84*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]
85*8fb009dcSAndroid Build Coastguard Worker	ins	v6.d[1],v5.d[0]
86*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
87*8fb009dcSAndroid Build Coastguard Worker	eor	v5.16b,v6.16b,v4.16b
88*8fb009dcSAndroid Build Coastguard Worker
89*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase
90*8fb009dcSAndroid Build Coastguard Worker	ext	v4.16b,v5.16b,v5.16b,#8
91*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
92*8fb009dcSAndroid Build Coastguard Worker	pmull	v5.1q,v5.1d,v19.1d
93*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
94*8fb009dcSAndroid Build Coastguard Worker	eor	v4.16b,v4.16b,v7.16b
95*8fb009dcSAndroid Build Coastguard Worker	eor	v20.16b, v0.16b,v18.16b		//H^3
96*8fb009dcSAndroid Build Coastguard Worker	eor	v22.16b,v5.16b,v4.16b		//H^4
97*8fb009dcSAndroid Build Coastguard Worker
98*8fb009dcSAndroid Build Coastguard Worker	ext	v16.16b,v20.16b, v20.16b,#8		//Karatsuba pre-processing
99*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v22.16b,v22.16b,#8
100*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v16.16b,v20.16b
101*8fb009dcSAndroid Build Coastguard Worker	eor	v17.16b,v17.16b,v22.16b
102*8fb009dcSAndroid Build Coastguard Worker	ext	v21.16b,v16.16b,v17.16b,#8		//pack Karatsuba pre-processed
103*8fb009dcSAndroid Build Coastguard Worker	st1	{v20.2d,v21.2d,v22.2d},[x0]		//store Htable[3..5]
104*8fb009dcSAndroid Build Coastguard Worker	ret
105*8fb009dcSAndroid Build Coastguard Worker
106*8fb009dcSAndroid Build Coastguard Worker.globl	gcm_gmult_v8
107*8fb009dcSAndroid Build Coastguard Worker
108*8fb009dcSAndroid Build Coastguard Worker.def gcm_gmult_v8
109*8fb009dcSAndroid Build Coastguard Worker   .type 32
110*8fb009dcSAndroid Build Coastguard Worker.endef
111*8fb009dcSAndroid Build Coastguard Worker.align	4
// gcm_gmult_v8: multiply the 16-byte value at Xi by H and write the product back in place.
//   x0 = Xi     (16 bytes, loaded on entry and overwritten before return)
//   x1 = Htable (32 bytes are loaded: twisted H and the packed Karatsuba terms)
// One Karatsuba multiplication (pmull/pmull2/pmull) is followed by a two-phase
// reduction that uses the 0xc2.0 constant built in v19.
// Registers written: v0-v3, v17-v21. x0 and x1 are not modified; no stack access
// and no flag-setting instructions.
112*8fb009dcSAndroid Build Coastguard Workergcm_gmult_v8:
113*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
114*8fb009dcSAndroid Build Coastguard Worker	ld1	{v17.2d},[x0]		//load Xi
115*8fb009dcSAndroid Build Coastguard Worker	movi	v19.16b,#0xe1
116*8fb009dcSAndroid Build Coastguard Worker	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
117*8fb009dcSAndroid Build Coastguard Worker	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
118*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
119*8fb009dcSAndroid Build Coastguard Worker	rev64	v17.16b,v17.16b		//byte-reverse each 64-bit half
120*8fb009dcSAndroid Build Coastguard Worker#endif
121*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v17.16b,v17.16b,#8		//swap the two 64-bit halves
122*8fb009dcSAndroid Build Coastguard Worker
123*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
124*8fb009dcSAndroid Build Coastguard Worker	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
125*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
126*8fb009dcSAndroid Build Coastguard Worker	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
127*8fb009dcSAndroid Build Coastguard Worker
128*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
129*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b
130*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b
131*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b
132*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
133*8fb009dcSAndroid Build Coastguard Worker
134*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]
135*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]
136*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
137*8fb009dcSAndroid Build Coastguard Worker
138*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
139*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
140*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
141*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v18.16b
142*8fb009dcSAndroid Build Coastguard Worker
143*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
144*8fb009dcSAndroid Build Coastguard Worker	rev64	v0.16b,v0.16b		//undo the byte reversal done on load
145*8fb009dcSAndroid Build Coastguard Worker#endif
146*8fb009dcSAndroid Build Coastguard Worker	ext	v0.16b,v0.16b,v0.16b,#8		//swap the halves back
147*8fb009dcSAndroid Build Coastguard Worker	st1	{v0.2d},[x0]		//write out Xi
148*8fb009dcSAndroid Build Coastguard Worker
149*8fb009dcSAndroid Build Coastguard Worker	ret
150*8fb009dcSAndroid Build Coastguard Worker
151*8fb009dcSAndroid Build Coastguard Worker.globl	gcm_ghash_v8
152*8fb009dcSAndroid Build Coastguard Worker
153*8fb009dcSAndroid Build Coastguard Worker.def gcm_ghash_v8
154*8fb009dcSAndroid Build Coastguard Worker   .type 32
155*8fb009dcSAndroid Build Coastguard Worker.endef
156*8fb009dcSAndroid Build Coastguard Worker.align	4
// gcm_ghash_v8: fold a buffer of 16-byte blocks into the hash value Xi.
//   x0 = Xi     (16 bytes, loaded on entry and rewritten at Ldone_v8)
//   x1 = Htable (twisted H, the packed Karatsuba terms and H^2 are loaded from it)
//   x2 = inp    (input pointer, advanced as blocks are loaded)
//   x3 = len    (byte count; 64 or more is handed to Lgcm_ghash_v8_4x)
// Below 64 bytes the input is consumed two blocks per Loop_mod2x_v8 iteration,
// and Lodd_tail_v8 handles a single remaining block.
// Registers written on this path: x1-x3, x12, v0-v7, v16-v22 and the condition flags.
// NOTE(review): I[0] is loaded and processed without any check for len == 0, so len
// appears to be expected to be a non-zero multiple of 16 -- confirm against callers.
157*8fb009dcSAndroid Build Coastguard Workergcm_ghash_v8:
158*8fb009dcSAndroid Build Coastguard Worker	AARCH64_VALID_CALL_TARGET
159*8fb009dcSAndroid Build Coastguard Worker	cmp	x3,#64
160*8fb009dcSAndroid Build Coastguard Worker	b.hs	Lgcm_ghash_v8_4x		//64 bytes or more: use the 4x code
161*8fb009dcSAndroid Build Coastguard Worker	ld1	{v0.2d},[x0]		//load [rotated] Xi
162*8fb009dcSAndroid Build Coastguard Worker						//"[rotated]" means that
163*8fb009dcSAndroid Build Coastguard Worker						//loaded value would have
164*8fb009dcSAndroid Build Coastguard Worker						//to be rotated in order to
165*8fb009dcSAndroid Build Coastguard Worker						//make it appear as in
166*8fb009dcSAndroid Build Coastguard Worker						//algorithm specification
167*8fb009dcSAndroid Build Coastguard Worker	subs	x3,x3,#32		//see if x3 is 32 or larger
168*8fb009dcSAndroid Build Coastguard Worker	mov	x12,#16		//x12 is used as post-
169*8fb009dcSAndroid Build Coastguard Worker						//increment for input pointer;
170*8fb009dcSAndroid Build Coastguard Worker						//as loop is modulo-scheduled
171*8fb009dcSAndroid Build Coastguard Worker						//x12 is zeroed just in time
172*8fb009dcSAndroid Build Coastguard Worker						//to preclude overstepping
173*8fb009dcSAndroid Build Coastguard Worker						//inp[len], which means that
174*8fb009dcSAndroid Build Coastguard Worker						//last block[s] are actually
175*8fb009dcSAndroid Build Coastguard Worker						//loaded twice, but last
176*8fb009dcSAndroid Build Coastguard Worker						//copy is not processed
177*8fb009dcSAndroid Build Coastguard Worker	ld1	{v20.2d,v21.2d},[x1],#32	//load twisted H, ..., H^2
178*8fb009dcSAndroid Build Coastguard Worker	movi	v19.16b,#0xe1
179*8fb009dcSAndroid Build Coastguard Worker	ld1	{v22.2d},[x1]
180*8fb009dcSAndroid Build Coastguard Worker	csel	x12,xzr,x12,eq			//is it time to zero x12? (eq: len was exactly 32)
181*8fb009dcSAndroid Build Coastguard Worker	ext	v0.16b,v0.16b,v0.16b,#8		//rotate Xi
182*8fb009dcSAndroid Build Coastguard Worker	ld1	{v16.2d},[x2],#16	//load [rotated] I[0]
183*8fb009dcSAndroid Build Coastguard Worker	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
184*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
185*8fb009dcSAndroid Build Coastguard Worker	rev64	v16.16b,v16.16b
186*8fb009dcSAndroid Build Coastguard Worker	rev64	v0.16b,v0.16b
187*8fb009dcSAndroid Build Coastguard Worker#endif
188*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v16.16b,v16.16b,#8		//rotate I[0]
189*8fb009dcSAndroid Build Coastguard Worker	b.lo	Lodd_tail_v8		//x3 was less than 32
190*8fb009dcSAndroid Build Coastguard Worker	ld1	{v17.2d},[x2],x12	//load [rotated] I[1]
191*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
192*8fb009dcSAndroid Build Coastguard Worker	rev64	v17.16b,v17.16b
193*8fb009dcSAndroid Build Coastguard Worker#endif
194*8fb009dcSAndroid Build Coastguard Worker	ext	v7.16b,v17.16b,v17.16b,#8
195*8fb009dcSAndroid Build Coastguard Worker	eor	v3.16b,v3.16b,v0.16b		//I[i]^=Xi
196*8fb009dcSAndroid Build Coastguard Worker	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
197*8fb009dcSAndroid Build Coastguard Worker	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
198*8fb009dcSAndroid Build Coastguard Worker	pmull2	v6.1q,v20.2d,v7.2d
199*8fb009dcSAndroid Build Coastguard Worker	b	Loop_mod2x_v8		//enter the aligned loop
200*8fb009dcSAndroid Build Coastguard Worker
201*8fb009dcSAndroid Build Coastguard Worker.align	4
	//each iteration folds two blocks: (Xi^I[i])·H^2 plus I[i+1]·H,
	//while the next pair is already being loaded and pre-multiplied
202*8fb009dcSAndroid Build Coastguard WorkerLoop_mod2x_v8:
203*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v3.16b,v3.16b,#8
204*8fb009dcSAndroid Build Coastguard Worker	subs	x3,x3,#32		//is there more data? (flags are consumed by the csel's and the closing b.hs)
205*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v22.1d,v3.1d		//H^2.lo·Xi.lo
206*8fb009dcSAndroid Build Coastguard Worker	csel	x12,xzr,x12,lo			//is it time to zero x12?
207*8fb009dcSAndroid Build Coastguard Worker
208*8fb009dcSAndroid Build Coastguard Worker	pmull	v5.1q,v21.1d,v17.1d
209*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v3.16b		//Karatsuba pre-processing
210*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v22.2d,v3.2d		//H^2.hi·Xi.hi
211*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v4.16b		//accumulate
212*8fb009dcSAndroid Build Coastguard Worker	pmull2	v1.1q,v21.2d,v18.2d		//(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
213*8fb009dcSAndroid Build Coastguard Worker	ld1	{v16.2d},[x2],x12	//load [rotated] I[i+2]
214*8fb009dcSAndroid Build Coastguard Worker
215*8fb009dcSAndroid Build Coastguard Worker	eor	v2.16b,v2.16b,v6.16b
216*8fb009dcSAndroid Build Coastguard Worker	csel	x12,xzr,x12,eq			//is it time to zero x12?
217*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v5.16b
218*8fb009dcSAndroid Build Coastguard Worker
219*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
220*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b
221*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b
222*8fb009dcSAndroid Build Coastguard Worker	ld1	{v17.2d},[x2],x12	//load [rotated] I[i+3]
223*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
224*8fb009dcSAndroid Build Coastguard Worker	rev64	v16.16b,v16.16b
225*8fb009dcSAndroid Build Coastguard Worker#endif
226*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b
227*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
228*8fb009dcSAndroid Build Coastguard Worker
229*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
230*8fb009dcSAndroid Build Coastguard Worker	rev64	v17.16b,v17.16b
231*8fb009dcSAndroid Build Coastguard Worker#endif
232*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]
233*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]
234*8fb009dcSAndroid Build Coastguard Worker	ext	v7.16b,v17.16b,v17.16b,#8
235*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v16.16b,v16.16b,#8
236*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
237*8fb009dcSAndroid Build Coastguard Worker	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
238*8fb009dcSAndroid Build Coastguard Worker	eor	v3.16b,v3.16b,v2.16b		//accumulate v3.16b early
239*8fb009dcSAndroid Build Coastguard Worker
240*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
241*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
242*8fb009dcSAndroid Build Coastguard Worker	eor	v3.16b,v3.16b,v18.16b
243*8fb009dcSAndroid Build Coastguard Worker	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
244*8fb009dcSAndroid Build Coastguard Worker	eor	v3.16b,v3.16b,v0.16b
245*8fb009dcSAndroid Build Coastguard Worker	pmull2	v6.1q,v20.2d,v7.2d
246*8fb009dcSAndroid Build Coastguard Worker	b.hs	Loop_mod2x_v8		//there was at least 32 more bytes
247*8fb009dcSAndroid Build Coastguard Worker
	//loop exit: undo the early accumulation into v3.16b and finish Xi in v0.16b
248*8fb009dcSAndroid Build Coastguard Worker	eor	v2.16b,v2.16b,v18.16b
249*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v16.16b,v16.16b,#8		//re-construct v3.16b
250*8fb009dcSAndroid Build Coastguard Worker	adds	x3,x3,#32		//re-construct x3
251*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v2.16b		//re-construct v0.16b
252*8fb009dcSAndroid Build Coastguard Worker	b.eq	Ldone_v8		//is x3 zero?
	//single remaining block: Xi = (Xi^inp)·H
253*8fb009dcSAndroid Build Coastguard WorkerLodd_tail_v8:
254*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8
255*8fb009dcSAndroid Build Coastguard Worker	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
256*8fb009dcSAndroid Build Coastguard Worker	eor	v17.16b,v16.16b,v18.16b		//v17.16b is rotated inp^Xi
257*8fb009dcSAndroid Build Coastguard Worker
258*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
259*8fb009dcSAndroid Build Coastguard Worker	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
260*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
261*8fb009dcSAndroid Build Coastguard Worker	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
262*8fb009dcSAndroid Build Coastguard Worker
263*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
264*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b
265*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b
266*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b
267*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
268*8fb009dcSAndroid Build Coastguard Worker
269*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]
270*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]
271*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
272*8fb009dcSAndroid Build Coastguard Worker
273*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
274*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
275*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
276*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v18.16b
277*8fb009dcSAndroid Build Coastguard Worker
278*8fb009dcSAndroid Build Coastguard WorkerLdone_v8:
279*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
280*8fb009dcSAndroid Build Coastguard Worker	rev64	v0.16b,v0.16b		//undo the byte reversal done on load
281*8fb009dcSAndroid Build Coastguard Worker#endif
282*8fb009dcSAndroid Build Coastguard Worker	ext	v0.16b,v0.16b,v0.16b,#8		//undo the rotation
283*8fb009dcSAndroid Build Coastguard Worker	st1	{v0.2d},[x0]		//write out Xi
284*8fb009dcSAndroid Build Coastguard Worker
285*8fb009dcSAndroid Build Coastguard Worker	ret
286*8fb009dcSAndroid Build Coastguard Worker
287*8fb009dcSAndroid Build Coastguard Worker.def gcm_ghash_v8_4x
288*8fb009dcSAndroid Build Coastguard Worker   .type 32
289*8fb009dcSAndroid Build Coastguard Worker.endef
290*8fb009dcSAndroid Build Coastguard Worker.align	4
291*8fb009dcSAndroid Build Coastguard Workergcm_ghash_v8_4x:
292*8fb009dcSAndroid Build Coastguard WorkerLgcm_ghash_v8_4x:
293*8fb009dcSAndroid Build Coastguard Worker	ld1	{v0.2d},[x0]		//load [rotated] Xi
294*8fb009dcSAndroid Build Coastguard Worker	ld1	{v20.2d,v21.2d,v22.2d},[x1],#48	//load twisted H, ..., H^2
295*8fb009dcSAndroid Build Coastguard Worker	movi	v19.16b,#0xe1
296*8fb009dcSAndroid Build Coastguard Worker	ld1	{v26.2d,v27.2d,v28.2d},[x1]	//load twisted H^3, ..., H^4
297*8fb009dcSAndroid Build Coastguard Worker	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
298*8fb009dcSAndroid Build Coastguard Worker
299*8fb009dcSAndroid Build Coastguard Worker	ld1	{v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
300*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
301*8fb009dcSAndroid Build Coastguard Worker	rev64	v0.16b,v0.16b
302*8fb009dcSAndroid Build Coastguard Worker	rev64	v5.16b,v5.16b
303*8fb009dcSAndroid Build Coastguard Worker	rev64	v6.16b,v6.16b
304*8fb009dcSAndroid Build Coastguard Worker	rev64	v7.16b,v7.16b
305*8fb009dcSAndroid Build Coastguard Worker	rev64	v4.16b,v4.16b
306*8fb009dcSAndroid Build Coastguard Worker#endif
307*8fb009dcSAndroid Build Coastguard Worker	ext	v25.16b,v7.16b,v7.16b,#8
308*8fb009dcSAndroid Build Coastguard Worker	ext	v24.16b,v6.16b,v6.16b,#8
309*8fb009dcSAndroid Build Coastguard Worker	ext	v23.16b,v5.16b,v5.16b,#8
310*8fb009dcSAndroid Build Coastguard Worker
311*8fb009dcSAndroid Build Coastguard Worker	pmull	v29.1q,v20.1d,v25.1d		//H·Ii+3
312*8fb009dcSAndroid Build Coastguard Worker	eor	v7.16b,v7.16b,v25.16b
313*8fb009dcSAndroid Build Coastguard Worker	pmull2	v31.1q,v20.2d,v25.2d
314*8fb009dcSAndroid Build Coastguard Worker	pmull	v30.1q,v21.1d,v7.1d
315*8fb009dcSAndroid Build Coastguard Worker
316*8fb009dcSAndroid Build Coastguard Worker	pmull	v16.1q,v22.1d,v24.1d		//H^2·Ii+2
317*8fb009dcSAndroid Build Coastguard Worker	eor	v6.16b,v6.16b,v24.16b
318*8fb009dcSAndroid Build Coastguard Worker	pmull2	v24.1q,v22.2d,v24.2d
319*8fb009dcSAndroid Build Coastguard Worker	pmull2	v6.1q,v21.2d,v6.2d
320*8fb009dcSAndroid Build Coastguard Worker
321*8fb009dcSAndroid Build Coastguard Worker	eor	v29.16b,v29.16b,v16.16b
322*8fb009dcSAndroid Build Coastguard Worker	eor	v31.16b,v31.16b,v24.16b
323*8fb009dcSAndroid Build Coastguard Worker	eor	v30.16b,v30.16b,v6.16b
324*8fb009dcSAndroid Build Coastguard Worker
325*8fb009dcSAndroid Build Coastguard Worker	pmull	v7.1q,v26.1d,v23.1d		//H^3·Ii+1
326*8fb009dcSAndroid Build Coastguard Worker	eor	v5.16b,v5.16b,v23.16b
327*8fb009dcSAndroid Build Coastguard Worker	pmull2	v23.1q,v26.2d,v23.2d
328*8fb009dcSAndroid Build Coastguard Worker	pmull	v5.1q,v27.1d,v5.1d
329*8fb009dcSAndroid Build Coastguard Worker
330*8fb009dcSAndroid Build Coastguard Worker	eor	v29.16b,v29.16b,v7.16b
331*8fb009dcSAndroid Build Coastguard Worker	eor	v31.16b,v31.16b,v23.16b
332*8fb009dcSAndroid Build Coastguard Worker	eor	v30.16b,v30.16b,v5.16b
333*8fb009dcSAndroid Build Coastguard Worker
334*8fb009dcSAndroid Build Coastguard Worker	subs	x3,x3,#128
335*8fb009dcSAndroid Build Coastguard Worker	b.lo	Ltail4x
336*8fb009dcSAndroid Build Coastguard Worker
337*8fb009dcSAndroid Build Coastguard Worker	b	Loop4x
338*8fb009dcSAndroid Build Coastguard Worker
339*8fb009dcSAndroid Build Coastguard Worker.align	4
340*8fb009dcSAndroid Build Coastguard WorkerLoop4x:
341*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v4.16b,v0.16b
342*8fb009dcSAndroid Build Coastguard Worker	ld1	{v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
343*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v16.16b,v16.16b,#8
344*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
345*8fb009dcSAndroid Build Coastguard Worker	rev64	v5.16b,v5.16b
346*8fb009dcSAndroid Build Coastguard Worker	rev64	v6.16b,v6.16b
347*8fb009dcSAndroid Build Coastguard Worker	rev64	v7.16b,v7.16b
348*8fb009dcSAndroid Build Coastguard Worker	rev64	v4.16b,v4.16b
349*8fb009dcSAndroid Build Coastguard Worker#endif
350*8fb009dcSAndroid Build Coastguard Worker
351*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v28.1d,v3.1d		//H^4·(Xi+Ii)
352*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v16.16b,v3.16b
353*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v28.2d,v3.2d
354*8fb009dcSAndroid Build Coastguard Worker	ext	v25.16b,v7.16b,v7.16b,#8
355*8fb009dcSAndroid Build Coastguard Worker	pmull2	v1.1q,v27.2d,v16.2d
356*8fb009dcSAndroid Build Coastguard Worker
357*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v29.16b
358*8fb009dcSAndroid Build Coastguard Worker	eor	v2.16b,v2.16b,v31.16b
359*8fb009dcSAndroid Build Coastguard Worker	ext	v24.16b,v6.16b,v6.16b,#8
360*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v30.16b
361*8fb009dcSAndroid Build Coastguard Worker	ext	v23.16b,v5.16b,v5.16b,#8
362*8fb009dcSAndroid Build Coastguard Worker
363*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
364*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b
365*8fb009dcSAndroid Build Coastguard Worker	pmull	v29.1q,v20.1d,v25.1d		//H·Ii+3
366*8fb009dcSAndroid Build Coastguard Worker	eor	v7.16b,v7.16b,v25.16b
367*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b
368*8fb009dcSAndroid Build Coastguard Worker	pmull2	v31.1q,v20.2d,v25.2d
369*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b
370*8fb009dcSAndroid Build Coastguard Worker	pmull	v30.1q,v21.1d,v7.1d
371*8fb009dcSAndroid Build Coastguard Worker
372*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
373*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]
374*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]
375*8fb009dcSAndroid Build Coastguard Worker	pmull	v16.1q,v22.1d,v24.1d		//H^2·Ii+2
376*8fb009dcSAndroid Build Coastguard Worker	eor	v6.16b,v6.16b,v24.16b
377*8fb009dcSAndroid Build Coastguard Worker	pmull2	v24.1q,v22.2d,v24.2d
378*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
379*8fb009dcSAndroid Build Coastguard Worker	pmull2	v6.1q,v21.2d,v6.2d
380*8fb009dcSAndroid Build Coastguard Worker
381*8fb009dcSAndroid Build Coastguard Worker	eor	v29.16b,v29.16b,v16.16b
382*8fb009dcSAndroid Build Coastguard Worker	eor	v31.16b,v31.16b,v24.16b
383*8fb009dcSAndroid Build Coastguard Worker	eor	v30.16b,v30.16b,v6.16b
384*8fb009dcSAndroid Build Coastguard Worker
385*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
386*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
387*8fb009dcSAndroid Build Coastguard Worker	pmull	v7.1q,v26.1d,v23.1d		//H^3·Ii+1
388*8fb009dcSAndroid Build Coastguard Worker	eor	v5.16b,v5.16b,v23.16b
389*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
390*8fb009dcSAndroid Build Coastguard Worker	pmull2	v23.1q,v26.2d,v23.2d
391*8fb009dcSAndroid Build Coastguard Worker	pmull	v5.1q,v27.1d,v5.1d
392*8fb009dcSAndroid Build Coastguard Worker
393*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v18.16b
394*8fb009dcSAndroid Build Coastguard Worker	eor	v29.16b,v29.16b,v7.16b
395*8fb009dcSAndroid Build Coastguard Worker	eor	v31.16b,v31.16b,v23.16b
396*8fb009dcSAndroid Build Coastguard Worker	ext	v0.16b,v0.16b,v0.16b,#8
397*8fb009dcSAndroid Build Coastguard Worker	eor	v30.16b,v30.16b,v5.16b
398*8fb009dcSAndroid Build Coastguard Worker
399*8fb009dcSAndroid Build Coastguard Worker	subs	x3,x3,#64
400*8fb009dcSAndroid Build Coastguard Worker	b.hs	Loop4x
401*8fb009dcSAndroid Build Coastguard Worker
402*8fb009dcSAndroid Build Coastguard WorkerLtail4x:
403*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v4.16b,v0.16b
404*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v16.16b,v16.16b,#8
405*8fb009dcSAndroid Build Coastguard Worker
406*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v28.1d,v3.1d		//H^4·(Xi+Ii)
407*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v16.16b,v3.16b
408*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v28.2d,v3.2d
409*8fb009dcSAndroid Build Coastguard Worker	pmull2	v1.1q,v27.2d,v16.2d
410*8fb009dcSAndroid Build Coastguard Worker
411*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v29.16b
412*8fb009dcSAndroid Build Coastguard Worker	eor	v2.16b,v2.16b,v31.16b
413*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v30.16b
414*8fb009dcSAndroid Build Coastguard Worker
415*8fb009dcSAndroid Build Coastguard Worker	adds	x3,x3,#64
416*8fb009dcSAndroid Build Coastguard Worker	b.eq	Ldone4x
417*8fb009dcSAndroid Build Coastguard Worker
418*8fb009dcSAndroid Build Coastguard Worker	cmp	x3,#32
419*8fb009dcSAndroid Build Coastguard Worker	b.lo	Lone
420*8fb009dcSAndroid Build Coastguard Worker	b.eq	Ltwo
421*8fb009dcSAndroid Build Coastguard WorkerLthree:
422*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
423*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b
424*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b
425*8fb009dcSAndroid Build Coastguard Worker	ld1	{v4.2d,v5.2d,v6.2d},[x2]
426*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b
427*8fb009dcSAndroid Build Coastguard Worker#ifndef	__AARCH64EB__
428*8fb009dcSAndroid Build Coastguard Worker	rev64	v5.16b,v5.16b
429*8fb009dcSAndroid Build Coastguard Worker	rev64	v6.16b,v6.16b
430*8fb009dcSAndroid Build Coastguard Worker	rev64	v4.16b,v4.16b
431*8fb009dcSAndroid Build Coastguard Worker#endif
432*8fb009dcSAndroid Build Coastguard Worker
433*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
434*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]
435*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]
436*8fb009dcSAndroid Build Coastguard Worker	ext	v24.16b,v6.16b,v6.16b,#8
437*8fb009dcSAndroid Build Coastguard Worker	ext	v23.16b,v5.16b,v5.16b,#8
438*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
439*8fb009dcSAndroid Build Coastguard Worker
440*8fb009dcSAndroid Build Coastguard Worker	pmull	v29.1q,v20.1d,v24.1d		//H·Ii+2
441*8fb009dcSAndroid Build Coastguard Worker	eor	v6.16b,v6.16b,v24.16b
442*8fb009dcSAndroid Build Coastguard Worker
443*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
444*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
445*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
446*8fb009dcSAndroid Build Coastguard Worker	pmull2	v31.1q,v20.2d,v24.2d
447*8fb009dcSAndroid Build Coastguard Worker	pmull	v30.1q,v21.1d,v6.1d
448*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v18.16b
449*8fb009dcSAndroid Build Coastguard Worker	pmull	v7.1q,v22.1d,v23.1d		//H^2·Ii+1
450*8fb009dcSAndroid Build Coastguard Worker	eor	v5.16b,v5.16b,v23.16b
451*8fb009dcSAndroid Build Coastguard Worker	ext	v0.16b,v0.16b,v0.16b,#8
452*8fb009dcSAndroid Build Coastguard Worker
453*8fb009dcSAndroid Build Coastguard Worker	pmull2	v23.1q,v22.2d,v23.2d
454*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v4.16b,v0.16b
455*8fb009dcSAndroid Build Coastguard Worker	pmull2	v5.1q,v21.2d,v5.2d
456*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v16.16b,v16.16b,#8
457*8fb009dcSAndroid Build Coastguard Worker
458*8fb009dcSAndroid Build Coastguard Worker	eor	v29.16b,v29.16b,v7.16b
459*8fb009dcSAndroid Build Coastguard Worker	eor	v31.16b,v31.16b,v23.16b
460*8fb009dcSAndroid Build Coastguard Worker	eor	v30.16b,v30.16b,v5.16b
461*8fb009dcSAndroid Build Coastguard Worker
462*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v26.1d,v3.1d		//H^3·(Xi+Ii)
463*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v16.16b,v3.16b
464*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v26.2d,v3.2d
465*8fb009dcSAndroid Build Coastguard Worker	pmull	v1.1q,v27.1d,v16.1d
466*8fb009dcSAndroid Build Coastguard Worker
467*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v29.16b
468*8fb009dcSAndroid Build Coastguard Worker	eor	v2.16b,v2.16b,v31.16b
469*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v30.16b
470*8fb009dcSAndroid Build Coastguard Worker	b	Ldone4x
471*8fb009dcSAndroid Build Coastguard Worker
472*8fb009dcSAndroid Build Coastguard Worker.align	4
// Ltwo: finish the pending Karatsuba post-processing and reduction of the
// v0/v1/v2 partial products, fold the result into the first of two 16-byte
// blocks loaded from [x2], multiply both blocks, and branch to Ldone4x with
// fresh un-reduced products in v0/v1/v2.
// v20 and v22 are used as H and H^2 (see the comments on the multiplies);
// v21 supplies the multipliers for the Karatsuba middle terms (its low half
// is paired with the v20 products, its high half with the v22 products);
// v19 is the reduction multiplier.
// NOTE(review): v19-v22 and the incoming v0/v1/v2 are set up outside this
// chunk, and this path is presumably taken when two blocks remain -- confirm
// against the dispatch code.
473*8fb009dcSAndroid Build Coastguard WorkerLtwo:
474*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
475*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b		//v18 = v0 ^ v2
476*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b		//v1 ^= {v0.d[1], v2.d[0]}
477*8fb009dcSAndroid Build Coastguard Worker	ld1	{v4.2d,v5.2d},[x2]		//load two 16-byte blocks; x2 is not advanced
478*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b		//v1 ^= v0 ^ v2
479*8fb009dcSAndroid Build Coastguard Worker#ifndef	__AARCH64EB__
480*8fb009dcSAndroid Build Coastguard Worker	rev64	v5.16b,v5.16b		//little-endian only: reverse bytes in each 64-bit lane
481*8fb009dcSAndroid Build Coastguard Worker	rev64	v4.16b,v4.16b
482*8fb009dcSAndroid Build Coastguard Worker#endif
483*8fb009dcSAndroid Build Coastguard Worker
484*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
485*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]		//upper half of v1 becomes the low half of v2
486*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]		//low half of v0 becomes the upper half of v1
487*8fb009dcSAndroid Build Coastguard Worker	ext	v23.16b,v5.16b,v5.16b,#8		//v23 = Ii+1 with its 64-bit halves swapped
488*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
489*8fb009dcSAndroid Build Coastguard Worker
490*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
491*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
492*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
493*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v18.16b		//v0 = reduced 128-bit value
494*8fb009dcSAndroid Build Coastguard Worker	ext	v0.16b,v0.16b,v0.16b,#8		//swap its 64-bit halves
495*8fb009dcSAndroid Build Coastguard Worker
496*8fb009dcSAndroid Build Coastguard Worker	pmull	v29.1q,v20.1d,v23.1d		//H·Ii+1
497*8fb009dcSAndroid Build Coastguard Worker	eor	v5.16b,v5.16b,v23.16b		//Karatsuba pre-processing: xor the two halves of Ii+1
498*8fb009dcSAndroid Build Coastguard Worker
499*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v4.16b,v0.16b		//Xi+Ii: fold the reduced value into the first block
500*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v16.16b,v16.16b,#8		//v3 = Xi+Ii with its 64-bit halves swapped
501*8fb009dcSAndroid Build Coastguard Worker
502*8fb009dcSAndroid Build Coastguard Worker	pmull2	v31.1q,v20.2d,v23.2d		//product of the upper halves of v20 and v23
503*8fb009dcSAndroid Build Coastguard Worker	pmull	v30.1q,v21.1d,v5.1d		//Karatsuba middle term for Ii+1 (low half of v21)
504*8fb009dcSAndroid Build Coastguard Worker
505*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v22.1d,v3.1d		//H^2·(Xi+Ii)
506*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v16.16b,v3.16b		//Karatsuba pre-processing: xor the two halves of Xi+Ii
507*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v22.2d,v3.2d		//product of the upper halves of v22 and v3
508*8fb009dcSAndroid Build Coastguard Worker	pmull2	v1.1q,v21.2d,v16.2d		//Karatsuba middle term for Xi+Ii (upper half of v21)
509*8fb009dcSAndroid Build Coastguard Worker
510*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v29.16b		//sum the two pmull (low-half) products
511*8fb009dcSAndroid Build Coastguard Worker	eor	v2.16b,v2.16b,v31.16b		//sum the two pmull2 (upper-half) products
512*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v30.16b		//sum the two middle-term products
513*8fb009dcSAndroid Build Coastguard Worker	b	Ldone4x		//shared final reduction and store
514*8fb009dcSAndroid Build Coastguard Worker
515*8fb009dcSAndroid Build Coastguard Worker.align	4
// Lone: finish the pending Karatsuba post-processing and reduction of the
// v0/v1/v2 partial products, fold the result into the single 16-byte block
// loaded from [x2], multiply it using v20/v21, and fall through into Ldone4x
// with fresh un-reduced products in v0/v1/v2.
// NOTE(review): v19-v21 and the incoming v0/v1/v2 are set up outside this
// chunk, and this path is presumably taken when one block remains -- confirm
// against the dispatch code.
516*8fb009dcSAndroid Build Coastguard WorkerLone:
517*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
518*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b		//v18 = v0 ^ v2
519*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b		//v1 ^= {v0.d[1], v2.d[0]}
520*8fb009dcSAndroid Build Coastguard Worker	ld1	{v4.2d},[x2]		//load one 16-byte block; x2 is not advanced
521*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b		//v1 ^= v0 ^ v2
522*8fb009dcSAndroid Build Coastguard Worker#ifndef	__AARCH64EB__
523*8fb009dcSAndroid Build Coastguard Worker	rev64	v4.16b,v4.16b		//little-endian only: reverse bytes in each 64-bit lane
524*8fb009dcSAndroid Build Coastguard Worker#endif
525*8fb009dcSAndroid Build Coastguard Worker
526*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
527*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]		//upper half of v1 becomes the low half of v2
528*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]		//low half of v0 becomes the upper half of v1
529*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
530*8fb009dcSAndroid Build Coastguard Worker
531*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
532*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
533*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
534*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v18.16b		//v0 = reduced 128-bit value
535*8fb009dcSAndroid Build Coastguard Worker	ext	v0.16b,v0.16b,v0.16b,#8		//swap its 64-bit halves
536*8fb009dcSAndroid Build Coastguard Worker
537*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v4.16b,v0.16b		//Xi+Ii: fold the reduced value into the loaded block
538*8fb009dcSAndroid Build Coastguard Worker	ext	v3.16b,v16.16b,v16.16b,#8		//v3 = Xi+Ii with its 64-bit halves swapped
539*8fb009dcSAndroid Build Coastguard Worker
540*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v20.1d,v3.1d		//H·(Xi+Ii), v20 being H as in the other tails
541*8fb009dcSAndroid Build Coastguard Worker	eor	v16.16b,v16.16b,v3.16b		//Karatsuba pre-processing: xor the two halves of Xi+Ii
542*8fb009dcSAndroid Build Coastguard Worker	pmull2	v2.1q,v20.2d,v3.2d		//product of the upper halves of v20 and v3
543*8fb009dcSAndroid Build Coastguard Worker	pmull	v1.1q,v21.1d,v16.1d		//Karatsuba middle term (low half of v21)
// No branch here: execution falls through into Ldone4x.
544*8fb009dcSAndroid Build Coastguard Worker
// Ldone4x: common exit for the tail paths. Applies the Karatsuba
// post-processing and the two-phase reduction (multiplier in v19) to the
// products left in v0 (pmull / low halves), v2 (pmull2 / upper halves) and
// v1 (middle term), then stores the 16-byte result to [x0] and returns.
// NOTE(review): v19 is loaded outside this chunk -- confirm it holds the
// reduction constant on every path that reaches this label.
545*8fb009dcSAndroid Build Coastguard WorkerLdone4x:
546*8fb009dcSAndroid Build Coastguard Worker	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
547*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v0.16b,v2.16b		//v18 = v0 ^ v2
548*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v17.16b		//v1 ^= {v0.d[1], v2.d[0]}
549*8fb009dcSAndroid Build Coastguard Worker	eor	v1.16b,v1.16b,v18.16b		//v1 ^= v0 ^ v2
550*8fb009dcSAndroid Build Coastguard Worker
551*8fb009dcSAndroid Build Coastguard Worker	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
552*8fb009dcSAndroid Build Coastguard Worker	ins	v2.d[0],v1.d[1]		//upper half of v1 becomes the low half of v2
553*8fb009dcSAndroid Build Coastguard Worker	ins	v1.d[1],v0.d[0]		//low half of v0 becomes the upper half of v1
554*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v1.16b,v18.16b
555*8fb009dcSAndroid Build Coastguard Worker
556*8fb009dcSAndroid Build Coastguard Worker	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
557*8fb009dcSAndroid Build Coastguard Worker	pmull	v0.1q,v0.1d,v19.1d
558*8fb009dcSAndroid Build Coastguard Worker	eor	v18.16b,v18.16b,v2.16b
559*8fb009dcSAndroid Build Coastguard Worker	eor	v0.16b,v0.16b,v18.16b		//v0 = reduced 128-bit value
560*8fb009dcSAndroid Build Coastguard Worker	ext	v0.16b,v0.16b,v0.16b,#8		//swap its 64-bit halves
561*8fb009dcSAndroid Build Coastguard Worker
562*8fb009dcSAndroid Build Coastguard Worker#ifndef __AARCH64EB__
563*8fb009dcSAndroid Build Coastguard Worker	rev64	v0.16b,v0.16b		//little-endian only: reverse bytes in each 64-bit lane before the store
564*8fb009dcSAndroid Build Coastguard Worker#endif
565*8fb009dcSAndroid Build Coastguard Worker	st1	{v0.2d},[x0]		//write out Xi
566*8fb009dcSAndroid Build Coastguard Worker
567*8fb009dcSAndroid Build Coastguard Worker	ret
568*8fb009dcSAndroid Build Coastguard Worker
569*8fb009dcSAndroid Build Coastguard Worker.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// The bytes above are a NUL-terminated ASCII identification string:
// "GHASH for ARMv8, CRYPTOGAMS by" followed by the author's e-mail address
// (appro at openssl.org) in angle brackets. It is data emitted after the
// final ret; no instruction in this chunk references it.
570*8fb009dcSAndroid Build Coastguard Worker.align	2		//re-align after the odd-length string (assumes .align takes a power-of-two exponent here, i.e. 4 bytes -- TODO confirm for this assembler)
571*8fb009dcSAndroid Build Coastguard Worker.align	2		//repeat of the directive above; already aligned, so it adds no padding
572*8fb009dcSAndroid Build Coastguard Worker#endif
573*8fb009dcSAndroid Build Coastguard Worker#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32)
574