xref: /aosp_15_r20/bionic/libc/arch-x86_64/string/sse2-memmove-slm.S (revision 8d67ca893c1523eb926b9080dbe4e2ffd2a27ba1)
1*8d67ca89SAndroid Build Coastguard Worker/*
2*8d67ca89SAndroid Build Coastguard WorkerCopyright (c) 2014, Intel Corporation
3*8d67ca89SAndroid Build Coastguard WorkerAll rights reserved.
4*8d67ca89SAndroid Build Coastguard Worker
5*8d67ca89SAndroid Build Coastguard WorkerRedistribution and use in source and binary forms, with or without
6*8d67ca89SAndroid Build Coastguard Workermodification, are permitted provided that the following conditions are met:
7*8d67ca89SAndroid Build Coastguard Worker
8*8d67ca89SAndroid Build Coastguard Worker    * Redistributions of source code must retain the above copyright notice,
9*8d67ca89SAndroid Build Coastguard Worker    * this list of conditions and the following disclaimer.
10*8d67ca89SAndroid Build Coastguard Worker
11*8d67ca89SAndroid Build Coastguard Worker    * Redistributions in binary form must reproduce the above copyright notice,
12*8d67ca89SAndroid Build Coastguard Worker    * this list of conditions and the following disclaimer in the documentation
13*8d67ca89SAndroid Build Coastguard Worker    * and/or other materials provided with the distribution.
14*8d67ca89SAndroid Build Coastguard Worker
15*8d67ca89SAndroid Build Coastguard Worker    * Neither the name of Intel Corporation nor the names of its contributors
16*8d67ca89SAndroid Build Coastguard Worker    * may be used to endorse or promote products derived from this software
17*8d67ca89SAndroid Build Coastguard Worker    * without specific prior written permission.
18*8d67ca89SAndroid Build Coastguard Worker
19*8d67ca89SAndroid Build Coastguard WorkerTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20*8d67ca89SAndroid Build Coastguard WorkerANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21*8d67ca89SAndroid Build Coastguard WorkerWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22*8d67ca89SAndroid Build Coastguard WorkerDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23*8d67ca89SAndroid Build Coastguard WorkerANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24*8d67ca89SAndroid Build Coastguard Worker(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25*8d67ca89SAndroid Build Coastguard WorkerLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26*8d67ca89SAndroid Build Coastguard WorkerANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27*8d67ca89SAndroid Build Coastguard Worker(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28*8d67ca89SAndroid Build Coastguard WorkerSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*8d67ca89SAndroid Build Coastguard Worker*/
30*8d67ca89SAndroid Build Coastguard Worker
31*8d67ca89SAndroid Build Coastguard Worker
/* Symbol under which the routine is assembled.  A file that includes
   this one may predefine MEMMOVE to pick a different name.  */
#if !defined(MEMMOVE)
# define MEMMOVE	memmove
#endif

/* L(name) pastes ".L" in front of NAME to form the label used for all
   jump targets inside the routine.  */
#if !defined(L)
# define L(label)	.L##label
#endif

/* Thin wrappers around the assembler's CFI directives.  Each one is only
   defined here when the including environment has not provided it.  */
#if !defined(cfi_startproc)
# define cfi_startproc	.cfi_startproc
#endif

#if !defined(cfi_endproc)
# define cfi_endproc	.cfi_endproc
#endif

#if !defined(cfi_rel_offset)
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#if !defined(cfi_restore)
# define cfi_restore(reg)	.cfi_restore reg
#endif

#if !defined(cfi_adjust_cfa_offset)
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): export NAME as a function symbol, align it to 16 bytes,
   place the label and open its CFI region.  */
#if !defined(ENTRY)
# define ENTRY(name)	\
	.globl name;	\
	.type name, @function;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* ALIAS_SYMBOL(alias, original): export ALIAS as a second global name
   equated to ORIGINAL.  */
#if !defined(ALIAS_SYMBOL)
# define ALIAS_SYMBOL(alias, original)	\
	.globl alias;	\
	.equ alias, original
#endif

/* END(name): close the CFI region opened by ENTRY and record the size of
   NAME as the distance from its label to this point.  */
#if !defined(END)
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif
80*8d67ca89SAndroid Build Coastguard Worker
/* CFI bookkeeping that accompanies a push or pop of REG.  This is x86-64
   code: every push/pop moves %rsp, and therefore the CFA offset, by
   8 bytes.  */
#define CFI_PUSH(REG)		\
	cfi_adjust_cfa_offset (8);		\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)		\
	cfi_adjust_cfa_offset (-8);		\
	cfi_restore (REG)

/* Push/pop REG and describe the stack change to the unwinder, so that
   a backtrace taken from inside the routine finds the return address
   and the saved register.  */
#define PUSH(REG)	push REG; CFI_PUSH (REG);
#define POP(REG)	pop REG; CFI_POP (REG);

/* ENTRANCE saves the only callee-saved register the routine uses (%rbx).
   RETURN_END restores it and returns.
   RETURN is for a return that is followed by more code of the same
   routine: that code still runs with %rbx on the stack, so the CFI state
   is switched back to "pushed" right after the ret.  */
#define ENTRANCE	PUSH (%rbx);
#define RETURN_END	POP (%rbx); ret
#define RETURN		RETURN_END; CFI_PUSH (%rbx);
95*8d67ca89SAndroid Build Coastguard Worker
96*8d67ca89SAndroid Build Coastguard Worker	.section .text.sse2,"ax",@progbits
97*8d67ca89SAndroid Build Coastguard WorkerENTRY (MEMMOVE)
98*8d67ca89SAndroid Build Coastguard Worker	ENTRANCE
99*8d67ca89SAndroid Build Coastguard Worker	mov	%rdi, %rax
100*8d67ca89SAndroid Build Coastguard Worker
101*8d67ca89SAndroid Build Coastguard Worker/* Check whether we should copy backward or forward.  */
102*8d67ca89SAndroid Build Coastguard Worker	cmp	%rsi, %rdi
103*8d67ca89SAndroid Build Coastguard Worker	je	L(mm_return)
104*8d67ca89SAndroid Build Coastguard Worker	jg	L(mm_len_0_or_more_backward)
105*8d67ca89SAndroid Build Coastguard Worker
106*8d67ca89SAndroid Build Coastguard Worker/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128]
107*8d67ca89SAndroid Build Coastguard Worker	separately.  */
108*8d67ca89SAndroid Build Coastguard Worker	cmp	$16, %rdx
109*8d67ca89SAndroid Build Coastguard Worker	jbe	L(mm_len_0_16_bytes_forward)
110*8d67ca89SAndroid Build Coastguard Worker
111*8d67ca89SAndroid Build Coastguard Worker	cmp	$32, %rdx
112*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_32_or_more_forward)
113*8d67ca89SAndroid Build Coastguard Worker
114*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..32] and return.  */
115*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%rsi), %xmm0
116*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%rsi, %rdx), %xmm1
117*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%rdi)
118*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, -16(%rdi, %rdx)
119*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
120*8d67ca89SAndroid Build Coastguard Worker
121*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_32_or_more_forward):
122*8d67ca89SAndroid Build Coastguard Worker	cmp	$64, %rdx
123*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_64_or_more_forward)
124*8d67ca89SAndroid Build Coastguard Worker
125*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..64] and return.  */
126*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%rsi), %xmm0
127*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%rsi), %xmm1
128*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%rsi, %rdx), %xmm2
129*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%rsi, %rdx), %xmm3
130*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%rdi)
131*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%rdi)
132*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, -16(%rdi, %rdx)
133*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, -32(%rdi, %rdx)
134*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
135*8d67ca89SAndroid Build Coastguard Worker
136*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_64_or_more_forward):
137*8d67ca89SAndroid Build Coastguard Worker	cmp	$128, %rdx
138*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_128_or_more_forward)
139*8d67ca89SAndroid Build Coastguard Worker
140*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..128] and return.  */
141*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%rsi), %xmm0
142*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%rsi), %xmm1
143*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%rsi), %xmm2
144*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%rsi), %xmm3
145*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%rsi, %rdx), %xmm4
146*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%rsi, %rdx), %xmm5
147*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%rsi, %rdx), %xmm6
148*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%rsi, %rdx), %xmm7
149*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%rdi)
150*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%rdi)
151*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, 32(%rdi)
152*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, 48(%rdi)
153*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm4, -64(%rdi, %rdx)
154*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm5, -48(%rdi, %rdx)
155*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm6, -32(%rdi, %rdx)
156*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm7, -16(%rdi, %rdx)
157*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
158*8d67ca89SAndroid Build Coastguard Worker
159*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_128_or_more_forward):
160*8d67ca89SAndroid Build Coastguard Worker/* Aligning the address of destination.  */
161*8d67ca89SAndroid Build Coastguard Worker/*  save first unaligned 64 bytes */
162*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%rsi), %xmm0
163*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%rsi), %xmm1
164*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%rsi), %xmm2
165*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%rsi), %xmm3
166*8d67ca89SAndroid Build Coastguard Worker
167*8d67ca89SAndroid Build Coastguard Worker	lea	64(%rdi), %r8
168*8d67ca89SAndroid Build Coastguard Worker	and	$-64, %r8  /* r8 now aligned to next 64 byte boundary */
169*8d67ca89SAndroid Build Coastguard Worker	sub	%rdi, %rsi /* rsi = src - dst = diff */
170*8d67ca89SAndroid Build Coastguard Worker
171*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%r8, %rsi), %xmm4
172*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%r8, %rsi), %xmm5
173*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%r8, %rsi), %xmm6
174*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%r8, %rsi), %xmm7
175*8d67ca89SAndroid Build Coastguard Worker
176*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%rdi)
177*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%rdi)
178*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, 32(%rdi)
179*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, 48(%rdi)
180*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, (%r8)
181*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm5, 16(%r8)
182*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm6, 32(%r8)
183*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm7, 48(%r8)
184*8d67ca89SAndroid Build Coastguard Worker	add	$64, %r8
185*8d67ca89SAndroid Build Coastguard Worker
186*8d67ca89SAndroid Build Coastguard Worker	lea	(%rdi, %rdx), %rbx
187*8d67ca89SAndroid Build Coastguard Worker	and	$-64, %rbx
188*8d67ca89SAndroid Build Coastguard Worker	cmp	%r8, %rbx
189*8d67ca89SAndroid Build Coastguard Worker	jbe	L(mm_copy_remaining_forward)
190*8d67ca89SAndroid Build Coastguard Worker
191*8d67ca89SAndroid Build Coastguard Worker	cmp	__x86_shared_cache_size_half(%rip), %rdx
192*8d67ca89SAndroid Build Coastguard Worker
193*8d67ca89SAndroid Build Coastguard Worker	ja      L(mm_overlapping_check_forward)
194*8d67ca89SAndroid Build Coastguard Worker
195*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
196*8d67ca89SAndroid Build Coastguard WorkerL(mm_main_loop_forward):
197*8d67ca89SAndroid Build Coastguard Worker
198*8d67ca89SAndroid Build Coastguard Worker	prefetcht0 128(%r8, %rsi)
199*8d67ca89SAndroid Build Coastguard Worker
200*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%r8, %rsi), %xmm0
201*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%r8, %rsi), %xmm1
202*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%r8, %rsi), %xmm2
203*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%r8, %rsi), %xmm3
204*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm0, (%r8)
205*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm1, 16(%r8)
206*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm2, 32(%r8)
207*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm3, 48(%r8)
208*8d67ca89SAndroid Build Coastguard Worker	lea	64(%r8), %r8
209*8d67ca89SAndroid Build Coastguard Worker	cmp	%r8, %rbx
210*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_main_loop_forward)
211*8d67ca89SAndroid Build Coastguard Worker
212*8d67ca89SAndroid Build Coastguard WorkerL(mm_copy_remaining_forward):
213*8d67ca89SAndroid Build Coastguard Worker	add	%rdi, %rdx
214*8d67ca89SAndroid Build Coastguard Worker	sub	%r8, %rdx
215*8d67ca89SAndroid Build Coastguard Worker/* We copied all up till %rdi position in the dst.
216*8d67ca89SAndroid Build Coastguard Worker	In %rdx now is how many bytes are left to copy.
217*8d67ca89SAndroid Build Coastguard Worker	Now we need to advance %r8. */
218*8d67ca89SAndroid Build Coastguard Worker	lea	(%r8, %rsi), %r9
219*8d67ca89SAndroid Build Coastguard Worker
220*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_0_64_bytes_forward):
221*8d67ca89SAndroid Build Coastguard Worker	cmp	$32, %rdx
222*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_33_64_bytes_forward)
223*8d67ca89SAndroid Build Coastguard Worker	cmp	$16, %rdx
224*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_17_32_bytes_forward)
225*8d67ca89SAndroid Build Coastguard Worker	test	%rdx, %rdx
226*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,2
227*8d67ca89SAndroid Build Coastguard Worker	je	L(mm_return)
228*8d67ca89SAndroid Build Coastguard Worker
229*8d67ca89SAndroid Build Coastguard Worker	cmpb	$8, %dl
230*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_9_16_bytes_forward)
231*8d67ca89SAndroid Build Coastguard Worker	cmpb	$4, %dl
232*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,5
233*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_5_8_bytes_forward)
234*8d67ca89SAndroid Build Coastguard Worker	cmpb	$2, %dl
235*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,1
236*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_3_4_bytes_forward)
237*8d67ca89SAndroid Build Coastguard Worker	movzbl	-1(%r9,%rdx), %esi
238*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%r9), %ebx
239*8d67ca89SAndroid Build Coastguard Worker	movb	%sil, -1(%r8,%rdx)
240*8d67ca89SAndroid Build Coastguard Worker	movb	%bl, (%r8)
241*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
242*8d67ca89SAndroid Build Coastguard Worker
243*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_33_64_bytes_forward):
244*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%r9), %xmm0
245*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%r9), %xmm1
246*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%r9, %rdx), %xmm2
247*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%r9, %rdx), %xmm3
248*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%r8)
249*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%r8)
250*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, -32(%r8, %rdx)
251*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, -16(%r8, %rdx)
252*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
253*8d67ca89SAndroid Build Coastguard Worker
254*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_17_32_bytes_forward):
255*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%r9), %xmm0
256*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%r9, %rdx), %xmm1
257*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%r8)
258*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, -16(%r8, %rdx)
259*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
260*8d67ca89SAndroid Build Coastguard Worker
261*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_5_8_bytes_forward):
262*8d67ca89SAndroid Build Coastguard Worker	movl	(%r9), %esi
263*8d67ca89SAndroid Build Coastguard Worker	movl	-4(%r9,%rdx), %ebx
264*8d67ca89SAndroid Build Coastguard Worker	movl	%esi, (%r8)
265*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, -4(%r8,%rdx)
266*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
267*8d67ca89SAndroid Build Coastguard Worker
268*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_9_16_bytes_forward):
269*8d67ca89SAndroid Build Coastguard Worker	mov	(%r9), %rsi
270*8d67ca89SAndroid Build Coastguard Worker	mov	-8(%r9, %rdx), %rbx
271*8d67ca89SAndroid Build Coastguard Worker	mov	%rsi, (%r8)
272*8d67ca89SAndroid Build Coastguard Worker	mov	%rbx, -8(%r8, %rdx)
273*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
274*8d67ca89SAndroid Build Coastguard Worker
275*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_3_4_bytes_forward):
276*8d67ca89SAndroid Build Coastguard Worker	movzwl	-2(%r9,%rdx), %esi
277*8d67ca89SAndroid Build Coastguard Worker	movzwl	(%r9), %ebx
278*8d67ca89SAndroid Build Coastguard Worker	movw	%si, -2(%r8,%rdx)
279*8d67ca89SAndroid Build Coastguard Worker	movw	%bx, (%r8)
280*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
281*8d67ca89SAndroid Build Coastguard Worker
282*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_0_16_bytes_forward):
283*8d67ca89SAndroid Build Coastguard Worker	testb	$24, %dl
284*8d67ca89SAndroid Build Coastguard Worker	jne	L(mm_len_9_16_bytes_forward)
285*8d67ca89SAndroid Build Coastguard Worker	testb	$4, %dl
286*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,5
287*8d67ca89SAndroid Build Coastguard Worker	jne	L(mm_len_5_8_bytes_forward)
288*8d67ca89SAndroid Build Coastguard Worker	test	%rdx, %rdx
289*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,2
290*8d67ca89SAndroid Build Coastguard Worker	je	L(mm_return)
291*8d67ca89SAndroid Build Coastguard Worker	testb	$2, %dl
292*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,1
293*8d67ca89SAndroid Build Coastguard Worker	jne	L(mm_len_2_4_bytes_forward)
294*8d67ca89SAndroid Build Coastguard Worker	movzbl	-1(%rsi,%rdx), %ebx
295*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%rsi), %esi
296*8d67ca89SAndroid Build Coastguard Worker	movb	%bl, -1(%rdi,%rdx)
297*8d67ca89SAndroid Build Coastguard Worker	movb	%sil, (%rdi)
298*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
299*8d67ca89SAndroid Build Coastguard Worker
300*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_2_4_bytes_forward):
301*8d67ca89SAndroid Build Coastguard Worker	movzwl	-2(%rsi,%rdx), %ebx
302*8d67ca89SAndroid Build Coastguard Worker	movzwl	(%rsi), %esi
303*8d67ca89SAndroid Build Coastguard Worker	movw	%bx, -2(%rdi,%rdx)
304*8d67ca89SAndroid Build Coastguard Worker	movw	%si, (%rdi)
305*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
306*8d67ca89SAndroid Build Coastguard Worker
307*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_5_8_bytes_forward):
308*8d67ca89SAndroid Build Coastguard Worker	movl	(%rsi), %ebx
309*8d67ca89SAndroid Build Coastguard Worker	movl	-4(%rsi,%rdx), %esi
310*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, (%rdi)
311*8d67ca89SAndroid Build Coastguard Worker	movl	%esi, -4(%rdi,%rdx)
312*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
313*8d67ca89SAndroid Build Coastguard Worker
314*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_9_16_bytes_forward):
315*8d67ca89SAndroid Build Coastguard Worker	mov	(%rsi), %rbx
316*8d67ca89SAndroid Build Coastguard Worker	mov	-8(%rsi, %rdx), %rsi
317*8d67ca89SAndroid Build Coastguard Worker	mov	%rbx, (%rdi)
318*8d67ca89SAndroid Build Coastguard Worker	mov	%rsi, -8(%rdi, %rdx)
319*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
320*8d67ca89SAndroid Build Coastguard Worker
321*8d67ca89SAndroid Build Coastguard WorkerL(mm_recalc_len):
322*8d67ca89SAndroid Build Coastguard Worker/* Compute in %rdx how many bytes are left to copy after
323*8d67ca89SAndroid Build Coastguard Worker	the main loop stops.  */
324*8d67ca89SAndroid Build Coastguard Worker	mov 	%rbx, %rdx
325*8d67ca89SAndroid Build Coastguard Worker	sub 	%rdi, %rdx
326*8d67ca89SAndroid Build Coastguard Worker/* The code for copying backwards.  */
327*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_0_or_more_backward):
328*8d67ca89SAndroid Build Coastguard Worker
329*8d67ca89SAndroid Build Coastguard Worker/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
330*8d67ca89SAndroid Build Coastguard Worker	separately.  */
331*8d67ca89SAndroid Build Coastguard Worker	cmp	$16, %rdx
332*8d67ca89SAndroid Build Coastguard Worker	jbe	L(mm_len_0_16_bytes_backward)
333*8d67ca89SAndroid Build Coastguard Worker
334*8d67ca89SAndroid Build Coastguard Worker	cmp	$32, %rdx
335*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_32_or_more_backward)
336*8d67ca89SAndroid Build Coastguard Worker
337*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..32] and return.  */
338*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%rsi), %xmm0
339*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%rsi, %rdx), %xmm1
340*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%rdi)
341*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, -16(%rdi, %rdx)
342*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
343*8d67ca89SAndroid Build Coastguard Worker
344*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_32_or_more_backward):
345*8d67ca89SAndroid Build Coastguard Worker	cmp	$64, %rdx
346*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_64_or_more_backward)
347*8d67ca89SAndroid Build Coastguard Worker
348*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..64] and return.  */
349*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%rsi), %xmm0
350*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%rsi), %xmm1
351*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%rsi, %rdx), %xmm2
352*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%rsi, %rdx), %xmm3
353*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%rdi)
354*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%rdi)
355*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, -16(%rdi, %rdx)
356*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, -32(%rdi, %rdx)
357*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
358*8d67ca89SAndroid Build Coastguard Worker
359*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_64_or_more_backward):
360*8d67ca89SAndroid Build Coastguard Worker	cmp	$128, %rdx
361*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_128_or_more_backward)
362*8d67ca89SAndroid Build Coastguard Worker
363*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..128] and return.  */
364*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%rsi), %xmm0
365*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%rsi), %xmm1
366*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%rsi), %xmm2
367*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%rsi), %xmm3
368*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%rsi, %rdx), %xmm4
369*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%rsi, %rdx), %xmm5
370*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%rsi, %rdx), %xmm6
371*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%rsi, %rdx), %xmm7
372*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%rdi)
373*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%rdi)
374*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, 32(%rdi)
375*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, 48(%rdi)
376*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm4, -64(%rdi, %rdx)
377*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm5, -48(%rdi, %rdx)
378*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm6, -32(%rdi, %rdx)
379*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm7, -16(%rdi, %rdx)
380*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
381*8d67ca89SAndroid Build Coastguard Worker
382*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_128_or_more_backward):
383*8d67ca89SAndroid Build Coastguard Worker/* Aligning the address of destination. We need to save
384*8d67ca89SAndroid Build Coastguard Worker	16 bits from the source in order not to overwrite them.  */
385*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%rsi, %rdx), %xmm0
386*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%rsi, %rdx), %xmm1
387*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%rsi, %rdx), %xmm2
388*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%rsi, %rdx), %xmm3
389*8d67ca89SAndroid Build Coastguard Worker
390*8d67ca89SAndroid Build Coastguard Worker	lea	(%rdi, %rdx), %r9
391*8d67ca89SAndroid Build Coastguard Worker	and	$-64, %r9 /* r9 = aligned dst */
392*8d67ca89SAndroid Build Coastguard Worker
393*8d67ca89SAndroid Build Coastguard Worker	mov	%rsi, %r8
394*8d67ca89SAndroid Build Coastguard Worker	sub	%rdi, %r8 /* r8 = src - dst, diff */
395*8d67ca89SAndroid Build Coastguard Worker
396*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%r9, %r8), %xmm4
397*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%r9, %r8), %xmm5
398*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%r9, %r8), %xmm6
399*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%r9, %r8), %xmm7
400*8d67ca89SAndroid Build Coastguard Worker
401*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, -16(%rdi, %rdx)
402*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, -32(%rdi, %rdx)
403*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, -48(%rdi, %rdx)
404*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, -64(%rdi, %rdx)
405*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, -16(%r9)
406*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm5, -32(%r9)
407*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm6, -48(%r9)
408*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm7, -64(%r9)
409*8d67ca89SAndroid Build Coastguard Worker	lea	-64(%r9), %r9
410*8d67ca89SAndroid Build Coastguard Worker
411*8d67ca89SAndroid Build Coastguard Worker	lea	64(%rdi), %rbx
412*8d67ca89SAndroid Build Coastguard Worker	and	$-64, %rbx
413*8d67ca89SAndroid Build Coastguard Worker
414*8d67ca89SAndroid Build Coastguard Worker	cmp	%r9, %rbx
415*8d67ca89SAndroid Build Coastguard Worker	jae	L(mm_recalc_len)
416*8d67ca89SAndroid Build Coastguard Worker
417*8d67ca89SAndroid Build Coastguard Worker	cmp	__x86_shared_cache_size_half(%rip), %rdx
418*8d67ca89SAndroid Build Coastguard Worker
419*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_overlapping_check_backward)
420*8d67ca89SAndroid Build Coastguard Worker
421*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
422*8d67ca89SAndroid Build Coastguard WorkerL(mm_main_loop_backward):
423*8d67ca89SAndroid Build Coastguard Worker
424*8d67ca89SAndroid Build Coastguard Worker	prefetcht0 -128(%r9, %r8)
425*8d67ca89SAndroid Build Coastguard Worker
426*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%r9, %r8), %xmm0
427*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%r9, %r8), %xmm1
428*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%r9, %r8), %xmm2
429*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%r9, %r8), %xmm3
430*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm0, -64(%r9)
431*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm1, -48(%r9)
432*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm2, -32(%r9)
433*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm3, -16(%r9)
434*8d67ca89SAndroid Build Coastguard Worker	lea	-64(%r9), %r9
435*8d67ca89SAndroid Build Coastguard Worker	cmp	%r9, %rbx
436*8d67ca89SAndroid Build Coastguard Worker	jb	L(mm_main_loop_backward)
437*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_recalc_len)
438*8d67ca89SAndroid Build Coastguard Worker
439*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..16] and return.  */
440*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_0_16_bytes_backward):
441*8d67ca89SAndroid Build Coastguard Worker	testb	$24, %dl
442*8d67ca89SAndroid Build Coastguard Worker	jnz	L(mm_len_9_16_bytes_backward)
443*8d67ca89SAndroid Build Coastguard Worker	testb	$4, %dl
444*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,5
445*8d67ca89SAndroid Build Coastguard Worker	jnz	L(mm_len_5_8_bytes_backward)
446*8d67ca89SAndroid Build Coastguard Worker	test	%rdx, %rdx
447*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,2
448*8d67ca89SAndroid Build Coastguard Worker	je	L(mm_return)
449*8d67ca89SAndroid Build Coastguard Worker	testb	$2, %dl
450*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,1
451*8d67ca89SAndroid Build Coastguard Worker	jne	L(mm_len_3_4_bytes_backward)
452*8d67ca89SAndroid Build Coastguard Worker	movzbl	-1(%rsi,%rdx), %ebx
453*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%rsi), %ecx
454*8d67ca89SAndroid Build Coastguard Worker	movb	%bl, -1(%rdi,%rdx)
455*8d67ca89SAndroid Build Coastguard Worker	movb	%cl, (%rdi)
456*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
457*8d67ca89SAndroid Build Coastguard Worker
458*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_3_4_bytes_backward):
459*8d67ca89SAndroid Build Coastguard Worker	movzwl	-2(%rsi,%rdx), %ebx
460*8d67ca89SAndroid Build Coastguard Worker	movzwl	(%rsi), %ecx
461*8d67ca89SAndroid Build Coastguard Worker	movw	%bx, -2(%rdi,%rdx)
462*8d67ca89SAndroid Build Coastguard Worker	movw	%cx, (%rdi)
463*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
464*8d67ca89SAndroid Build Coastguard Worker
465*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_9_16_bytes_backward):
466*8d67ca89SAndroid Build Coastguard Worker	movl	-4(%rsi,%rdx), %ebx
467*8d67ca89SAndroid Build Coastguard Worker	movl	-8(%rsi,%rdx), %ecx
468*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, -4(%rdi,%rdx)
469*8d67ca89SAndroid Build Coastguard Worker	movl	%ecx, -8(%rdi,%rdx)
470*8d67ca89SAndroid Build Coastguard Worker	sub	$8, %rdx
471*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_len_0_16_bytes_backward)
472*8d67ca89SAndroid Build Coastguard Worker
473*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_5_8_bytes_backward):
474*8d67ca89SAndroid Build Coastguard Worker	movl	(%rsi), %ebx
475*8d67ca89SAndroid Build Coastguard Worker	movl	-4(%rsi,%rdx), %ecx
476*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, (%rdi)
477*8d67ca89SAndroid Build Coastguard Worker	movl	%ecx, -4(%rdi,%rdx)
478*8d67ca89SAndroid Build Coastguard Worker
479*8d67ca89SAndroid Build Coastguard WorkerL(mm_return):
480*8d67ca89SAndroid Build Coastguard Worker	RETURN
481*8d67ca89SAndroid Build Coastguard Worker
482*8d67ca89SAndroid Build Coastguard Worker/* Big length copy forward part.  */
483*8d67ca89SAndroid Build Coastguard Worker
484*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
485*8d67ca89SAndroid Build Coastguard Worker
486*8d67ca89SAndroid Build Coastguard WorkerL(mm_overlapping_check_forward):
487*8d67ca89SAndroid Build Coastguard Worker	mov	%rsi, %r9
488*8d67ca89SAndroid Build Coastguard Worker	add	%rdx, %r9
489*8d67ca89SAndroid Build Coastguard Worker	cmp	__x86_shared_cache_size(%rip), %r9
490*8d67ca89SAndroid Build Coastguard Worker	jbe	L(mm_main_loop_forward)
491*8d67ca89SAndroid Build Coastguard Worker
492*8d67ca89SAndroid Build Coastguard WorkerL(mm_large_page_loop_forward):
493*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%r8, %rsi), %xmm0
494*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%r8, %rsi), %xmm1
495*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%r8, %rsi), %xmm2
496*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%r8, %rsi), %xmm3
497*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm0, (%r8)
498*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm1, 16(%r8)
499*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm2, 32(%r8)
500*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm3, 48(%r8)
501*8d67ca89SAndroid Build Coastguard Worker	lea 	64(%r8), %r8
502*8d67ca89SAndroid Build Coastguard Worker	cmp	%r8, %rbx
503*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_large_page_loop_forward)
504*8d67ca89SAndroid Build Coastguard Worker	sfence
505*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_copy_remaining_forward)
506*8d67ca89SAndroid Build Coastguard Worker
507*8d67ca89SAndroid Build Coastguard Worker/* Big length copy backward part.  */
508*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
509*8d67ca89SAndroid Build Coastguard Worker
510*8d67ca89SAndroid Build Coastguard WorkerL(mm_overlapping_check_backward):
511*8d67ca89SAndroid Build Coastguard Worker	mov	%rdi, %r11
512*8d67ca89SAndroid Build Coastguard Worker	sub	%rsi, %r11 /* r11 = dst - src, diff */
513*8d67ca89SAndroid Build Coastguard Worker	add	%rdx, %r11
514*8d67ca89SAndroid Build Coastguard Worker	cmp	__x86_shared_cache_size(%rip), %r11
515*8d67ca89SAndroid Build Coastguard Worker	jbe	L(mm_main_loop_backward)
516*8d67ca89SAndroid Build Coastguard Worker
517*8d67ca89SAndroid Build Coastguard WorkerL(mm_large_page_loop_backward):
518*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%r9, %r8), %xmm0
519*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%r9, %r8), %xmm1
520*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%r9, %r8), %xmm2
521*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%r9, %r8), %xmm3
522*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm0, -64(%r9)
523*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm1, -48(%r9)
524*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm2, -32(%r9)
525*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm3, -16(%r9)
526*8d67ca89SAndroid Build Coastguard Worker	lea 	-64(%r9), %r9
527*8d67ca89SAndroid Build Coastguard Worker	cmp	%r9, %rbx
528*8d67ca89SAndroid Build Coastguard Worker	jb	L(mm_large_page_loop_backward)
529*8d67ca89SAndroid Build Coastguard Worker	sfence
530*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_recalc_len)
531*8d67ca89SAndroid Build Coastguard Worker
532*8d67ca89SAndroid Build Coastguard WorkerEND (MEMMOVE)
533*8d67ca89SAndroid Build Coastguard Worker
534*8d67ca89SAndroid Build Coastguard WorkerALIAS_SYMBOL(memcpy, MEMMOVE)
535