xref: /aosp_15_r20/bionic/libc/arch-x86_64/string/ssse3-strcmp-slm.S (revision 8d67ca893c1523eb926b9080dbe4e2ffd2a27ba1)
1*8d67ca89SAndroid Build Coastguard Worker/*
2*8d67ca89SAndroid Build Coastguard WorkerCopyright (c) 2014, Intel Corporation
3*8d67ca89SAndroid Build Coastguard WorkerAll rights reserved.
4*8d67ca89SAndroid Build Coastguard Worker
5*8d67ca89SAndroid Build Coastguard WorkerRedistribution and use in source and binary forms, with or without
6*8d67ca89SAndroid Build Coastguard Workermodification, are permitted provided that the following conditions are met:
7*8d67ca89SAndroid Build Coastguard Worker
8*8d67ca89SAndroid Build Coastguard Worker    * Redistributions of source code must retain the above copyright notice,
9*8d67ca89SAndroid Build Coastguard Worker    * this list of conditions and the following disclaimer.
10*8d67ca89SAndroid Build Coastguard Worker
11*8d67ca89SAndroid Build Coastguard Worker    * Redistributions in binary form must reproduce the above copyright notice,
12*8d67ca89SAndroid Build Coastguard Worker    * this list of conditions and the following disclaimer in the documentation
13*8d67ca89SAndroid Build Coastguard Worker    * and/or other materials provided with the distribution.
14*8d67ca89SAndroid Build Coastguard Worker
15*8d67ca89SAndroid Build Coastguard Worker    * Neither the name of Intel Corporation nor the names of its contributors
16*8d67ca89SAndroid Build Coastguard Worker    * may be used to endorse or promote products derived from this software
17*8d67ca89SAndroid Build Coastguard Worker    * without specific prior written permission.
18*8d67ca89SAndroid Build Coastguard Worker
19*8d67ca89SAndroid Build Coastguard WorkerTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20*8d67ca89SAndroid Build Coastguard WorkerANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21*8d67ca89SAndroid Build Coastguard WorkerWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22*8d67ca89SAndroid Build Coastguard WorkerDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23*8d67ca89SAndroid Build Coastguard WorkerANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24*8d67ca89SAndroid Build Coastguard Worker(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25*8d67ca89SAndroid Build Coastguard WorkerLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26*8d67ca89SAndroid Build Coastguard WorkerANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27*8d67ca89SAndroid Build Coastguard Worker(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28*8d67ca89SAndroid Build Coastguard WorkerSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*8d67ca89SAndroid Build Coastguard Worker*/
30*8d67ca89SAndroid Build Coastguard Worker
31*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
32*8d67ca89SAndroid Build Coastguard Worker/* Deduct the 16 - %rcx bytes covered by the first block from the unsigned
33*8d67ca89SAndroid Build Coastguard Worker   counter %r11; branch to strcmp_exitz if it wraps (new > old) or hits 0.  */
34*8d67ca89SAndroid Build Coastguard Worker#define UPDATE_STRNCMP_COUNTER				\
35*8d67ca89SAndroid Build Coastguard Worker	/* %r9 = %r11 - (16 - %rcx): count left to compare */	\
36*8d67ca89SAndroid Build Coastguard Worker	lea	-16(%rcx, %r11), %r9;			\
37*8d67ca89SAndroid Build Coastguard Worker	cmp	%r9, %r11;				\
38*8d67ca89SAndroid Build Coastguard Worker	jb	L(strcmp_exitz);	/* old < new: wrapped */	\
39*8d67ca89SAndroid Build Coastguard Worker	test	%r9, %r9;				\
40*8d67ca89SAndroid Build Coastguard Worker	je	L(strcmp_exitz);	/* nothing left to compare */	\
41*8d67ca89SAndroid Build Coastguard Worker	mov	%r9, %r11	/* commit the new count; clobbers %r9 and flags */
42*8d67ca89SAndroid Build Coastguard Worker
43*8d67ca89SAndroid Build Coastguard Worker#else	/* !USE_AS_STRNCMP */
44*8d67ca89SAndroid Build Coastguard Worker#define UPDATE_STRNCMP_COUNTER	/* expands to nothing: no counter to maintain */
45*8d67ca89SAndroid Build Coastguard Worker#ifndef STRCMP	/* default symbol name; only supplied in this non-strncmp branch */
46*8d67ca89SAndroid Build Coastguard Worker#define STRCMP		strcmp
47*8d67ca89SAndroid Build Coastguard Worker#endif
48*8d67ca89SAndroid Build Coastguard Worker#endif	/* USE_AS_STRNCMP */
49*8d67ca89SAndroid Build Coastguard Worker
50*8d67ca89SAndroid Build Coastguard Worker#ifndef L	/* L(x) pastes to .Lx, an assembler-local (non-exported) label */
51*8d67ca89SAndroid Build Coastguard Worker# define L(label)	.L##label
52*8d67ca89SAndroid Build Coastguard Worker#endif
53*8d67ca89SAndroid Build Coastguard Worker
54*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_startproc	/* fallback, used only if not already defined */
55*8d67ca89SAndroid Build Coastguard Worker# define cfi_startproc			.cfi_startproc
56*8d67ca89SAndroid Build Coastguard Worker#endif
57*8d67ca89SAndroid Build Coastguard Worker
58*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_endproc	/* fallback, used only if not already defined */
59*8d67ca89SAndroid Build Coastguard Worker# define cfi_endproc			.cfi_endproc
60*8d67ca89SAndroid Build Coastguard Worker#endif
61*8d67ca89SAndroid Build Coastguard Worker
62*8d67ca89SAndroid Build Coastguard Worker#ifndef ENTRY	/* fallback: global function symbol, 16-byte aligned, opens CFI */
63*8d67ca89SAndroid Build Coastguard Worker# define ENTRY(name)			\
64*8d67ca89SAndroid Build Coastguard Worker	.type name,  @function; 	\
65*8d67ca89SAndroid Build Coastguard Worker	.globl name;			\
66*8d67ca89SAndroid Build Coastguard Worker	.p2align 4;			\
67*8d67ca89SAndroid Build Coastguard Workername:					\
68*8d67ca89SAndroid Build Coastguard Worker	cfi_startproc
69*8d67ca89SAndroid Build Coastguard Worker#endif
70*8d67ca89SAndroid Build Coastguard Worker
71*8d67ca89SAndroid Build Coastguard Worker#ifndef END	/* fallback: closes CFI and sets the symbol size to the bytes since name */
72*8d67ca89SAndroid Build Coastguard Worker# define END(name)			\
73*8d67ca89SAndroid Build Coastguard Worker	cfi_endproc;			\
74*8d67ca89SAndroid Build Coastguard Worker	.size name, .-name
75*8d67ca89SAndroid Build Coastguard Worker#endif
76*8d67ca89SAndroid Build Coastguard Worker#define RETURN ret	/* alias for ret; no use appears in the lines visible here */
77*8d67ca89SAndroid Build Coastguard Worker	.section .text.ssse3,"ax",@progbits
78*8d67ca89SAndroid Build Coastguard WorkerENTRY (STRCMP)
79*8d67ca89SAndroid Build Coastguard Worker/*
80*8d67ca89SAndroid Build Coastguard Worker * This implementation uses SSE to compare up to 16 bytes at a time.
81*8d67ca89SAndroid Build Coastguard Worker */
82*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
83*8d67ca89SAndroid Build Coastguard Worker	test	%rdx, %rdx
84*8d67ca89SAndroid Build Coastguard Worker	je	L(strcmp_exitz)
85*8d67ca89SAndroid Build Coastguard Worker	cmp	$1, %rdx
86*8d67ca89SAndroid Build Coastguard Worker	je	L(Byte0)
87*8d67ca89SAndroid Build Coastguard Worker	mov	%rdx, %r11
88*8d67ca89SAndroid Build Coastguard Worker#endif
89*8d67ca89SAndroid Build Coastguard Worker	mov	%esi, %ecx
90*8d67ca89SAndroid Build Coastguard Worker	mov	%edi, %eax
91*8d67ca89SAndroid Build Coastguard Worker/* Use 64bit AND here to avoid long NOP padding.  */
92*8d67ca89SAndroid Build Coastguard Worker	and	$0x3f, %rcx		/* rsi alignment in cache line */
93*8d67ca89SAndroid Build Coastguard Worker	and	$0x3f, %rax		/* rdi alignment in cache line */
94*8d67ca89SAndroid Build Coastguard Worker	cmp	$0x30, %ecx
95*8d67ca89SAndroid Build Coastguard Worker	ja	L(crosscache)	/* rsi: 16-byte load will cross cache line */
96*8d67ca89SAndroid Build Coastguard Worker	cmp	$0x30, %eax
97*8d67ca89SAndroid Build Coastguard Worker	ja	L(crosscache)	/* rdi: 16-byte load will cross cache line */
98*8d67ca89SAndroid Build Coastguard Worker	movlpd	(%rdi), %xmm1
99*8d67ca89SAndroid Build Coastguard Worker	movlpd	(%rsi), %xmm2
100*8d67ca89SAndroid Build Coastguard Worker	movhpd	8(%rdi), %xmm1
101*8d67ca89SAndroid Build Coastguard Worker	movhpd	8(%rsi), %xmm2
102*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
103*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
104*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1		/* compare first 16 bytes for equality */
105*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
106*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
107*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx		/* if first 16 bytes are same, edx == 0xffff */
108*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less16bytes)	/* If not, find different value or null char */
109*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
110*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
111*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)	/* finish comparison */
112*8d67ca89SAndroid Build Coastguard Worker#endif
113*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rsi		/* prepare to search next 16 bytes */
114*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rdi		/* prepare to search next 16 bytes */
115*8d67ca89SAndroid Build Coastguard Worker
116*8d67ca89SAndroid Build Coastguard Worker	/*
117*8d67ca89SAndroid Build Coastguard Worker	 * Determine source and destination string offsets from 16-byte alignment.
118*8d67ca89SAndroid Build Coastguard Worker	 * Use relative offset difference between the two to determine which case
119*8d67ca89SAndroid Build Coastguard Worker	 * below to use.
120*8d67ca89SAndroid Build Coastguard Worker	 */
121*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
122*8d67ca89SAndroid Build Coastguard WorkerL(crosscache):
123*8d67ca89SAndroid Build Coastguard Worker	and	$0xfffffffffffffff0, %rsi	/* force %rsi is 16 byte aligned */
124*8d67ca89SAndroid Build Coastguard Worker	and	$0xfffffffffffffff0, %rdi	/* force %rdi is 16 byte aligned */
125*8d67ca89SAndroid Build Coastguard Worker	mov	$0xffff, %edx			/* for equivalent offset */
126*8d67ca89SAndroid Build Coastguard Worker	xor	%r8d, %r8d
127*8d67ca89SAndroid Build Coastguard Worker	and	$0xf, %ecx			/* offset of rsi */
128*8d67ca89SAndroid Build Coastguard Worker	and	$0xf, %eax			/* offset of rdi */
129*8d67ca89SAndroid Build Coastguard Worker	cmp	%eax, %ecx
130*8d67ca89SAndroid Build Coastguard Worker	je	L(ashr_0)			/* rsi and rdi relative offset same */
131*8d67ca89SAndroid Build Coastguard Worker	ja	L(bigger)
132*8d67ca89SAndroid Build Coastguard Worker	mov	%edx, %r8d			/* r8d is offset flag for exit tail */
133*8d67ca89SAndroid Build Coastguard Worker	xchg	%ecx, %eax
134*8d67ca89SAndroid Build Coastguard Worker	xchg	%rsi, %rdi
135*8d67ca89SAndroid Build Coastguard WorkerL(bigger):
136*8d67ca89SAndroid Build Coastguard Worker	lea	15(%rax), %r9
137*8d67ca89SAndroid Build Coastguard Worker	sub	%rcx, %r9
138*8d67ca89SAndroid Build Coastguard Worker	lea	L(unaligned_table)(%rip), %r10
139*8d67ca89SAndroid Build Coastguard Worker	movslq	(%r10, %r9,4), %r9
140*8d67ca89SAndroid Build Coastguard Worker	lea	(%r10, %r9), %r10
141*8d67ca89SAndroid Build Coastguard Worker	jmp	*%r10				/* jump to corresponding case */
142*8d67ca89SAndroid Build Coastguard Worker
143*8d67ca89SAndroid Build Coastguard Worker/*
144*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_0
145*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)  relative offset  corresponding case
146*8d67ca89SAndroid Build Coastguard Worker *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
147*8d67ca89SAndroid Build Coastguard Worker */
148*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
149*8d67ca89SAndroid Build Coastguard WorkerL(ashr_0):
150*8d67ca89SAndroid Build Coastguard Worker
151*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1
152*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0			/* clear %xmm0 for null char check */
153*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0			/* Any null chars? */
154*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	(%rdi), %xmm1			/* compare 16 bytes for equality */
155*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1			/* packed sub of comparison results*/
156*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %r9d
157*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx			/* adjust 0xffff for offset */
158*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d			/* adjust for 16-byte offset */
159*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx
160*8d67ca89SAndroid Build Coastguard Worker	/*
161*8d67ca89SAndroid Build Coastguard Worker	 * edx is now zero only if the remaining (16 - rcx) bytes of this
162*8d67ca89SAndroid Build Coastguard Worker	 * 16-byte block are equal in both strings and contain no null char.
163*8d67ca89SAndroid Build Coastguard Worker	 */
164*8d67ca89SAndroid Build Coastguard Worker	jne	L(less32bytes)		/* mismatch or null char */
165*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER
166*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx
167*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %r9
168*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0			/* clear xmm0, may have changed above */
169*8d67ca89SAndroid Build Coastguard Worker
170*8d67ca89SAndroid Build Coastguard Worker	/*
171*8d67ca89SAndroid Build Coastguard Worker	 * Now both strings are aligned at 16-byte boundary. Loop over strings
172*8d67ca89SAndroid Build Coastguard Worker	 * checking 32-bytes per iteration.
173*8d67ca89SAndroid Build Coastguard Worker	 */
174*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
175*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_0):
176*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
177*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
178*8d67ca89SAndroid Build Coastguard Worker
179*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
180*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
181*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
182*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
183*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
184*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)		/* mismatch or null char seen */
185*8d67ca89SAndroid Build Coastguard Worker
186*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
187*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
188*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
189*8d67ca89SAndroid Build Coastguard Worker#endif
190*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
191*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
192*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
193*8d67ca89SAndroid Build Coastguard Worker
194*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
195*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
196*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
197*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
198*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
199*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
200*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
201*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
202*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
203*8d67ca89SAndroid Build Coastguard Worker#endif
204*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
205*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_0)
206*8d67ca89SAndroid Build Coastguard Worker
207*8d67ca89SAndroid Build Coastguard Worker/*
208*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_1
209*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi)  rax(offset of rdi)   relative offset   	corresponding case
210*8d67ca89SAndroid Build Coastguard Worker *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
211*8d67ca89SAndroid Build Coastguard Worker */
212*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
213*8d67ca89SAndroid Build Coastguard WorkerL(ashr_1):
214*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
215*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2
216*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1
217*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
218*8d67ca89SAndroid Build Coastguard Worker	pslldq	$15, %xmm2		/* shift first string to align with second */
219*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
220*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
221*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d
222*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx		/* adjust 0xffff for offset */
223*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d		/* adjust for 16-byte offset */
224*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx
225*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)	/* mismatch or null char seen */
226*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3
227*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER
228*8d67ca89SAndroid Build Coastguard Worker
229*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
230*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx		/* index for loads*/
231*8d67ca89SAndroid Build Coastguard Worker	mov	$1, %r9d		/* byte position left over from less32bytes case */
232*8d67ca89SAndroid Build Coastguard Worker	/*
233*8d67ca89SAndroid Build Coastguard Worker	 * Set up %r10 so that we can detect crossing a page boundary.
234*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
235*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
236*8d67ca89SAndroid Build Coastguard Worker	 */
237*8d67ca89SAndroid Build Coastguard Worker	lea	1(%rdi), %r10
238*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10		/* offset into 4K page */
239*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10		/* subtract 4K pagesize */
240*8d67ca89SAndroid Build Coastguard Worker
241*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
242*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_1):
243*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
244*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_1)	/* cross page boundary */
245*8d67ca89SAndroid Build Coastguard Worker
246*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_1):
247*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
248*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
249*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4		 /* store for next cycle */
250*8d67ca89SAndroid Build Coastguard Worker
251*8d67ca89SAndroid Build Coastguard Worker	palignr $1, %xmm3, %xmm2        /* merge into one 16byte value */
252*8d67ca89SAndroid Build Coastguard Worker
253*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
254*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
255*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
256*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
257*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
258*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
259*8d67ca89SAndroid Build Coastguard Worker
260*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
261*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
262*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
263*8d67ca89SAndroid Build Coastguard Worker#endif
264*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
265*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
266*8d67ca89SAndroid Build Coastguard Worker
267*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
268*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_1)	/* cross page boundary */
269*8d67ca89SAndroid Build Coastguard Worker
270*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
271*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
272*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4		/* store for next cycle */
273*8d67ca89SAndroid Build Coastguard Worker
274*8d67ca89SAndroid Build Coastguard Worker	palignr $1, %xmm3, %xmm2        /* merge into one 16byte value */
275*8d67ca89SAndroid Build Coastguard Worker
276*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
277*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
278*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
279*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
280*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
281*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
282*8d67ca89SAndroid Build Coastguard Worker
283*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
284*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
285*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
286*8d67ca89SAndroid Build Coastguard Worker#endif
287*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
288*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
289*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_1)
290*8d67ca89SAndroid Build Coastguard Worker
291*8d67ca89SAndroid Build Coastguard Worker	/*
292*8d67ca89SAndroid Build Coastguard Worker	 * Nibble avoids loads across page boundary. This is to avoid a potential
293*8d67ca89SAndroid Build Coastguard Worker	 * access into unmapped memory.
294*8d67ca89SAndroid Build Coastguard Worker	 */
295*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
296*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_1):
297*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		 /* check nibble for null char*/
298*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
299*8d67ca89SAndroid Build Coastguard Worker	test	$0xfffe, %edx
300*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_1_exittail)	/* find null char*/
301*8d67ca89SAndroid Build Coastguard Worker
302*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
303*8d67ca89SAndroid Build Coastguard Worker	cmp	$14, %r11
304*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_1_exittail)
305*8d67ca89SAndroid Build Coastguard Worker#endif
306*8d67ca89SAndroid Build Coastguard Worker
307*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
308*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10		/* subtract 4K from %r10 */
309*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_1)
310*8d67ca89SAndroid Build Coastguard Worker
311*8d67ca89SAndroid Build Coastguard Worker	/*
312*8d67ca89SAndroid Build Coastguard Worker	 * Once a null char is found, determine if there is a string mismatch
313*8d67ca89SAndroid Build Coastguard Worker	 * before the null char.
314*8d67ca89SAndroid Build Coastguard Worker	 */
315*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
316*8d67ca89SAndroid Build Coastguard WorkerL(ashr_1_exittail):
317*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
318*8d67ca89SAndroid Build Coastguard Worker	psrldq	$1, %xmm0
319*8d67ca89SAndroid Build Coastguard Worker	psrldq	$1, %xmm3
320*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
321*8d67ca89SAndroid Build Coastguard Worker
322*8d67ca89SAndroid Build Coastguard Worker/*
323*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_2
324*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi)  rax(offset of rdi)   relative offset   	corresponding case
325*8d67ca89SAndroid Build Coastguard Worker *        n(14~15)            n -14         1(15 +(n-14) - n)         ashr_2
326*8d67ca89SAndroid Build Coastguard Worker */
327*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
328*8d67ca89SAndroid Build Coastguard WorkerL(ashr_2):
329*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
330*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2
331*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1
332*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
333*8d67ca89SAndroid Build Coastguard Worker	pslldq	$14, %xmm2
334*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2
335*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2
336*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d
337*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx
338*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d
339*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx
340*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)
341*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3
342*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER
343*8d67ca89SAndroid Build Coastguard Worker
344*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
345*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
346*8d67ca89SAndroid Build Coastguard Worker	mov	$2, %r9d	/* byte position left over from less32bytes case */
347*8d67ca89SAndroid Build Coastguard Worker	/*
348*8d67ca89SAndroid Build Coastguard Worker	 * Set up %r10 so that we can detect crossing a page boundary.
349*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
350*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
351*8d67ca89SAndroid Build Coastguard Worker	 */
352*8d67ca89SAndroid Build Coastguard Worker	lea	2(%rdi), %r10
353*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
354*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
355*8d67ca89SAndroid Build Coastguard Worker
356*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
357*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_2):
358*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
359*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_2)
360*8d67ca89SAndroid Build Coastguard Worker
361*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_2):
362*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
363*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
364*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
365*8d67ca89SAndroid Build Coastguard Worker
366*8d67ca89SAndroid Build Coastguard Worker	palignr $2, %xmm3, %xmm2        /* merge into one 16byte value */
367*8d67ca89SAndroid Build Coastguard Worker
368*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
369*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
370*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
371*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
372*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
373*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
374*8d67ca89SAndroid Build Coastguard Worker
375*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
376*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
377*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
378*8d67ca89SAndroid Build Coastguard Worker#endif
379*8d67ca89SAndroid Build Coastguard Worker
380*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
381*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
382*8d67ca89SAndroid Build Coastguard Worker
383*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
384*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_2)	/* cross page boundary */
385*8d67ca89SAndroid Build Coastguard Worker
386*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
387*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
388*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
389*8d67ca89SAndroid Build Coastguard Worker
390*8d67ca89SAndroid Build Coastguard Worker	palignr $2, %xmm3, %xmm2        /* merge into one 16byte value */
391*8d67ca89SAndroid Build Coastguard Worker
392*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
393*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
394*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
395*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
396*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
397*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
398*8d67ca89SAndroid Build Coastguard Worker
399*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
400*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
401*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
402*8d67ca89SAndroid Build Coastguard Worker#endif
403*8d67ca89SAndroid Build Coastguard Worker
404*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
405*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
406*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_2)
407*8d67ca89SAndroid Build Coastguard Worker
408*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
409*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_2):
410*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
411*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
412*8d67ca89SAndroid Build Coastguard Worker	test	$0xfffc, %edx
413*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_2_exittail)
414*8d67ca89SAndroid Build Coastguard Worker
415*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
416*8d67ca89SAndroid Build Coastguard Worker	cmp	$13, %r11
417*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_2_exittail)
418*8d67ca89SAndroid Build Coastguard Worker#endif
419*8d67ca89SAndroid Build Coastguard Worker
420*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
421*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10
422*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_2)
423*8d67ca89SAndroid Build Coastguard Worker
424*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
425*8d67ca89SAndroid Build Coastguard WorkerL(ashr_2_exittail):
426*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
427*8d67ca89SAndroid Build Coastguard Worker	psrldq	$2, %xmm0
428*8d67ca89SAndroid Build Coastguard Worker	psrldq	$2, %xmm3
429*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
430*8d67ca89SAndroid Build Coastguard Worker
431*8d67ca89SAndroid Build Coastguard Worker/*
432*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_3
433*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)  relative offset	 corresponding case
434*8d67ca89SAndroid Build Coastguard Worker *        n(13~15)            n -13         2(15 +(n-13) - n)         ashr_3
435*8d67ca89SAndroid Build Coastguard Worker */
436*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
437*8d67ca89SAndroid Build Coastguard WorkerL(ashr_3):
438*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
439*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2
440*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1
441*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
442*8d67ca89SAndroid Build Coastguard Worker	pslldq	$13, %xmm2
443*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2
444*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2
445*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d
446*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx
447*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d
448*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx
449*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)
450*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3
451*8d67ca89SAndroid Build Coastguard Worker
452*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER
453*8d67ca89SAndroid Build Coastguard Worker
454*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
455*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
456*8d67ca89SAndroid Build Coastguard Worker	mov	$3, %r9d	/* byte position left over from less32bytes case */
457*8d67ca89SAndroid Build Coastguard Worker	/*
458*8d67ca89SAndroid Build Coastguard Worker	 * Set up %r10 so that we can detect crossing a page boundary.
459*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
460*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
461*8d67ca89SAndroid Build Coastguard Worker	 */
462*8d67ca89SAndroid Build Coastguard Worker	lea	3(%rdi), %r10
463*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
464*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
465*8d67ca89SAndroid Build Coastguard Worker
466*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
467*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_3):
468*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
469*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_3)
470*8d67ca89SAndroid Build Coastguard Worker
471*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_3):
472*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
473*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
474*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
475*8d67ca89SAndroid Build Coastguard Worker
476*8d67ca89SAndroid Build Coastguard Worker	palignr $3, %xmm3, %xmm2        /* merge into one 16byte value */
477*8d67ca89SAndroid Build Coastguard Worker
478*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
479*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
480*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
481*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
482*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
483*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
484*8d67ca89SAndroid Build Coastguard Worker
485*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
486*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
487*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
488*8d67ca89SAndroid Build Coastguard Worker#endif
489*8d67ca89SAndroid Build Coastguard Worker
490*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
491*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
492*8d67ca89SAndroid Build Coastguard Worker
493*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
494*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_3)	/* cross page boundary */
495*8d67ca89SAndroid Build Coastguard Worker
496*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
497*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
498*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
499*8d67ca89SAndroid Build Coastguard Worker
500*8d67ca89SAndroid Build Coastguard Worker	palignr $3, %xmm3, %xmm2        /* merge into one 16byte value */
501*8d67ca89SAndroid Build Coastguard Worker
502*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
503*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
504*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
505*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
506*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
507*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
508*8d67ca89SAndroid Build Coastguard Worker
509*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
510*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
511*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
512*8d67ca89SAndroid Build Coastguard Worker#endif
513*8d67ca89SAndroid Build Coastguard Worker
514*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
515*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
516*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_3)
517*8d67ca89SAndroid Build Coastguard Worker
518*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
519*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_3):
520*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
521*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
522*8d67ca89SAndroid Build Coastguard Worker	test	$0xfff8, %edx
523*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_3_exittail)
524*8d67ca89SAndroid Build Coastguard Worker
525*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
526*8d67ca89SAndroid Build Coastguard Worker	cmp	$12, %r11
527*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_3_exittail)
528*8d67ca89SAndroid Build Coastguard Worker#endif
529*8d67ca89SAndroid Build Coastguard Worker
530*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
531*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10
532*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_3)
533*8d67ca89SAndroid Build Coastguard Worker
534*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
535*8d67ca89SAndroid Build Coastguard WorkerL(ashr_3_exittail):
536*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
537*8d67ca89SAndroid Build Coastguard Worker	psrldq	$3, %xmm0
538*8d67ca89SAndroid Build Coastguard Worker	psrldq	$3, %xmm3
539*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
540*8d67ca89SAndroid Build Coastguard Worker
541*8d67ca89SAndroid Build Coastguard Worker/*
542*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_4
543*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)  relative offset	 corresponding case
544*8d67ca89SAndroid Build Coastguard Worker *        n(12~15)            n -12         3(15 +(n-12) - n)         ashr_4
 *
 * Both (%rdi) and (%rsi) are read with movdqa, so they are expected to be
 * 16-byte aligned block addresses here.  %edx and %cl are set up by code
 * before this block (NOTE(review): confirm against the dispatch code).
 * The first block pair is compared after shifting the (%rdi) block up by
 * 12 bytes; every later step uses palignr $4 to splice the previous and
 * the current (%rdi) block into the 16 bytes that line up with the current
 * (%rsi) block.  The main loop is unrolled twice.
545*8d67ca89SAndroid Build Coastguard Worker */
546*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
547*8d67ca89SAndroid Build Coastguard WorkerL(ashr_4):
548*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* xmm0 = 0 */
549*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2	/* first 16-byte block at %rdi */
550*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1	/* first 16-byte block at %rsi */
551*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* xmm0 lane = 0xff where the %rsi byte is NUL */
552*8d67ca89SAndroid Build Coastguard Worker	pslldq	$12, %xmm2	/* %rdi bytes 0..3 move up to lanes 12..15 */
553*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2	/* xmm2 lane = 0xff where shifted %rdi byte == %rsi byte */
554*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2	/* top bit of a lane stays set only if equal and not NUL */
555*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d	/* r9d = one bit per lane (the lane's top bit) */
556*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx	/* edx comes from code before this block; drop its low cl bits */
557*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d	/* same shift for the fresh mask */
558*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx	/* zero when both masks agree */
559*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)	/* masks differ: finish at less32bytes */
560*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3	/* xmm3 = "previous" %rdi block for palignr */
561*8d67ca89SAndroid Build Coastguard Worker
562*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER	/* macro defined elsewhere; presumably adjusts the strncmp count - TODO confirm */
563*8d67ca89SAndroid Build Coastguard Worker
564*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* xmm0 = 0 for the NUL checks below */
565*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
566*8d67ca89SAndroid Build Coastguard Worker	mov	$4, %r9d	/* byte position left over from less32bytes case */
567*8d67ca89SAndroid Build Coastguard Worker	/*
568*8d67ca89SAndroid Build Coastguard Worker	 * Setup %r10 value allows us to detect crossing a page boundary.
569*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
570*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
571*8d67ca89SAndroid Build Coastguard Worker	 */
572*8d67ca89SAndroid Build Coastguard Worker	lea	4(%rdi), %r10	/* r10 = rdi + 4 */
573*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
574*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
575*8d67ca89SAndroid Build Coastguard Worker
576*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
577*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_4):
578*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10	/* account for one more 16-byte block */
579*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_4)	/* r10 > 0: check the pending bytes first */
580*8d67ca89SAndroid Build Coastguard Worker
581*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_4):
582*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* next %rsi block */
583*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2	/* next %rdi block */
584*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4	/* keep an unshifted copy for the next step */
585*8d67ca89SAndroid Build Coastguard Worker
586*8d67ca89SAndroid Build Coastguard Worker	palignr $4, %xmm3, %xmm2        /* merge into one 16byte value: xmm3 bytes 4..15, then xmm2 bytes 0..3 */
587*8d67ca89SAndroid Build Coastguard Worker
588*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* NUL lanes of the %rsi block (xmm0 was zero) */
589*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1	/* 0xff where merged %rdi byte == %rsi byte */
590*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1	/* clear the top bit of lanes holding NUL */
591*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx	/* edx = 0xffff iff all 16 lanes equal and non-NUL */
592*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx	/* leaves zero only for 0xffff */
593*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)	/* mismatch or NUL in this block */
594*8d67ca89SAndroid Build Coastguard Worker
595*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
596*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11	/* r11: presumably the remaining strncmp count - TODO confirm */
597*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)	/* r11 was 16 or less before the subtraction */
598*8d67ca89SAndroid Build Coastguard Worker#endif
599*8d67ca89SAndroid Build Coastguard Worker
600*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx	/* advance the load index */
601*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3	/* current %rdi block becomes the previous one */
602*8d67ca89SAndroid Build Coastguard Worker
603*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10	/* second copy of the step above (loop unrolled twice) */
604*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_4)	/* cross page boundary */
605*8d67ca89SAndroid Build Coastguard Worker
606*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
607*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
608*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
609*8d67ca89SAndroid Build Coastguard Worker
610*8d67ca89SAndroid Build Coastguard Worker	palignr $4, %xmm3, %xmm2        /* merge into one 16byte value */
611*8d67ca89SAndroid Build Coastguard Worker
612*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
613*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
614*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
615*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
616*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
617*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
618*8d67ca89SAndroid Build Coastguard Worker
619*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
620*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
621*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
622*8d67ca89SAndroid Build Coastguard Worker#endif
623*8d67ca89SAndroid Build Coastguard Worker
624*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
625*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
626*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_4)	/* next pair of blocks */
627*8d67ca89SAndroid Build Coastguard Worker
628*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
629*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_4):
630*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
631*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx	/* edx bit i = lane i of xmm3 is NUL */
632*8d67ca89SAndroid Build Coastguard Worker	test	$0xfff0, %edx	/* lanes 4..15: the bytes palignr $4 has not consumed yet */
633*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_4_exittail)	/* NUL among them: take the tail path */
634*8d67ca89SAndroid Build Coastguard Worker
635*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
636*8d67ca89SAndroid Build Coastguard Worker	cmp	$11, %r11	/* presumably: the count ends within the pending bytes - TODO confirm */
637*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_4_exittail)
638*8d67ca89SAndroid Build Coastguard Worker#endif
639*8d67ca89SAndroid Build Coastguard Worker
640*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* restore xmm0 = 0 */
641*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* rebase the tracker by one 4K page */
642*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_4)	/* resume with the block load that was deferred */
643*8d67ca89SAndroid Build Coastguard Worker
644*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
645*8d67ca89SAndroid Build Coastguard WorkerL(ashr_4_exittail):
646*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* current %rsi block */
647*8d67ca89SAndroid Build Coastguard Worker	psrldq	$4, %xmm0	/* drop the 4 already-compared lanes from the NUL mask */
648*8d67ca89SAndroid Build Coastguard Worker	psrldq	$4, %xmm3	/* and from the previous %rdi block */
649*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)	/* L(aftertail) is defined outside this block */
650*8d67ca89SAndroid Build Coastguard Worker
651*8d67ca89SAndroid Build Coastguard Worker/*
652*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_5
653*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset      corresponding case
654*8d67ca89SAndroid Build Coastguard Worker *        n(11~15)          n - 11      	  4(15 +(n-11) - n)         ashr_5
 *
 * Both (%rdi) and (%rsi) are read with movdqa, so they are expected to be
 * 16-byte aligned block addresses here.  %edx and %cl are set up by code
 * before this block (NOTE(review): confirm against the dispatch code).
 * The first block pair is compared after shifting the (%rdi) block up by
 * 11 bytes; every later step uses palignr $5 to splice the previous and
 * the current (%rdi) block into the 16 bytes that line up with the current
 * (%rsi) block.  The main loop is unrolled twice.
655*8d67ca89SAndroid Build Coastguard Worker */
656*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
657*8d67ca89SAndroid Build Coastguard WorkerL(ashr_5):
658*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* xmm0 = 0 */
659*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2	/* first 16-byte block at %rdi */
660*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1	/* first 16-byte block at %rsi */
661*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* xmm0 lane = 0xff where the %rsi byte is NUL */
662*8d67ca89SAndroid Build Coastguard Worker	pslldq	$11, %xmm2	/* %rdi bytes 0..4 move up to lanes 11..15 */
663*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2	/* xmm2 lane = 0xff where shifted %rdi byte == %rsi byte */
664*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2	/* top bit of a lane stays set only if equal and not NUL */
665*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d	/* r9d = one bit per lane (the lane's top bit) */
666*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx	/* edx comes from code before this block; drop its low cl bits */
667*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d	/* same shift for the fresh mask */
668*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx	/* zero when both masks agree */
669*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)	/* masks differ: finish at less32bytes */
670*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3	/* xmm3 = "previous" %rdi block for palignr */
671*8d67ca89SAndroid Build Coastguard Worker
672*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER	/* macro defined elsewhere; presumably adjusts the strncmp count - TODO confirm */
673*8d67ca89SAndroid Build Coastguard Worker
674*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* xmm0 = 0 for the NUL checks below */
675*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
676*8d67ca89SAndroid Build Coastguard Worker	mov	$5, %r9d	/* byte position left over from less32bytes case */
677*8d67ca89SAndroid Build Coastguard Worker	/*
678*8d67ca89SAndroid Build Coastguard Worker	 * Setup %r10 value allows us to detect crossing a page boundary.
679*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
680*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
681*8d67ca89SAndroid Build Coastguard Worker	 */
682*8d67ca89SAndroid Build Coastguard Worker	lea	5(%rdi), %r10	/* r10 = rdi + 5 */
683*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
684*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
685*8d67ca89SAndroid Build Coastguard Worker
686*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
687*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_5):
688*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10	/* account for one more 16-byte block */
689*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_5)	/* r10 > 0: check the pending bytes first */
690*8d67ca89SAndroid Build Coastguard Worker
691*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_5):
692*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* next %rsi block */
693*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2	/* next %rdi block */
694*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4	/* keep an unshifted copy for the next step */
695*8d67ca89SAndroid Build Coastguard Worker
696*8d67ca89SAndroid Build Coastguard Worker	palignr $5, %xmm3, %xmm2        /* merge into one 16byte value: xmm3 bytes 5..15, then xmm2 bytes 0..4 */
697*8d67ca89SAndroid Build Coastguard Worker
698*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* NUL lanes of the %rsi block (xmm0 was zero) */
699*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1	/* 0xff where merged %rdi byte == %rsi byte */
700*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1	/* clear the top bit of lanes holding NUL */
701*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx	/* edx = 0xffff iff all 16 lanes equal and non-NUL */
702*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx	/* leaves zero only for 0xffff */
703*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)	/* mismatch or NUL in this block */
704*8d67ca89SAndroid Build Coastguard Worker
705*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
706*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11	/* r11: presumably the remaining strncmp count - TODO confirm */
707*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)	/* r11 was 16 or less before the subtraction */
708*8d67ca89SAndroid Build Coastguard Worker#endif
709*8d67ca89SAndroid Build Coastguard Worker
710*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx	/* advance the load index */
711*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3	/* current %rdi block becomes the previous one */
712*8d67ca89SAndroid Build Coastguard Worker
713*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10	/* second copy of the step above (loop unrolled twice) */
714*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_5)	/* cross page boundary */
715*8d67ca89SAndroid Build Coastguard Worker
716*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
717*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
718*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
719*8d67ca89SAndroid Build Coastguard Worker
720*8d67ca89SAndroid Build Coastguard Worker	palignr $5, %xmm3, %xmm2        /* merge into one 16byte value */
721*8d67ca89SAndroid Build Coastguard Worker
722*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
723*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
724*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
725*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
726*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
727*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
728*8d67ca89SAndroid Build Coastguard Worker
729*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
730*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
731*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
732*8d67ca89SAndroid Build Coastguard Worker#endif
733*8d67ca89SAndroid Build Coastguard Worker
734*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
735*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
736*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_5)	/* next pair of blocks */
737*8d67ca89SAndroid Build Coastguard Worker
738*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
739*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_5):
740*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
741*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx	/* edx bit i = lane i of xmm3 is NUL */
742*8d67ca89SAndroid Build Coastguard Worker	test	$0xffe0, %edx	/* lanes 5..15: the bytes palignr $5 has not consumed yet */
743*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_5_exittail)	/* NUL among them: take the tail path */
744*8d67ca89SAndroid Build Coastguard Worker
745*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
746*8d67ca89SAndroid Build Coastguard Worker	cmp	$10, %r11	/* presumably: the count ends within the pending bytes - TODO confirm */
747*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_5_exittail)
748*8d67ca89SAndroid Build Coastguard Worker#endif
749*8d67ca89SAndroid Build Coastguard Worker
750*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* restore xmm0 = 0 */
751*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* rebase the tracker by one 4K page */
752*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_5)	/* resume with the block load that was deferred */
753*8d67ca89SAndroid Build Coastguard Worker
754*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
755*8d67ca89SAndroid Build Coastguard WorkerL(ashr_5_exittail):
756*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* current %rsi block */
757*8d67ca89SAndroid Build Coastguard Worker	psrldq	$5, %xmm0	/* drop the 5 already-compared lanes from the NUL mask */
758*8d67ca89SAndroid Build Coastguard Worker	psrldq	$5, %xmm3	/* and from the previous %rdi block */
759*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)	/* L(aftertail) is defined outside this block */
760*8d67ca89SAndroid Build Coastguard Worker
761*8d67ca89SAndroid Build Coastguard Worker/*
762*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_6
763*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset      corresponding case
764*8d67ca89SAndroid Build Coastguard Worker *        n(10~15)          n - 10      	  5(15 +(n-10) - n)         ashr_6
 *
 * Both (%rdi) and (%rsi) are read with movdqa, so they are expected to be
 * 16-byte aligned block addresses here.  %edx and %cl are set up by code
 * before this block (NOTE(review): confirm against the dispatch code).
 * The first block pair is compared after shifting the (%rdi) block up by
 * 10 bytes; every later step uses palignr $6 to splice the previous and
 * the current (%rdi) block into the 16 bytes that line up with the current
 * (%rsi) block.  The main loop is unrolled twice.
765*8d67ca89SAndroid Build Coastguard Worker */
766*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
767*8d67ca89SAndroid Build Coastguard WorkerL(ashr_6):
768*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* xmm0 = 0 */
769*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2	/* first 16-byte block at %rdi */
770*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1	/* first 16-byte block at %rsi */
771*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* xmm0 lane = 0xff where the %rsi byte is NUL */
772*8d67ca89SAndroid Build Coastguard Worker	pslldq	$10, %xmm2	/* %rdi bytes 0..5 move up to lanes 10..15 */
773*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2	/* xmm2 lane = 0xff where shifted %rdi byte == %rsi byte */
774*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2	/* top bit of a lane stays set only if equal and not NUL */
775*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d	/* r9d = one bit per lane (the lane's top bit) */
776*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx	/* edx comes from code before this block; drop its low cl bits */
777*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d	/* same shift for the fresh mask */
778*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx	/* zero when both masks agree */
779*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)	/* masks differ: finish at less32bytes */
780*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3	/* xmm3 = "previous" %rdi block for palignr */
781*8d67ca89SAndroid Build Coastguard Worker
782*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER	/* macro defined elsewhere; presumably adjusts the strncmp count - TODO confirm */
783*8d67ca89SAndroid Build Coastguard Worker
784*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* xmm0 = 0 for the NUL checks below */
785*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
786*8d67ca89SAndroid Build Coastguard Worker	mov	$6, %r9d	/* byte position left over from less32bytes case */
787*8d67ca89SAndroid Build Coastguard Worker	/*
788*8d67ca89SAndroid Build Coastguard Worker	 * Setup %r10 value allows us to detect crossing a page boundary.
789*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
790*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
791*8d67ca89SAndroid Build Coastguard Worker	 */
792*8d67ca89SAndroid Build Coastguard Worker	lea	6(%rdi), %r10	/* r10 = rdi + 6 */
793*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
794*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
795*8d67ca89SAndroid Build Coastguard Worker
796*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
797*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_6):
798*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10	/* account for one more 16-byte block */
799*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_6)	/* r10 > 0: check the pending bytes first */
800*8d67ca89SAndroid Build Coastguard Worker
801*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_6):
802*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* next %rsi block */
803*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2	/* next %rdi block */
804*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4	/* keep an unshifted copy for the next step */
805*8d67ca89SAndroid Build Coastguard Worker
806*8d67ca89SAndroid Build Coastguard Worker	palignr $6, %xmm3, %xmm2        /* merge into one 16byte value: xmm3 bytes 6..15, then xmm2 bytes 0..5 */
807*8d67ca89SAndroid Build Coastguard Worker
808*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* NUL lanes of the %rsi block (xmm0 was zero) */
809*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1	/* 0xff where merged %rdi byte == %rsi byte */
810*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1	/* clear the top bit of lanes holding NUL */
811*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx	/* edx = 0xffff iff all 16 lanes equal and non-NUL */
812*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx	/* leaves zero only for 0xffff */
813*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)	/* mismatch or NUL in this block */
814*8d67ca89SAndroid Build Coastguard Worker
815*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
816*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11	/* r11: presumably the remaining strncmp count - TODO confirm */
817*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)	/* r11 was 16 or less before the subtraction */
818*8d67ca89SAndroid Build Coastguard Worker#endif
819*8d67ca89SAndroid Build Coastguard Worker
820*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx	/* advance the load index */
821*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3	/* current %rdi block becomes the previous one */
822*8d67ca89SAndroid Build Coastguard Worker
823*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10	/* second copy of the step above (loop unrolled twice) */
824*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_6)	/* cross page boundary */
825*8d67ca89SAndroid Build Coastguard Worker
826*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
827*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
828*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
829*8d67ca89SAndroid Build Coastguard Worker
830*8d67ca89SAndroid Build Coastguard Worker	palignr $6, %xmm3, %xmm2        /* merge into one 16byte value */
831*8d67ca89SAndroid Build Coastguard Worker
832*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
833*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
834*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
835*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
836*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
837*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
838*8d67ca89SAndroid Build Coastguard Worker
839*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
840*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
841*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
842*8d67ca89SAndroid Build Coastguard Worker#endif
843*8d67ca89SAndroid Build Coastguard Worker
844*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
845*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
846*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_6)	/* next pair of blocks */
847*8d67ca89SAndroid Build Coastguard Worker
848*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
849*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_6):
850*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
851*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx	/* edx bit i = lane i of xmm3 is NUL */
852*8d67ca89SAndroid Build Coastguard Worker	test	$0xffc0, %edx	/* lanes 6..15: the bytes palignr $6 has not consumed yet */
853*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_6_exittail)	/* NUL among them: take the tail path */
854*8d67ca89SAndroid Build Coastguard Worker
855*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
856*8d67ca89SAndroid Build Coastguard Worker	cmp	$9, %r11	/* presumably: the count ends within the pending bytes - TODO confirm */
857*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_6_exittail)
858*8d67ca89SAndroid Build Coastguard Worker#endif
859*8d67ca89SAndroid Build Coastguard Worker
860*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* restore xmm0 = 0 */
861*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* rebase the tracker by one 4K page */
862*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_6)	/* resume with the block load that was deferred */
863*8d67ca89SAndroid Build Coastguard Worker
864*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
865*8d67ca89SAndroid Build Coastguard WorkerL(ashr_6_exittail):
866*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* current %rsi block */
867*8d67ca89SAndroid Build Coastguard Worker	psrldq	$6, %xmm0	/* drop the 6 already-compared lanes from the NUL mask */
868*8d67ca89SAndroid Build Coastguard Worker	psrldq	$6, %xmm3	/* and from the previous %rdi block */
869*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)	/* L(aftertail) is defined outside this block */
870*8d67ca89SAndroid Build Coastguard Worker
871*8d67ca89SAndroid Build Coastguard Worker/*
872*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_7
873*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset      corresponding case
874*8d67ca89SAndroid Build Coastguard Worker *        n(9~15)          n - 9      	        6(15 +(n - 9) - n)         ashr_7
 *
 * Both (%rdi) and (%rsi) are read with movdqa, so they are expected to be
 * 16-byte aligned block addresses here.  %edx and %cl are set up by code
 * before this block (NOTE(review): confirm against the dispatch code).
 * The first block pair is compared after shifting the (%rdi) block up by
 * 9 bytes; every later step uses palignr $7 to splice the previous and
 * the current (%rdi) block into the 16 bytes that line up with the current
 * (%rsi) block.  The main loop is unrolled twice.
875*8d67ca89SAndroid Build Coastguard Worker */
876*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
877*8d67ca89SAndroid Build Coastguard WorkerL(ashr_7):
878*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* xmm0 = 0 */
879*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2	/* first 16-byte block at %rdi */
880*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1	/* first 16-byte block at %rsi */
881*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* xmm0 lane = 0xff where the %rsi byte is NUL */
882*8d67ca89SAndroid Build Coastguard Worker	pslldq	$9, %xmm2	/* %rdi bytes 0..6 move up to lanes 9..15 */
883*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2	/* xmm2 lane = 0xff where shifted %rdi byte == %rsi byte */
884*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2	/* top bit of a lane stays set only if equal and not NUL */
885*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d	/* r9d = one bit per lane (the lane's top bit) */
886*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx	/* edx comes from code before this block; drop its low cl bits */
887*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d	/* same shift for the fresh mask */
888*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx	/* zero when both masks agree */
889*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)	/* masks differ: finish at less32bytes */
890*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3	/* xmm3 = "previous" %rdi block for palignr */
891*8d67ca89SAndroid Build Coastguard Worker
892*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER	/* macro defined elsewhere; presumably adjusts the strncmp count - TODO confirm */
893*8d67ca89SAndroid Build Coastguard Worker
894*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* xmm0 = 0 for the NUL checks below */
895*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
896*8d67ca89SAndroid Build Coastguard Worker	mov	$7, %r9d	/* byte position left over from less32bytes case */
897*8d67ca89SAndroid Build Coastguard Worker	/*
898*8d67ca89SAndroid Build Coastguard Worker	 * Setup %r10 value allows us to detect crossing a page boundary.
899*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
900*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
901*8d67ca89SAndroid Build Coastguard Worker	 */
902*8d67ca89SAndroid Build Coastguard Worker	lea	7(%rdi), %r10	/* r10 = rdi + 7 */
903*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
904*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
905*8d67ca89SAndroid Build Coastguard Worker
906*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
907*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_7):
908*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10	/* account for one more 16-byte block */
909*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_7)	/* r10 > 0: check the pending bytes first */
910*8d67ca89SAndroid Build Coastguard Worker
911*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_7):
912*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* next %rsi block */
913*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2	/* next %rdi block */
914*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4	/* keep an unshifted copy for the next step */
915*8d67ca89SAndroid Build Coastguard Worker
916*8d67ca89SAndroid Build Coastguard Worker	palignr $7, %xmm3, %xmm2        /* merge into one 16byte value: xmm3 bytes 7..15, then xmm2 bytes 0..6 */
917*8d67ca89SAndroid Build Coastguard Worker
918*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* NUL lanes of the %rsi block (xmm0 was zero) */
919*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1	/* 0xff where merged %rdi byte == %rsi byte */
920*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1	/* clear the top bit of lanes holding NUL */
921*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx	/* edx = 0xffff iff all 16 lanes equal and non-NUL */
922*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx	/* leaves zero only for 0xffff */
923*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)	/* mismatch or NUL in this block */
924*8d67ca89SAndroid Build Coastguard Worker
925*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
926*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11	/* r11: presumably the remaining strncmp count - TODO confirm */
927*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)	/* r11 was 16 or less before the subtraction */
928*8d67ca89SAndroid Build Coastguard Worker#endif
929*8d67ca89SAndroid Build Coastguard Worker
930*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx	/* advance the load index */
931*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3	/* current %rdi block becomes the previous one */
932*8d67ca89SAndroid Build Coastguard Worker
933*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10	/* second copy of the step above (loop unrolled twice) */
934*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_7)	/* cross page boundary */
935*8d67ca89SAndroid Build Coastguard Worker
936*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
937*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
938*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
939*8d67ca89SAndroid Build Coastguard Worker
940*8d67ca89SAndroid Build Coastguard Worker	palignr $7, %xmm3, %xmm2        /* merge into one 16byte value */
941*8d67ca89SAndroid Build Coastguard Worker
942*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
943*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
944*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
945*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
946*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
947*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
948*8d67ca89SAndroid Build Coastguard Worker
949*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
950*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
951*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
952*8d67ca89SAndroid Build Coastguard Worker#endif
953*8d67ca89SAndroid Build Coastguard Worker
954*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
955*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
956*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_7)	/* next pair of blocks */
957*8d67ca89SAndroid Build Coastguard Worker
958*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
959*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_7):
960*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
961*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx	/* edx bit i = lane i of xmm3 is NUL */
962*8d67ca89SAndroid Build Coastguard Worker	test	$0xff80, %edx	/* lanes 7..15: the bytes palignr $7 has not consumed yet */
963*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_7_exittail)	/* NUL among them: take the tail path */
964*8d67ca89SAndroid Build Coastguard Worker
965*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
966*8d67ca89SAndroid Build Coastguard Worker	cmp	$8, %r11	/* presumably: the count ends within the pending bytes - TODO confirm */
967*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_7_exittail)
968*8d67ca89SAndroid Build Coastguard Worker#endif
969*8d67ca89SAndroid Build Coastguard Worker
970*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* restore xmm0 = 0 */
971*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* rebase the tracker by one 4K page */
972*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_7)	/* resume with the block load that was deferred */
973*8d67ca89SAndroid Build Coastguard Worker
974*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
975*8d67ca89SAndroid Build Coastguard WorkerL(ashr_7_exittail):
976*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* current %rsi block */
977*8d67ca89SAndroid Build Coastguard Worker	psrldq	$7, %xmm0	/* drop the 7 already-compared lanes from the NUL mask */
978*8d67ca89SAndroid Build Coastguard Worker	psrldq	$7, %xmm3	/* and from the previous %rdi block */
979*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)	/* L(aftertail) is defined outside this block */
980*8d67ca89SAndroid Build Coastguard Worker
981*8d67ca89SAndroid Build Coastguard Worker/*
982*8d67ca89SAndroid Build Coastguard Worker *  The following cases will be handled by ashr_8: the first %rdi block is shifted up by 8 bytes (pslldq $8) to line up with %rsi; later blocks are merged with palignr $8.
983*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
984*8d67ca89SAndroid Build Coastguard Worker *        n(8~15)          n - 8      	        7(15 +(n - 8) - n)         ashr_8
985*8d67ca89SAndroid Build Coastguard Worker */
986*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
987*8d67ca89SAndroid Build Coastguard WorkerL(ashr_8):
988*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* %xmm0 = 0, used below to find NUL bytes */
989*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2		/* first 16-byte block at %rdi (movdqa requires a 16-byte-aligned address) */
990*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1		/* first 16-byte block at %rsi */
991*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 = 0xff in every byte where the %rsi block is NUL */
992*8d67ca89SAndroid Build Coastguard Worker	pslldq	$8, %xmm2		/* move the %rdi bytes up 8 positions; the low 8 bytes become 0 */
993*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2		/* 0xff where the shifted %rdi byte equals the %rsi byte */
994*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2		/* top bit stays set only for bytes that match and are not NUL */
995*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
996*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx		/* %edx and %cl are set up before control reaches this label */
997*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d		/* shift both masks right by the same %cl */
998*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx		/* non-zero when the two masks differ */
999*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)		/* masks differ: leave via L(less32bytes) */
1000*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3		/* unshifted %rdi block, carried into the loop as the palignr source */
1001*8d67ca89SAndroid Build Coastguard Worker
1002*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER		/* macro defined outside this section */
1003*8d67ca89SAndroid Build Coastguard Worker
1004*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask for the loop */
1005*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
1006*8d67ca89SAndroid Build Coastguard Worker	mov	$8, %r9d	/* byte position left over from less32bytes case */
1007*8d67ca89SAndroid Build Coastguard Worker	/*
1008*8d67ca89SAndroid Build Coastguard Worker	 * Set up %r10 so that we can detect crossing a page boundary.
1009*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
1010*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
1011*8d67ca89SAndroid Build Coastguard Worker	 */
1012*8d67ca89SAndroid Build Coastguard Worker	lea	8(%rdi), %r10	/* the page offset is taken from %rdi + 8 */
1013*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
1014*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
1015*8d67ca89SAndroid Build Coastguard Worker
1016*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1017*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_8):		/* main loop, unrolled twice; each step compares 16 bytes */
1018*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10		/* advance the page counter by one 16-byte block */
1019*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_8)	/* %r10 > 0: page boundary reached, check the leftover bytes first */
1020*8d67ca89SAndroid Build Coastguard Worker
1021*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_8):	/* also the re-entry point used by L(nibble_ashr_8) */
1022*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* next 16 bytes at %rsi + %rcx */
1023*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2	/* next 16 bytes at %rdi + %rcx */
1024*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4		/* keep an unmerged copy; it becomes %xmm3 for the next step */
1025*8d67ca89SAndroid Build Coastguard Worker
1026*8d67ca89SAndroid Build Coastguard Worker	palignr $8, %xmm3, %xmm2        /* merge into one 16byte value: bytes 8..15 of %xmm3, then bytes 0..7 of %xmm2 */
1027*8d67ca89SAndroid Build Coastguard Worker
1028*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 was zero, so it now flags NUL bytes of the %rsi block */
1029*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1		/* 0xff where the two blocks agree */
1030*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1		/* top bit stays set only for matching non-NUL bytes */
1031*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1032*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx		/* zero only if all 16 bytes matched and none was NUL */
1033*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)			/* mismatch or NUL in this block: leave via L(exit) */
1034*8d67ca89SAndroid Build Coastguard Worker
1035*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1036*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11		/* 16 more bytes taken off the count in %r11 */
1037*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)		/* count used up: leave via L(strcmp_exitz) */
1038*8d67ca89SAndroid Build Coastguard Worker#endif
1039*8d67ca89SAndroid Build Coastguard Worker
1040*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx		/* advance the load index */
1041*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3		/* current %rdi block becomes the carry-over */
1042*8d67ca89SAndroid Build Coastguard Worker
1043*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1044*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_8)	/* cross page boundary */
1045*8d67ca89SAndroid Build Coastguard Worker
1046*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* second unrolled step: same sequence as above */
1047*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1048*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1049*8d67ca89SAndroid Build Coastguard Worker
1050*8d67ca89SAndroid Build Coastguard Worker	palignr $8, %xmm3, %xmm2        /* merge into one 16byte value */
1051*8d67ca89SAndroid Build Coastguard Worker
1052*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 is still zero here because the first step found no NUL */
1053*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1054*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1055*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1056*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1057*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1058*8d67ca89SAndroid Build Coastguard Worker
1059*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1060*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1061*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1062*8d67ca89SAndroid Build Coastguard Worker#endif
1063*8d67ca89SAndroid Build Coastguard Worker
1064*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1065*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3		/* store for next cycle */
1066*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_8)
1067*8d67ca89SAndroid Build Coastguard Worker
1068*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1069*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_8):	/* page-boundary check: inspect the bytes left in %xmm3 before loading more of %rdi */
1070*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char (%xmm0 is zero on entry, so it becomes the NUL mask of %xmm3) */
1071*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
1072*8d67ca89SAndroid Build Coastguard Worker	test	$0xff00, %edx		/* bits 8..15: bytes 8..15 of %xmm3, the part palignr $8 has not consumed yet */
1073*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_8_exittail)	/* NUL in the leftover bytes: finish without another %rdi load */
1074*8d67ca89SAndroid Build Coastguard Worker
1075*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1076*8d67ca89SAndroid Build Coastguard Worker	cmp	$8, %r11		/* 16 - 8 = 8 leftover bytes; a count of exactly 8 must also stop here (was $7, off by one) */
1077*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_8_exittail)	/* count ends inside the leftover bytes: do not load the next %rdi block across the page */
1078*8d67ca89SAndroid Build Coastguard Worker#endif
1079*8d67ca89SAndroid Build Coastguard Worker
1080*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask before resuming */
1081*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10		/* re-arm the page counter for the next 4K page */
1082*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_8)	/* resume the loop at its load step */
1083*8d67ca89SAndroid Build Coastguard Worker
1084*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1085*8d67ca89SAndroid Build Coastguard WorkerL(ashr_8_exittail):	/* final compare using only the leftover %rdi bytes held in %xmm3 */
1086*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* current %rsi block */
1087*8d67ca89SAndroid Build Coastguard Worker	psrldq	$8, %xmm0		/* move the NUL mask down 8 bytes so leftover byte 8 sits at position 0 */
1088*8d67ca89SAndroid Build Coastguard Worker	psrldq	$8, %xmm3		/* same shift for the leftover %rdi bytes; upper bytes become 0 */
1089*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
1090*8d67ca89SAndroid Build Coastguard Worker
1091*8d67ca89SAndroid Build Coastguard Worker/*
1092*8d67ca89SAndroid Build Coastguard Worker *  The following cases will be handled by ashr_9: the first %rdi block is shifted up by 7 bytes (pslldq $7) to line up with %rsi; later blocks are merged with palignr $9.
1093*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
1094*8d67ca89SAndroid Build Coastguard Worker *        n(7~15)          n - 7      	        8(15 +(n - 7) - n)         ashr_9
1095*8d67ca89SAndroid Build Coastguard Worker */
1096*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1097*8d67ca89SAndroid Build Coastguard WorkerL(ashr_9):
1098*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* %xmm0 = 0, used below to find NUL bytes */
1099*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2		/* first 16-byte block at %rdi (movdqa requires a 16-byte-aligned address) */
1100*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1		/* first 16-byte block at %rsi */
1101*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 = 0xff in every byte where the %rsi block is NUL */
1102*8d67ca89SAndroid Build Coastguard Worker	pslldq	$7, %xmm2		/* move the %rdi bytes up 7 positions; the low 7 bytes become 0 */
1103*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2		/* 0xff where the shifted %rdi byte equals the %rsi byte */
1104*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2		/* top bit stays set only for bytes that match and are not NUL */
1105*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
1106*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx		/* %edx and %cl are set up before control reaches this label */
1107*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d		/* shift both masks right by the same %cl */
1108*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx		/* non-zero when the two masks differ */
1109*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)		/* masks differ: leave via L(less32bytes) */
1110*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3		/* unshifted %rdi block, carried into the loop as the palignr source */
1111*8d67ca89SAndroid Build Coastguard Worker
1112*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER		/* macro defined outside this section */
1113*8d67ca89SAndroid Build Coastguard Worker
1114*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask for the loop */
1115*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
1116*8d67ca89SAndroid Build Coastguard Worker	mov	$9, %r9d	/* byte position left over from less32bytes case */
1117*8d67ca89SAndroid Build Coastguard Worker	/*
1118*8d67ca89SAndroid Build Coastguard Worker	 * Set up %r10 so that we can detect crossing a page boundary.
1119*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
1120*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
1121*8d67ca89SAndroid Build Coastguard Worker	 */
1122*8d67ca89SAndroid Build Coastguard Worker	lea	9(%rdi), %r10	/* the page offset is taken from %rdi + 9 */
1123*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
1124*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
1125*8d67ca89SAndroid Build Coastguard Worker
1126*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1127*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_9):		/* main loop, unrolled twice; each step compares 16 bytes */
1128*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10		/* advance the page counter by one 16-byte block */
1129*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_9)	/* %r10 > 0: page boundary reached, check the leftover bytes first */
1130*8d67ca89SAndroid Build Coastguard Worker
1131*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_9):	/* also the re-entry point used by L(nibble_ashr_9) */
1132*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* next 16 bytes at %rsi + %rcx */
1133*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2	/* next 16 bytes at %rdi + %rcx */
1134*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4		/* keep an unmerged copy; it becomes %xmm3 for the next step */
1135*8d67ca89SAndroid Build Coastguard Worker
1136*8d67ca89SAndroid Build Coastguard Worker	palignr $9, %xmm3, %xmm2        /* merge into one 16byte value: bytes 9..15 of %xmm3, then bytes 0..8 of %xmm2 */
1137*8d67ca89SAndroid Build Coastguard Worker
1138*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 was zero, so it now flags NUL bytes of the %rsi block */
1139*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1		/* 0xff where the two blocks agree */
1140*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1		/* top bit stays set only for matching non-NUL bytes */
1141*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1142*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx		/* zero only if all 16 bytes matched and none was NUL */
1143*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)			/* mismatch or NUL in this block: leave via L(exit) */
1144*8d67ca89SAndroid Build Coastguard Worker
1145*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1146*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11		/* 16 more bytes taken off the count in %r11 */
1147*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)		/* count used up: leave via L(strcmp_exitz) */
1148*8d67ca89SAndroid Build Coastguard Worker#endif
1149*8d67ca89SAndroid Build Coastguard Worker
1150*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx		/* advance the load index */
1151*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3		/* current %rdi block becomes the carry-over */
1152*8d67ca89SAndroid Build Coastguard Worker
1153*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1154*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_9)	/* cross page boundary */
1155*8d67ca89SAndroid Build Coastguard Worker
1156*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* second unrolled step: same sequence as above */
1157*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1158*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1159*8d67ca89SAndroid Build Coastguard Worker
1160*8d67ca89SAndroid Build Coastguard Worker	palignr $9, %xmm3, %xmm2        /* merge into one 16byte value */
1161*8d67ca89SAndroid Build Coastguard Worker
1162*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 is still zero here because the first step found no NUL */
1163*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1164*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1165*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1166*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1167*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1168*8d67ca89SAndroid Build Coastguard Worker
1169*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1170*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1171*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1172*8d67ca89SAndroid Build Coastguard Worker#endif
1173*8d67ca89SAndroid Build Coastguard Worker
1174*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1175*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3		/* store for next cycle */
1176*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_9)
1177*8d67ca89SAndroid Build Coastguard Worker
1178*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1179*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_9):	/* page-boundary check: inspect the bytes left in %xmm3 before loading more of %rdi */
1180*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char (%xmm0 is zero on entry, so it becomes the NUL mask of %xmm3) */
1181*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
1182*8d67ca89SAndroid Build Coastguard Worker	test	$0xfe00, %edx		/* bits 9..15: bytes 9..15 of %xmm3, the part palignr $9 has not consumed yet */
1183*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_9_exittail)	/* NUL in the leftover bytes: finish without another %rdi load */
1184*8d67ca89SAndroid Build Coastguard Worker
1185*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1186*8d67ca89SAndroid Build Coastguard Worker	cmp	$7, %r11		/* 16 - 9 = 7 leftover bytes; a count of exactly 7 must also stop here (was $6, off by one) */
1187*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_9_exittail)	/* count ends inside the leftover bytes: do not load the next %rdi block across the page */
1188*8d67ca89SAndroid Build Coastguard Worker#endif
1189*8d67ca89SAndroid Build Coastguard Worker
1190*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask before resuming */
1191*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10		/* re-arm the page counter for the next 4K page */
1192*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_9)	/* resume the loop at its load step */
1193*8d67ca89SAndroid Build Coastguard Worker
1194*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1195*8d67ca89SAndroid Build Coastguard WorkerL(ashr_9_exittail):	/* final compare using only the leftover %rdi bytes held in %xmm3 */
1196*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* current %rsi block */
1197*8d67ca89SAndroid Build Coastguard Worker	psrldq	$9, %xmm0		/* move the NUL mask down 9 bytes so leftover byte 9 sits at position 0 */
1198*8d67ca89SAndroid Build Coastguard Worker	psrldq	$9, %xmm3		/* same shift for the leftover %rdi bytes; upper bytes become 0 */
1199*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
1200*8d67ca89SAndroid Build Coastguard Worker
1201*8d67ca89SAndroid Build Coastguard Worker/*
1202*8d67ca89SAndroid Build Coastguard Worker *  The following cases will be handled by ashr_10: the first %rdi block is shifted up by 6 bytes (pslldq $6) to line up with %rsi; later blocks are merged with palignr $10.
1203*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
1204*8d67ca89SAndroid Build Coastguard Worker *        n(6~15)          n - 6      	        9(15 +(n - 6) - n)         ashr_10
1205*8d67ca89SAndroid Build Coastguard Worker */
1206*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1207*8d67ca89SAndroid Build Coastguard WorkerL(ashr_10):
1208*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* %xmm0 = 0, used below to find NUL bytes */
1209*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2		/* first 16-byte block at %rdi (movdqa requires a 16-byte-aligned address) */
1210*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1		/* first 16-byte block at %rsi */
1211*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 = 0xff in every byte where the %rsi block is NUL */
1212*8d67ca89SAndroid Build Coastguard Worker	pslldq	$6, %xmm2		/* move the %rdi bytes up 6 positions; the low 6 bytes become 0 */
1213*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2		/* 0xff where the shifted %rdi byte equals the %rsi byte */
1214*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2		/* top bit stays set only for bytes that match and are not NUL */
1215*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
1216*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx		/* %edx and %cl are set up before control reaches this label */
1217*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d		/* shift both masks right by the same %cl */
1218*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx		/* non-zero when the two masks differ */
1219*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)		/* masks differ: leave via L(less32bytes) */
1220*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3		/* unshifted %rdi block, carried into the loop as the palignr source */
1221*8d67ca89SAndroid Build Coastguard Worker
1222*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER		/* macro defined outside this section */
1223*8d67ca89SAndroid Build Coastguard Worker
1224*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask for the loop */
1225*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
1226*8d67ca89SAndroid Build Coastguard Worker	mov	$10, %r9d	/* byte position left over from less32bytes case */
1227*8d67ca89SAndroid Build Coastguard Worker	/*
1228*8d67ca89SAndroid Build Coastguard Worker	 * Set up %r10 so that we can detect crossing a page boundary.
1229*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
1230*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
1231*8d67ca89SAndroid Build Coastguard Worker	 */
1232*8d67ca89SAndroid Build Coastguard Worker	lea	10(%rdi), %r10	/* the page offset is taken from %rdi + 10 */
1233*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
1234*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
1235*8d67ca89SAndroid Build Coastguard Worker
1236*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1237*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_10):	/* main loop, unrolled twice; each step compares 16 bytes */
1238*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10		/* advance the page counter by one 16-byte block */
1239*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_10)	/* %r10 > 0: page boundary reached, check the leftover bytes first */
1240*8d67ca89SAndroid Build Coastguard Worker
1241*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_10):	/* also the re-entry point used by L(nibble_ashr_10) */
1242*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* next 16 bytes at %rsi + %rcx */
1243*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2	/* next 16 bytes at %rdi + %rcx */
1244*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4		/* keep an unmerged copy; it becomes %xmm3 for the next step */
1245*8d67ca89SAndroid Build Coastguard Worker
1246*8d67ca89SAndroid Build Coastguard Worker	palignr $10, %xmm3, %xmm2        /* merge into one 16byte value: bytes 10..15 of %xmm3, then bytes 0..9 of %xmm2 */
1247*8d67ca89SAndroid Build Coastguard Worker
1248*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 was zero, so it now flags NUL bytes of the %rsi block */
1249*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1		/* 0xff where the two blocks agree */
1250*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1		/* top bit stays set only for matching non-NUL bytes */
1251*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1252*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx		/* zero only if all 16 bytes matched and none was NUL */
1253*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)			/* mismatch or NUL in this block: leave via L(exit) */
1254*8d67ca89SAndroid Build Coastguard Worker
1255*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1256*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11		/* 16 more bytes taken off the count in %r11 */
1257*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)		/* count used up: leave via L(strcmp_exitz) */
1258*8d67ca89SAndroid Build Coastguard Worker#endif
1259*8d67ca89SAndroid Build Coastguard Worker
1260*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx		/* advance the load index */
1261*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3		/* current %rdi block becomes the carry-over */
1262*8d67ca89SAndroid Build Coastguard Worker
1263*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1264*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_10)	/* cross page boundary */
1265*8d67ca89SAndroid Build Coastguard Worker
1266*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* second unrolled step: same sequence as above */
1267*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1268*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1269*8d67ca89SAndroid Build Coastguard Worker
1270*8d67ca89SAndroid Build Coastguard Worker	palignr $10, %xmm3, %xmm2        /* merge into one 16byte value */
1271*8d67ca89SAndroid Build Coastguard Worker
1272*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 is still zero here because the first step found no NUL */
1273*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1274*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1275*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1276*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1277*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1278*8d67ca89SAndroid Build Coastguard Worker
1279*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1280*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1281*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1282*8d67ca89SAndroid Build Coastguard Worker#endif
1283*8d67ca89SAndroid Build Coastguard Worker
1284*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1285*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3		/* store for next cycle */
1286*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_10)
1287*8d67ca89SAndroid Build Coastguard Worker
1288*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1289*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_10):	/* page-boundary check: inspect the bytes left in %xmm3 before loading more of %rdi */
1290*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char (%xmm0 is zero on entry, so it becomes the NUL mask of %xmm3) */
1291*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
1292*8d67ca89SAndroid Build Coastguard Worker	test	$0xfc00, %edx		/* bits 10..15: bytes 10..15 of %xmm3, the part palignr $10 has not consumed yet */
1293*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_10_exittail)	/* NUL in the leftover bytes: finish without another %rdi load */
1294*8d67ca89SAndroid Build Coastguard Worker
1295*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1296*8d67ca89SAndroid Build Coastguard Worker	cmp	$6, %r11		/* 16 - 10 = 6 leftover bytes; a count of exactly 6 must also stop here (was $5, off by one) */
1297*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_10_exittail)	/* count ends inside the leftover bytes: do not load the next %rdi block across the page */
1298*8d67ca89SAndroid Build Coastguard Worker#endif
1299*8d67ca89SAndroid Build Coastguard Worker
1300*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask before resuming */
1301*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10		/* re-arm the page counter for the next 4K page */
1302*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_10)	/* resume the loop at its load step */
1303*8d67ca89SAndroid Build Coastguard Worker
1304*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1305*8d67ca89SAndroid Build Coastguard WorkerL(ashr_10_exittail):	/* final compare using only the leftover %rdi bytes held in %xmm3 */
1306*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* current %rsi block */
1307*8d67ca89SAndroid Build Coastguard Worker	psrldq	$10, %xmm0		/* move the NUL mask down 10 bytes so leftover byte 10 sits at position 0 */
1308*8d67ca89SAndroid Build Coastguard Worker	psrldq	$10, %xmm3		/* same shift for the leftover %rdi bytes; upper bytes become 0 */
1309*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
1310*8d67ca89SAndroid Build Coastguard Worker
1311*8d67ca89SAndroid Build Coastguard Worker/*
1312*8d67ca89SAndroid Build Coastguard Worker *  The following cases will be handled by ashr_11: the first %rdi block is shifted up by 5 bytes (pslldq $5) to line up with %rsi; later blocks are merged with palignr $11.
1313*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
1314*8d67ca89SAndroid Build Coastguard Worker *        n(5~15)          n - 5      	        10(15 +(n - 5) - n)         ashr_11
1315*8d67ca89SAndroid Build Coastguard Worker */
1316*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1317*8d67ca89SAndroid Build Coastguard WorkerL(ashr_11):
1318*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* %xmm0 = 0, used below to find NUL bytes */
1319*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2		/* first 16-byte block at %rdi (movdqa requires a 16-byte-aligned address) */
1320*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1		/* first 16-byte block at %rsi */
1321*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 = 0xff in every byte where the %rsi block is NUL */
1322*8d67ca89SAndroid Build Coastguard Worker	pslldq	$5, %xmm2		/* move the %rdi bytes up 5 positions; the low 5 bytes become 0 */
1323*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2		/* 0xff where the shifted %rdi byte equals the %rsi byte */
1324*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2		/* top bit stays set only for bytes that match and are not NUL */
1325*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
1326*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx		/* %edx and %cl are set up before control reaches this label */
1327*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d		/* shift both masks right by the same %cl */
1328*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx		/* non-zero when the two masks differ */
1329*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)		/* masks differ: leave via L(less32bytes) */
1330*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3		/* unshifted %rdi block, carried into the loop as the palignr source */
1331*8d67ca89SAndroid Build Coastguard Worker
1332*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER		/* macro defined outside this section */
1333*8d67ca89SAndroid Build Coastguard Worker
1334*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask for the loop */
1335*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
1336*8d67ca89SAndroid Build Coastguard Worker	mov	$11, %r9d	/* byte position left over from less32bytes case */
1337*8d67ca89SAndroid Build Coastguard Worker	/*
1338*8d67ca89SAndroid Build Coastguard Worker	 * Set up %r10 so that we can detect crossing a page boundary.
1339*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
1340*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
1341*8d67ca89SAndroid Build Coastguard Worker	 */
1342*8d67ca89SAndroid Build Coastguard Worker	lea	11(%rdi), %r10	/* the page offset is taken from %rdi + 11 */
1343*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
1344*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
1345*8d67ca89SAndroid Build Coastguard Worker
1346*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1347*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_11):	/* main loop, unrolled twice; each step compares 16 bytes */
1348*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10		/* advance the page counter by one 16-byte block */
1349*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_11)	/* %r10 > 0: page boundary reached, check the leftover bytes first */
1350*8d67ca89SAndroid Build Coastguard Worker
1351*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_11):	/* also the re-entry point used by L(nibble_ashr_11) */
1352*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* next 16 bytes at %rsi + %rcx */
1353*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2	/* next 16 bytes at %rdi + %rcx */
1354*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4		/* keep an unmerged copy; it becomes %xmm3 for the next step */
1355*8d67ca89SAndroid Build Coastguard Worker
1356*8d67ca89SAndroid Build Coastguard Worker	palignr $11, %xmm3, %xmm2        /* merge into one 16byte value: bytes 11..15 of %xmm3, then bytes 0..10 of %xmm2 */
1357*8d67ca89SAndroid Build Coastguard Worker
1358*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 was zero, so it now flags NUL bytes of the %rsi block */
1359*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1		/* 0xff where the two blocks agree */
1360*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1		/* top bit stays set only for matching non-NUL bytes */
1361*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1362*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx		/* zero only if all 16 bytes matched and none was NUL */
1363*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)			/* mismatch or NUL in this block: leave via L(exit) */
1364*8d67ca89SAndroid Build Coastguard Worker
1365*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1366*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11		/* 16 more bytes taken off the count in %r11 */
1367*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)		/* count used up: leave via L(strcmp_exitz) */
1368*8d67ca89SAndroid Build Coastguard Worker#endif
1369*8d67ca89SAndroid Build Coastguard Worker
1370*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx		/* advance the load index */
1371*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3		/* current %rdi block becomes the carry-over */
1372*8d67ca89SAndroid Build Coastguard Worker
1373*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1374*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_11)	/* cross page boundary */
1375*8d67ca89SAndroid Build Coastguard Worker
1376*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* second unrolled step: same sequence as above */
1377*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1378*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1379*8d67ca89SAndroid Build Coastguard Worker
1380*8d67ca89SAndroid Build Coastguard Worker	palignr $11, %xmm3, %xmm2        /* merge into one 16byte value */
1381*8d67ca89SAndroid Build Coastguard Worker
1382*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 is still zero here because the first step found no NUL */
1383*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1384*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1385*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1386*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1387*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1388*8d67ca89SAndroid Build Coastguard Worker
1389*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1390*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1391*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1392*8d67ca89SAndroid Build Coastguard Worker#endif
1393*8d67ca89SAndroid Build Coastguard Worker
1394*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1395*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3		/* store for next cycle */
1396*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_11)
1397*8d67ca89SAndroid Build Coastguard Worker
1398*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1399*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_11):	/* page-boundary check: inspect the bytes left in %xmm3 before loading more of %rdi */
1400*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char (%xmm0 is zero on entry, so it becomes the NUL mask of %xmm3) */
1401*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
1402*8d67ca89SAndroid Build Coastguard Worker	test	$0xf800, %edx		/* bits 11..15: bytes 11..15 of %xmm3, the part palignr $11 has not consumed yet */
1403*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_11_exittail)	/* NUL in the leftover bytes: finish without another %rdi load */
1404*8d67ca89SAndroid Build Coastguard Worker
1405*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1406*8d67ca89SAndroid Build Coastguard Worker	cmp	$5, %r11		/* 16 - 11 = 5 leftover bytes; a count of exactly 5 must also stop here (was $4, off by one) */
1407*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_11_exittail)	/* count ends inside the leftover bytes: do not load the next %rdi block across the page */
1408*8d67ca89SAndroid Build Coastguard Worker#endif
1409*8d67ca89SAndroid Build Coastguard Worker
1410*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask before resuming */
1411*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10		/* re-arm the page counter for the next 4K page */
1412*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_11)	/* resume the loop at its load step */
1413*8d67ca89SAndroid Build Coastguard Worker
1414*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1415*8d67ca89SAndroid Build Coastguard WorkerL(ashr_11_exittail):	/* final compare using only the leftover %rdi bytes held in %xmm3 */
1416*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1	/* current %rsi block */
1417*8d67ca89SAndroid Build Coastguard Worker	psrldq	$11, %xmm0		/* move the NUL mask down 11 bytes so leftover byte 11 sits at position 0 */
1418*8d67ca89SAndroid Build Coastguard Worker	psrldq	$11, %xmm3		/* same shift for the leftover %rdi bytes; upper bytes become 0 */
1419*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
1420*8d67ca89SAndroid Build Coastguard Worker
1421*8d67ca89SAndroid Build Coastguard Worker/*
1422*8d67ca89SAndroid Build Coastguard Worker *  The following cases will be handled by ashr_12: the first %rdi block is shifted up by 4 bytes (pslldq $4) to line up with %rsi; later blocks are merged with palignr $12.
1423*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
1424*8d67ca89SAndroid Build Coastguard Worker *        n(4~15)          n - 4      	        11(15 +(n - 4) - n)         ashr_12
1425*8d67ca89SAndroid Build Coastguard Worker */
1426*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1427*8d67ca89SAndroid Build Coastguard WorkerL(ashr_12):
1428*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* %xmm0 = 0, used below to find NUL bytes */
1429*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2		/* first 16-byte block at %rdi (movdqa requires a 16-byte-aligned address) */
1430*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1		/* first 16-byte block at %rsi */
1431*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0		/* %xmm0 = 0xff in every byte where the %rsi block is NUL */
1432*8d67ca89SAndroid Build Coastguard Worker	pslldq	$4, %xmm2		/* move the %rdi bytes up 4 positions; the low 4 bytes become 0 */
1433*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2		/* 0xff where the shifted %rdi byte equals the %rsi byte */
1434*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2		/* top bit stays set only for bytes that match and are not NUL */
1435*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
1436*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx		/* %edx and %cl are set up before control reaches this label */
1437*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d		/* shift both masks right by the same %cl */
1438*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx		/* non-zero when the two masks differ */
1439*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)		/* masks differ: leave via L(less32bytes) */
1440*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3		/* unshifted %rdi block, carried into the loop as the palignr source */
1441*8d67ca89SAndroid Build Coastguard Worker
1442*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER		/* macro defined outside this section */
1443*8d67ca89SAndroid Build Coastguard Worker
1444*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0		/* clear the NUL mask for the loop */
1445*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
1446*8d67ca89SAndroid Build Coastguard Worker	mov	$12, %r9d	/* byte position left over from less32bytes case */
1447*8d67ca89SAndroid Build Coastguard Worker	/*
1448*8d67ca89SAndroid Build Coastguard Worker	 * Set up %r10 so that we can detect crossing a page boundary.
1449*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
1450*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
1451*8d67ca89SAndroid Build Coastguard Worker	 */
1452*8d67ca89SAndroid Build Coastguard Worker	lea	12(%rdi), %r10	/* the page offset is taken from %rdi + 12 */
1453*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
1454*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
1455*8d67ca89SAndroid Build Coastguard Worker
1456*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1457*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_12):
1458*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1459*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_12)
1460*8d67ca89SAndroid Build Coastguard Worker
1461*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_12):
1462*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1463*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1464*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1465*8d67ca89SAndroid Build Coastguard Worker
1466*8d67ca89SAndroid Build Coastguard Worker	palignr $12, %xmm3, %xmm2        /* merge into one 16byte value */
1467*8d67ca89SAndroid Build Coastguard Worker
1468*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
1469*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1470*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1471*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1472*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1473*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1474*8d67ca89SAndroid Build Coastguard Worker
1475*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1476*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1477*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1478*8d67ca89SAndroid Build Coastguard Worker#endif
1479*8d67ca89SAndroid Build Coastguard Worker
1480*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1481*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
1482*8d67ca89SAndroid Build Coastguard Worker
1483*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1484*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_12)	/* cross page boundary */
1485*8d67ca89SAndroid Build Coastguard Worker
1486*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1487*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1488*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1489*8d67ca89SAndroid Build Coastguard Worker
1490*8d67ca89SAndroid Build Coastguard Worker	palignr $12, %xmm3, %xmm2        /* merge into one 16byte value */
1491*8d67ca89SAndroid Build Coastguard Worker
1492*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
1493*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1494*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1495*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1496*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1497*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1498*8d67ca89SAndroid Build Coastguard Worker
1499*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1500*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1501*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1502*8d67ca89SAndroid Build Coastguard Worker#endif
1503*8d67ca89SAndroid Build Coastguard Worker
1504*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1505*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
1506*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_12)
1507*8d67ca89SAndroid Build Coastguard Worker
1508*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1509*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_12):
1510*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
1511*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
1512*8d67ca89SAndroid Build Coastguard Worker	test	$0xf000, %edx
1513*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_12_exittail)
1514*8d67ca89SAndroid Build Coastguard Worker
1515*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1516*8d67ca89SAndroid Build Coastguard Worker	cmp	$3, %r11
1517*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_12_exittail)
1518*8d67ca89SAndroid Build Coastguard Worker#endif
1519*8d67ca89SAndroid Build Coastguard Worker
1520*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
1521*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10
1522*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_12)
1523*8d67ca89SAndroid Build Coastguard Worker
1524*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1525*8d67ca89SAndroid Build Coastguard WorkerL(ashr_12_exittail):
1526*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1527*8d67ca89SAndroid Build Coastguard Worker	psrldq	$12, %xmm0
1528*8d67ca89SAndroid Build Coastguard Worker	psrldq	$12, %xmm3
1529*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
1530*8d67ca89SAndroid Build Coastguard Worker
1531*8d67ca89SAndroid Build Coastguard Worker/*
1532*8d67ca89SAndroid Build Coastguard Worker *  The following cases will be handled by ashr_13
1533*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
1534*8d67ca89SAndroid Build Coastguard Worker *        n(3~15)          n - 3      	        12(15 +(n - 3) - n)         ashr_13
1535*8d67ca89SAndroid Build Coastguard Worker */
/*
 * Flow of this section, as written below:
 *   1. Compare the first aligned 16-byte chunk of each string, with the
 *      %rdi chunk shifted up by 3 bytes (pslldq $3) before the byte-wise
 *      compare.  A mask difference leaves via L(less32bytes).
 *   2. Otherwise loop over further aligned chunks (body unrolled twice),
 *      rebuilding each 16 bytes of the %rdi string with palignr $13 from
 *      the previous chunk (%xmm3) and the current chunk (%xmm2).
 *   3. %r10 counts up towards a 4K page boundary; when it goes positive,
 *      L(nibble_ashr_13) checks the 3 leftover bytes of %xmm3 for NUL
 *      before the next pair of chunk loads.
 * movdqa requires 16-byte aligned addresses, so %rdi and %rsi must already
 * be aligned on entry.
 * NOTE(review): %edx, %cl, %rax, %r8d and (for strncmp) %r11 are set up by
 * code outside this section; confirm their exact meaning there.
 */
1536*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1537*8d67ca89SAndroid Build Coastguard WorkerL(ashr_13):
1538*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
1539*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2
1540*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1
1541*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* %xmm0 = 0xff where the %rsi chunk has a NUL byte */
1542*8d67ca89SAndroid Build Coastguard Worker	pslldq	$3, %xmm2	/* shift %rdi chunk up 3 bytes; low 3 bytes become 0 */
1543*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2	/* %xmm2 = 0xff where the bytes are equal */
1544*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2	/* byte MSB stays set only for equal, non-NUL bytes */
1545*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d	/* %r9d = 16-bit mask of those MSBs */
	/* NOTE(review): %edx and %cl come from earlier code; presumably a
	   reference mask and the %rsi offset from the table above - confirm. */
1546*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx
1547*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d
1548*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx
1549*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)	/* shifted masks differ: resolve in the exit path */
1550*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3	/* %xmm3 = first %rdi chunk, "previous" chunk for palignr */
1551*8d67ca89SAndroid Build Coastguard Worker
	/* Macro defined elsewhere in this file; presumably adjusts the
	   strncmp byte count - confirm at its definition. */
1552*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER
1553*8d67ca89SAndroid Build Coastguard Worker
1554*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
1555*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
1556*8d67ca89SAndroid Build Coastguard Worker	mov	$13, %r9d	/* byte position left over from less32bytes case */
1557*8d67ca89SAndroid Build Coastguard Worker	/*
1558*8d67ca89SAndroid Build Coastguard Worker	 * Setup %r10 value allows us to detect crossing a page boundary.
1559*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
1560*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
1561*8d67ca89SAndroid Build Coastguard Worker	 */
1562*8d67ca89SAndroid Build Coastguard Worker	lea	13(%rdi), %r10
1563*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
1564*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
1565*8d67ca89SAndroid Build Coastguard Worker
1566*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1567*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_13):
1568*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1569*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_13)	/* %r10 > 0: check leftover bytes before loading */
1570*8d67ca89SAndroid Build Coastguard Worker
1571*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_13):
1572*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1573*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1574*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4	/* keep the unshifted chunk; it becomes %xmm3 below */
1575*8d67ca89SAndroid Build Coastguard Worker
1576*8d67ca89SAndroid Build Coastguard Worker	palignr $13, %xmm3, %xmm2        /* merge into one 16byte value */
1577*8d67ca89SAndroid Build Coastguard Worker
1578*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* NUL mask of the %rsi chunk (%xmm0 is 0 on entry) */
1579*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1	/* equality mask */
1580*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1	/* MSB set only for equal, non-NUL bytes */
1581*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1582*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx	/* 0 only if all 16 bytes are equal and non-NUL */
1583*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1584*8d67ca89SAndroid Build Coastguard Worker
1585*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm.
	   L(strcmp_exitz) returns 0. */
1586*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1587*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1588*8d67ca89SAndroid Build Coastguard Worker#endif
1589*8d67ca89SAndroid Build Coastguard Worker
1590*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1591*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3	/* current chunk becomes the previous chunk */
1592*8d67ca89SAndroid Build Coastguard Worker
	/* Second copy of the loop body (unrolled). */
1593*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1594*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_13)	/* cross page boundary */
1595*8d67ca89SAndroid Build Coastguard Worker
1596*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1597*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1598*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1599*8d67ca89SAndroid Build Coastguard Worker
1600*8d67ca89SAndroid Build Coastguard Worker	palignr $13, %xmm3, %xmm2        /* merge into one 16byte value */
1601*8d67ca89SAndroid Build Coastguard Worker
1602*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
1603*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1604*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1605*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1606*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1607*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1608*8d67ca89SAndroid Build Coastguard Worker
1609*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1610*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1611*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1612*8d67ca89SAndroid Build Coastguard Worker#endif
1613*8d67ca89SAndroid Build Coastguard Worker
1614*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1615*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
1616*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_13)
1617*8d67ca89SAndroid Build Coastguard Worker
1618*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1619*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_13):
1620*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
1621*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
1622*8d67ca89SAndroid Build Coastguard Worker	test	$0xe000, %edx	/* bits 13-15 = the 3 bytes of %xmm3 not yet compared */
1623*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_13_exittail)
1624*8d67ca89SAndroid Build Coastguard Worker
1625*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably "at most 2 bytes left to compare", so the
	   leftover bytes already decide the result - confirm %r11 semantics. */
1626*8d67ca89SAndroid Build Coastguard Worker	cmp	$2, %r11
1627*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_13_exittail)
1628*8d67ca89SAndroid Build Coastguard Worker#endif
1629*8d67ca89SAndroid Build Coastguard Worker
1630*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* discard the NUL mask computed above */
1631*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* rearm the counter for the next 4K page */
1632*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_13)
1633*8d67ca89SAndroid Build Coastguard Worker
1634*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1635*8d67ca89SAndroid Build Coastguard WorkerL(ashr_13_exittail):
1636*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
	/* Move the 3 leftover bytes and their NUL mask down to byte 0. */
1637*8d67ca89SAndroid Build Coastguard Worker	psrldq  $13, %xmm0
1638*8d67ca89SAndroid Build Coastguard Worker	psrldq  $13, %xmm3
1639*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
1640*8d67ca89SAndroid Build Coastguard Worker
1641*8d67ca89SAndroid Build Coastguard Worker/*
1642*8d67ca89SAndroid Build Coastguard Worker *  The following cases will be handled by ashr_14
1643*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
1644*8d67ca89SAndroid Build Coastguard Worker *        n(2~15)          n - 2      	        13(15 +(n - 2) - n)         ashr_14
1645*8d67ca89SAndroid Build Coastguard Worker */
/*
 * Flow of this section, as written below:
 *   1. Compare the first aligned 16-byte chunk of each string, with the
 *      %rdi chunk shifted up by 2 bytes (pslldq $2) before the byte-wise
 *      compare.  A mask difference leaves via L(less32bytes).
 *   2. Otherwise loop over further aligned chunks (body unrolled twice),
 *      rebuilding each 16 bytes of the %rdi string with palignr $14 from
 *      the previous chunk (%xmm3) and the current chunk (%xmm2).
 *   3. %r10 counts up towards a 4K page boundary; when it goes positive,
 *      L(nibble_ashr_14) checks the 2 leftover bytes of %xmm3 for NUL
 *      before the next pair of chunk loads.
 * movdqa requires 16-byte aligned addresses, so %rdi and %rsi must already
 * be aligned on entry.
 * NOTE(review): %edx, %cl, %rax, %r8d and (for strncmp) %r11 are set up by
 * code outside this section; confirm their exact meaning there.
 */
1646*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1647*8d67ca89SAndroid Build Coastguard WorkerL(ashr_14):
1648*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
1649*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2
1650*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1
1651*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* %xmm0 = 0xff where the %rsi chunk has a NUL byte */
1652*8d67ca89SAndroid Build Coastguard Worker	pslldq  $2, %xmm2	/* shift %rdi chunk up 2 bytes; low 2 bytes become 0 */
1653*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2	/* %xmm2 = 0xff where the bytes are equal */
1654*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2	/* byte MSB stays set only for equal, non-NUL bytes */
1655*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d	/* %r9d = 16-bit mask of those MSBs */
	/* NOTE(review): %edx and %cl come from earlier code; presumably a
	   reference mask and the %rsi offset from the table above - confirm. */
1656*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx
1657*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d
1658*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx
1659*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)	/* shifted masks differ: resolve in the exit path */
1660*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3	/* %xmm3 = first %rdi chunk, "previous" chunk for palignr */
1661*8d67ca89SAndroid Build Coastguard Worker
	/* Macro defined elsewhere in this file; presumably adjusts the
	   strncmp byte count - confirm at its definition. */
1662*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER
1663*8d67ca89SAndroid Build Coastguard Worker
1664*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
1665*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
1666*8d67ca89SAndroid Build Coastguard Worker	mov	$14, %r9d	/* byte position left over from less32bytes case */
1667*8d67ca89SAndroid Build Coastguard Worker	/*
1668*8d67ca89SAndroid Build Coastguard Worker	 * Setup %r10 value allows us to detect crossing a page boundary.
1669*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
1670*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
1671*8d67ca89SAndroid Build Coastguard Worker	 */
1672*8d67ca89SAndroid Build Coastguard Worker	lea	14(%rdi), %r10
1673*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
1674*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
1675*8d67ca89SAndroid Build Coastguard Worker
1676*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1677*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_14):
1678*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1679*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_14)	/* %r10 > 0: check leftover bytes before loading */
1680*8d67ca89SAndroid Build Coastguard Worker
1681*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_14):
1682*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1683*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1684*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4	/* keep the unshifted chunk; it becomes %xmm3 below */
1685*8d67ca89SAndroid Build Coastguard Worker
1686*8d67ca89SAndroid Build Coastguard Worker	palignr $14, %xmm3, %xmm2        /* merge into one 16byte value */
1687*8d67ca89SAndroid Build Coastguard Worker
1688*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* NUL mask of the %rsi chunk (%xmm0 is 0 on entry) */
1689*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1	/* equality mask */
1690*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1	/* MSB set only for equal, non-NUL bytes */
1691*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1692*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx	/* 0 only if all 16 bytes are equal and non-NUL */
1693*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1694*8d67ca89SAndroid Build Coastguard Worker
1695*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm.
	   L(strcmp_exitz) returns 0. */
1696*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1697*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1698*8d67ca89SAndroid Build Coastguard Worker#endif
1699*8d67ca89SAndroid Build Coastguard Worker
1700*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1701*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3	/* current chunk becomes the previous chunk */
1702*8d67ca89SAndroid Build Coastguard Worker
	/* Second copy of the loop body (unrolled). */
1703*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1704*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_14)	/* cross page boundary */
1705*8d67ca89SAndroid Build Coastguard Worker
1706*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1707*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1708*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1709*8d67ca89SAndroid Build Coastguard Worker
1710*8d67ca89SAndroid Build Coastguard Worker	palignr $14, %xmm3, %xmm2        /* merge into one 16byte value */
1711*8d67ca89SAndroid Build Coastguard Worker
1712*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
1713*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1714*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1715*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1716*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1717*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1718*8d67ca89SAndroid Build Coastguard Worker
1719*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1720*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1721*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1722*8d67ca89SAndroid Build Coastguard Worker#endif
1723*8d67ca89SAndroid Build Coastguard Worker
1724*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1725*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
1726*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_14)
1727*8d67ca89SAndroid Build Coastguard Worker
1728*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1729*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_14):
1730*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
1731*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
1732*8d67ca89SAndroid Build Coastguard Worker	test	$0xc000, %edx	/* bits 14-15 = the 2 bytes of %xmm3 not yet compared */
1733*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_14_exittail)
1734*8d67ca89SAndroid Build Coastguard Worker
1735*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably "at most 1 byte left to compare", so the
	   leftover bytes already decide the result - confirm %r11 semantics. */
1736*8d67ca89SAndroid Build Coastguard Worker	cmp	$1, %r11
1737*8d67ca89SAndroid Build Coastguard Worker	jbe	L(ashr_14_exittail)
1738*8d67ca89SAndroid Build Coastguard Worker#endif
1739*8d67ca89SAndroid Build Coastguard Worker
1740*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* discard the NUL mask computed above */
1741*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* rearm the counter for the next 4K page */
1742*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_14)
1743*8d67ca89SAndroid Build Coastguard Worker
1744*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1745*8d67ca89SAndroid Build Coastguard WorkerL(ashr_14_exittail):
1746*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
	/* Move the 2 leftover bytes and their NUL mask down to byte 0. */
1747*8d67ca89SAndroid Build Coastguard Worker	psrldq	$14, %xmm0
1748*8d67ca89SAndroid Build Coastguard Worker	psrldq	$14, %xmm3
1749*8d67ca89SAndroid Build Coastguard Worker	jmp	L(aftertail)
1750*8d67ca89SAndroid Build Coastguard Worker
1751*8d67ca89SAndroid Build Coastguard Worker/*
1752*8d67ca89SAndroid Build Coastguard Worker *  The following cases will be handled by ashr_15
1753*8d67ca89SAndroid Build Coastguard Worker *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
1754*8d67ca89SAndroid Build Coastguard Worker *        n(1~15)          n - 1      	        14(15 +(n - 1) - n)         ashr_15
1755*8d67ca89SAndroid Build Coastguard Worker */
/*
 * Flow of this section, as written below:
 *   1. Compare the first aligned 16-byte chunk of each string, with the
 *      %rdi chunk shifted up by 1 byte (pslldq $1) before the byte-wise
 *      compare.  A mask difference leaves via L(less32bytes).
 *   2. Otherwise loop over further aligned chunks (body unrolled twice),
 *      rebuilding each 16 bytes of the %rdi string with palignr $15 from
 *      the previous chunk (%xmm3) and the current chunk (%xmm2).
 *   3. %r10 counts up towards a 4K page boundary; when it goes positive,
 *      L(nibble_ashr_15) checks the 1 leftover byte of %xmm3 for NUL
 *      before the next pair of chunk loads.
 * Unlike the other ashr_N sections, L(ashr_15_exittail) does not jump: it
 * falls through into L(aftertail), which follows it directly.
 * movdqa requires 16-byte aligned addresses, so %rdi and %rsi must already
 * be aligned on entry.
 * NOTE(review): %edx, %cl, %rax, %r8d and (for strncmp) %r11 are set up by
 * code outside this section; confirm their exact meaning there.
 */
1756*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1757*8d67ca89SAndroid Build Coastguard WorkerL(ashr_15):
1758*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
1759*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm2
1760*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi), %xmm1
1761*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* %xmm0 = 0xff where the %rsi chunk has a NUL byte */
1762*8d67ca89SAndroid Build Coastguard Worker	pslldq	$1, %xmm2	/* shift %rdi chunk up 1 byte; low byte becomes 0 */
1763*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm2	/* %xmm2 = 0xff where the bytes are equal */
1764*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm2	/* byte MSB stays set only for equal, non-NUL bytes */
1765*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm2, %r9d	/* %r9d = 16-bit mask of those MSBs */
	/* NOTE(review): %edx and %cl come from earlier code; presumably a
	   reference mask and the %rsi offset from the table above - confirm. */
1766*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %edx
1767*8d67ca89SAndroid Build Coastguard Worker	shr	%cl, %r9d
1768*8d67ca89SAndroid Build Coastguard Worker	sub	%r9d, %edx
1769*8d67ca89SAndroid Build Coastguard Worker	jnz	L(less32bytes)	/* shifted masks differ: resolve in the exit path */
1770*8d67ca89SAndroid Build Coastguard Worker
1771*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi), %xmm3	/* %xmm3 = first %rdi chunk, "previous" chunk for palignr */
1772*8d67ca89SAndroid Build Coastguard Worker
	/* Macro defined elsewhere in this file; presumably adjusts the
	   strncmp byte count - confirm at its definition. */
1773*8d67ca89SAndroid Build Coastguard Worker	UPDATE_STRNCMP_COUNTER
1774*8d67ca89SAndroid Build Coastguard Worker
1775*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0
1776*8d67ca89SAndroid Build Coastguard Worker	mov	$16, %rcx	/* index for loads */
1777*8d67ca89SAndroid Build Coastguard Worker	mov	$15, %r9d	/* byte position left over from less32bytes case */
1778*8d67ca89SAndroid Build Coastguard Worker	/*
1779*8d67ca89SAndroid Build Coastguard Worker	 * Setup %r10 value allows us to detect crossing a page boundary.
1780*8d67ca89SAndroid Build Coastguard Worker	 * When %r10 goes positive we have crossed a page boundary and
1781*8d67ca89SAndroid Build Coastguard Worker	 * need to do a nibble.
1782*8d67ca89SAndroid Build Coastguard Worker	 */
1783*8d67ca89SAndroid Build Coastguard Worker	lea	15(%rdi), %r10
1784*8d67ca89SAndroid Build Coastguard Worker	and	$0xfff, %r10	/* offset into 4K page */
1785*8d67ca89SAndroid Build Coastguard Worker
1786*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* subtract 4K pagesize */
1787*8d67ca89SAndroid Build Coastguard Worker
1788*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1789*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_15):
1790*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1791*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_15)	/* %r10 > 0: check leftover byte before loading */
1792*8d67ca89SAndroid Build Coastguard Worker
1793*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_15):
1794*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1795*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1796*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4	/* keep the unshifted chunk; it becomes %xmm3 below */
1797*8d67ca89SAndroid Build Coastguard Worker
1798*8d67ca89SAndroid Build Coastguard Worker	palignr $15, %xmm3, %xmm2        /* merge into one 16byte value */
1799*8d67ca89SAndroid Build Coastguard Worker
1800*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0	/* NUL mask of the %rsi chunk (%xmm0 is 0 on entry) */
1801*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1	/* equality mask */
1802*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1	/* MSB set only for equal, non-NUL bytes */
1803*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1804*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx	/* 0 only if all 16 bytes are equal and non-NUL */
1805*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1806*8d67ca89SAndroid Build Coastguard Worker
1807*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm.
	   L(strcmp_exitz) returns 0. */
1808*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1809*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1810*8d67ca89SAndroid Build Coastguard Worker#endif
1811*8d67ca89SAndroid Build Coastguard Worker
1812*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1813*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3	/* current chunk becomes the previous chunk */
1814*8d67ca89SAndroid Build Coastguard Worker
	/* Second copy of the loop body (unrolled). */
1815*8d67ca89SAndroid Build Coastguard Worker	add	$16, %r10
1816*8d67ca89SAndroid Build Coastguard Worker	jg	L(nibble_ashr_15)	/* cross page boundary */
1817*8d67ca89SAndroid Build Coastguard Worker
1818*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
1819*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rdi, %rcx), %xmm2
1820*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, %xmm4
1821*8d67ca89SAndroid Build Coastguard Worker
1822*8d67ca89SAndroid Build Coastguard Worker	palignr $15, %xmm3, %xmm2        /* merge into one 16byte value */
1823*8d67ca89SAndroid Build Coastguard Worker
1824*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm1, %xmm0
1825*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm2, %xmm1
1826*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1
1827*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1828*8d67ca89SAndroid Build Coastguard Worker	sub	$0xffff, %edx
1829*8d67ca89SAndroid Build Coastguard Worker	jnz	L(exit)
1830*8d67ca89SAndroid Build Coastguard Worker
1831*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
1832*8d67ca89SAndroid Build Coastguard Worker	sub	$16, %r11
1833*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1834*8d67ca89SAndroid Build Coastguard Worker#endif
1835*8d67ca89SAndroid Build Coastguard Worker
1836*8d67ca89SAndroid Build Coastguard Worker	add	$16, %rcx
1837*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, %xmm3
1838*8d67ca89SAndroid Build Coastguard Worker	jmp	L(loop_ashr_15)
1839*8d67ca89SAndroid Build Coastguard Worker
1840*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1841*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_15):
1842*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
1843*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm0, %edx
1844*8d67ca89SAndroid Build Coastguard Worker	test	$0x8000, %edx	/* bit 15 = the 1 byte of %xmm3 not yet compared */
1845*8d67ca89SAndroid Build Coastguard Worker	jnz	L(ashr_15_exittail)
1846*8d67ca89SAndroid Build Coastguard Worker
1847*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably "no bytes left to compare" - confirm %r11
	   semantics. */
1848*8d67ca89SAndroid Build Coastguard Worker	test	%r11, %r11
1849*8d67ca89SAndroid Build Coastguard Worker	je	L(ashr_15_exittail)
1850*8d67ca89SAndroid Build Coastguard Worker#endif
1851*8d67ca89SAndroid Build Coastguard Worker
1852*8d67ca89SAndroid Build Coastguard Worker	pxor	%xmm0, %xmm0	/* discard the NUL mask computed above */
1853*8d67ca89SAndroid Build Coastguard Worker	sub	$0x1000, %r10	/* rearm the counter for the next 4K page */
1854*8d67ca89SAndroid Build Coastguard Worker	jmp	L(gobble_ashr_15)
1855*8d67ca89SAndroid Build Coastguard Worker
1856*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1857*8d67ca89SAndroid Build Coastguard WorkerL(ashr_15_exittail):
1858*8d67ca89SAndroid Build Coastguard Worker	movdqa	(%rsi, %rcx), %xmm1
	/* Move the 1 leftover byte and its NUL mask down to byte 0, then fall
	   through into L(aftertail). */
1859*8d67ca89SAndroid Build Coastguard Worker	psrldq	$15, %xmm3
1860*8d67ca89SAndroid Build Coastguard Worker	psrldq	$15, %xmm0
1861*8d67ca89SAndroid Build Coastguard Worker
1862*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
/*
 * Common exit chain.  Each label below falls through into the next one:
 *   L(aftertail)   - builds a bit mask in %edx from %xmm0/%xmm1/%xmm3.
 *   L(exit)        - computes %rax = %r9 + %rcx - 16.
 *   L(less32bytes) - adds %rax to %rdi and %rcx to %rsi, and swaps the two
 *                    pointers when %r8d is non-zero.
 *   L(ret) / L(less16bytes)
 *                  - uses the lowest set bit of %rdx as a byte index and
 *                    returns the difference of the two bytes at that index.
 * Callers must arrive with %rdx non-zero: bsf leaves its destination
 * undefined for a zero source.
 */
1863*8d67ca89SAndroid Build Coastguard WorkerL(aftertail):
1864*8d67ca89SAndroid Build Coastguard Worker	pcmpeqb	%xmm3, %xmm1	/* %xmm1 = 0xff where bytes of %xmm1 and %xmm3 are equal */
1865*8d67ca89SAndroid Build Coastguard Worker	psubb	%xmm0, %xmm1	/* MSB stays set only for equal bytes not flagged in %xmm0 */
1866*8d67ca89SAndroid Build Coastguard Worker	pmovmskb %xmm1, %edx
1867*8d67ca89SAndroid Build Coastguard Worker	not	%edx		/* set bits now mark mismatch/flagged bytes; bits 16-31 become 1 */
1868*8d67ca89SAndroid Build Coastguard Worker
1869*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1870*8d67ca89SAndroid Build Coastguard WorkerL(exit):
1871*8d67ca89SAndroid Build Coastguard Worker	lea	-16(%r9, %rcx), %rax	/* locate the exact offset for rdi */
1872*8d67ca89SAndroid Build Coastguard WorkerL(less32bytes):
1873*8d67ca89SAndroid Build Coastguard Worker	lea	(%rdi, %rax), %rdi	/* locate the exact address for first operand(rdi) */
1874*8d67ca89SAndroid Build Coastguard Worker	lea	(%rsi, %rcx), %rsi	/* locate the exact address for second operand(rsi) */
1875*8d67ca89SAndroid Build Coastguard Worker	test	%r8d, %r8d
1876*8d67ca89SAndroid Build Coastguard Worker	jz	L(ret)			/* %r8d == 0: pointers are already in the original order */
1877*8d67ca89SAndroid Build Coastguard Worker	xchg	%rsi, %rdi		/* recover original order according to flag(%r8d) */
1878*8d67ca89SAndroid Build Coastguard Worker
1879*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
1880*8d67ca89SAndroid Build Coastguard WorkerL(ret):
1881*8d67ca89SAndroid Build Coastguard WorkerL(less16bytes):
1882*8d67ca89SAndroid Build Coastguard Worker	bsf	%rdx, %rdx		/* find and store bit index in %rdx */
1883*8d67ca89SAndroid Build Coastguard Worker
1884*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP
	/* Return 0 when %r11 <= index (unsigned).
	   NOTE(review): %r11 looks like the remaining strncmp count - confirm. */
1885*8d67ca89SAndroid Build Coastguard Worker	sub	%rdx, %r11
1886*8d67ca89SAndroid Build Coastguard Worker	jbe	L(strcmp_exitz)
1887*8d67ca89SAndroid Build Coastguard Worker#endif
1888*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%rsi, %rdx), %ecx	/* zero-extended byte from %rsi */
1889*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%rdi, %rdx), %eax	/* zero-extended byte from %rdi */
1890*8d67ca89SAndroid Build Coastguard Worker
1891*8d67ca89SAndroid Build Coastguard Worker	sub	%ecx, %eax		/* result = %rdi byte - %rsi byte */
1892*8d67ca89SAndroid Build Coastguard Worker	ret
1893*8d67ca89SAndroid Build Coastguard Worker
1894*8d67ca89SAndroid Build Coastguard WorkerL(strcmp_exitz):
	/* Return 0 in %eax.  In this part of the file it is reached only from
	   USE_AS_STRNCMP branches. */
1895*8d67ca89SAndroid Build Coastguard Worker	xor	%eax, %eax
1896*8d67ca89SAndroid Build Coastguard Worker	ret
1897*8d67ca89SAndroid Build Coastguard Worker
1898*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
/*
 * Return the difference of the bytes at (%rdi) and (%rsi), both
 * zero-extended, in %eax.  %ecx is clobbered.
 */
1899*8d67ca89SAndroid Build Coastguard WorkerL(Byte0):
1900*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%rsi), %ecx
1901*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%rdi), %eax
1902*8d67ca89SAndroid Build Coastguard Worker
1903*8d67ca89SAndroid Build Coastguard Worker	sub	%ecx, %eax
1904*8d67ca89SAndroid Build Coastguard Worker	ret
/* END is a macro defined outside this part of the file; presumably it
   closes the STRCMP function symbol - confirm at its definition. */
1905*8d67ca89SAndroid Build Coastguard WorkerEND (STRCMP)
1906*8d67ca89SAndroid Build Coastguard Worker
1907*8d67ca89SAndroid Build Coastguard Worker	.section .rodata,"a",@progbits
/*
 * Table of 16 32-bit entries, aligned to 8 bytes.  Each entry is the
 * distance from L(unaligned_table) to one L(ashr_N) label, so the table
 * holds no absolute addresses.  Index 0 is ashr_1, index 14 is ashr_15,
 * and ashr_0 comes last at index 15.
 * NOTE(review): the code that indexes this table is outside this part of
 * the file; confirm how the index is computed there.
 */
1908*8d67ca89SAndroid Build Coastguard Worker	.p2align 3
1909*8d67ca89SAndroid Build Coastguard WorkerL(unaligned_table):
1910*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_1) - L(unaligned_table)
1911*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_2) - L(unaligned_table)
1912*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_3) - L(unaligned_table)
1913*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_4) - L(unaligned_table)
1914*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_5) - L(unaligned_table)
1915*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_6) - L(unaligned_table)
1916*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_7) - L(unaligned_table)
1917*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_8) - L(unaligned_table)
1918*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_9) - L(unaligned_table)
1919*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_10) - L(unaligned_table)
1920*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_11) - L(unaligned_table)
1921*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_12) - L(unaligned_table)
1922*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_13) - L(unaligned_table)
1923*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_14) - L(unaligned_table)
1924*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_15) - L(unaligned_table)
1925*8d67ca89SAndroid Build Coastguard Worker	.int	L(ashr_0) - L(unaligned_table)
1926