/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23*8d67ca89SAndroid Build Coastguard WorkerANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24*8d67ca89SAndroid Build Coastguard Worker(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25*8d67ca89SAndroid Build Coastguard WorkerLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26*8d67ca89SAndroid Build Coastguard WorkerANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27*8d67ca89SAndroid Build Coastguard Worker(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28*8d67ca89SAndroid Build Coastguard WorkerSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29*8d67ca89SAndroid Build Coastguard Worker*/ 30*8d67ca89SAndroid Build Coastguard Worker 31*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 32*8d67ca89SAndroid Build Coastguard Worker/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz 33*8d67ca89SAndroid Build Coastguard Worker if the new counter > the old one or is 0. 
*/ 34*8d67ca89SAndroid Build Coastguard Worker#define UPDATE_STRNCMP_COUNTER \ 35*8d67ca89SAndroid Build Coastguard Worker /* calculate left number to compare */ \ 36*8d67ca89SAndroid Build Coastguard Worker lea -16(%rcx, %r11), %r9; \ 37*8d67ca89SAndroid Build Coastguard Worker cmp %r9, %r11; \ 38*8d67ca89SAndroid Build Coastguard Worker jb L(strcmp_exitz); \ 39*8d67ca89SAndroid Build Coastguard Worker test %r9, %r9; \ 40*8d67ca89SAndroid Build Coastguard Worker je L(strcmp_exitz); \ 41*8d67ca89SAndroid Build Coastguard Worker mov %r9, %r11 42*8d67ca89SAndroid Build Coastguard Worker 43*8d67ca89SAndroid Build Coastguard Worker#else 44*8d67ca89SAndroid Build Coastguard Worker#define UPDATE_STRNCMP_COUNTER 45*8d67ca89SAndroid Build Coastguard Worker#ifndef STRCMP 46*8d67ca89SAndroid Build Coastguard Worker#define STRCMP strcmp 47*8d67ca89SAndroid Build Coastguard Worker#endif 48*8d67ca89SAndroid Build Coastguard Worker#endif 49*8d67ca89SAndroid Build Coastguard Worker 50*8d67ca89SAndroid Build Coastguard Worker#ifndef L 51*8d67ca89SAndroid Build Coastguard Worker# define L(label) .L##label 52*8d67ca89SAndroid Build Coastguard Worker#endif 53*8d67ca89SAndroid Build Coastguard Worker 54*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_startproc 55*8d67ca89SAndroid Build Coastguard Worker# define cfi_startproc .cfi_startproc 56*8d67ca89SAndroid Build Coastguard Worker#endif 57*8d67ca89SAndroid Build Coastguard Worker 58*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_endproc 59*8d67ca89SAndroid Build Coastguard Worker# define cfi_endproc .cfi_endproc 60*8d67ca89SAndroid Build Coastguard Worker#endif 61*8d67ca89SAndroid Build Coastguard Worker 62*8d67ca89SAndroid Build Coastguard Worker#ifndef ENTRY 63*8d67ca89SAndroid Build Coastguard Worker# define ENTRY(name) \ 64*8d67ca89SAndroid Build Coastguard Worker .type name, @function; \ 65*8d67ca89SAndroid Build Coastguard Worker .globl name; \ 66*8d67ca89SAndroid Build Coastguard Worker .p2align 4; \ 
67*8d67ca89SAndroid Build Coastguard Workername: \ 68*8d67ca89SAndroid Build Coastguard Worker cfi_startproc 69*8d67ca89SAndroid Build Coastguard Worker#endif 70*8d67ca89SAndroid Build Coastguard Worker 71*8d67ca89SAndroid Build Coastguard Worker#ifndef END 72*8d67ca89SAndroid Build Coastguard Worker# define END(name) \ 73*8d67ca89SAndroid Build Coastguard Worker cfi_endproc; \ 74*8d67ca89SAndroid Build Coastguard Worker .size name, .-name 75*8d67ca89SAndroid Build Coastguard Worker#endif 76*8d67ca89SAndroid Build Coastguard Worker#define RETURN ret 77*8d67ca89SAndroid Build Coastguard Worker .section .text.ssse3,"ax",@progbits 78*8d67ca89SAndroid Build Coastguard WorkerENTRY (STRCMP) 79*8d67ca89SAndroid Build Coastguard Worker/* 80*8d67ca89SAndroid Build Coastguard Worker * This implementation uses SSE to compare up to 16 bytes at a time. 81*8d67ca89SAndroid Build Coastguard Worker */ 82*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 83*8d67ca89SAndroid Build Coastguard Worker test %rdx, %rdx 84*8d67ca89SAndroid Build Coastguard Worker je L(strcmp_exitz) 85*8d67ca89SAndroid Build Coastguard Worker cmp $1, %rdx 86*8d67ca89SAndroid Build Coastguard Worker je L(Byte0) 87*8d67ca89SAndroid Build Coastguard Worker mov %rdx, %r11 88*8d67ca89SAndroid Build Coastguard Worker#endif 89*8d67ca89SAndroid Build Coastguard Worker mov %esi, %ecx 90*8d67ca89SAndroid Build Coastguard Worker mov %edi, %eax 91*8d67ca89SAndroid Build Coastguard Worker/* Use 64bit AND here to avoid long NOP padding. 
*/ 92*8d67ca89SAndroid Build Coastguard Worker and $0x3f, %rcx /* rsi alignment in cache line */ 93*8d67ca89SAndroid Build Coastguard Worker and $0x3f, %rax /* rdi alignment in cache line */ 94*8d67ca89SAndroid Build Coastguard Worker cmp $0x30, %ecx 95*8d67ca89SAndroid Build Coastguard Worker ja L(crosscache) /* rsi: 16-byte load will cross cache line */ 96*8d67ca89SAndroid Build Coastguard Worker cmp $0x30, %eax 97*8d67ca89SAndroid Build Coastguard Worker ja L(crosscache) /* rdi: 16-byte load will cross cache line */ 98*8d67ca89SAndroid Build Coastguard Worker movlpd (%rdi), %xmm1 99*8d67ca89SAndroid Build Coastguard Worker movlpd (%rsi), %xmm2 100*8d67ca89SAndroid Build Coastguard Worker movhpd 8(%rdi), %xmm1 101*8d67ca89SAndroid Build Coastguard Worker movhpd 8(%rsi), %xmm2 102*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ 103*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 /* Any null chars? */ 104*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ 105*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 /* packed sub of comparison results*/ 106*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 107*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ 108*8d67ca89SAndroid Build Coastguard Worker jnz L(less16bytes) /* If not, find different value or null char */ 109*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 110*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 111*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) /* finish comparision */ 112*8d67ca89SAndroid Build Coastguard Worker#endif 113*8d67ca89SAndroid Build Coastguard Worker add $16, %rsi /* prepare to search next 16 bytes */ 114*8d67ca89SAndroid Build Coastguard Worker add $16, %rdi /* prepare to search next 16 bytes */ 115*8d67ca89SAndroid Build Coastguard Worker 
116*8d67ca89SAndroid Build Coastguard Worker /* 117*8d67ca89SAndroid Build Coastguard Worker * Determine source and destination string offsets from 16-byte alignment. 118*8d67ca89SAndroid Build Coastguard Worker * Use relative offset difference between the two to determine which case 119*8d67ca89SAndroid Build Coastguard Worker * below to use. 120*8d67ca89SAndroid Build Coastguard Worker */ 121*8d67ca89SAndroid Build Coastguard Worker .p2align 4 122*8d67ca89SAndroid Build Coastguard WorkerL(crosscache): 123*8d67ca89SAndroid Build Coastguard Worker and $0xfffffffffffffff0, %rsi /* force %rsi is 16 byte aligned */ 124*8d67ca89SAndroid Build Coastguard Worker and $0xfffffffffffffff0, %rdi /* force %rdi is 16 byte aligned */ 125*8d67ca89SAndroid Build Coastguard Worker mov $0xffff, %edx /* for equivalent offset */ 126*8d67ca89SAndroid Build Coastguard Worker xor %r8d, %r8d 127*8d67ca89SAndroid Build Coastguard Worker and $0xf, %ecx /* offset of rsi */ 128*8d67ca89SAndroid Build Coastguard Worker and $0xf, %eax /* offset of rdi */ 129*8d67ca89SAndroid Build Coastguard Worker cmp %eax, %ecx 130*8d67ca89SAndroid Build Coastguard Worker je L(ashr_0) /* rsi and rdi relative offset same */ 131*8d67ca89SAndroid Build Coastguard Worker ja L(bigger) 132*8d67ca89SAndroid Build Coastguard Worker mov %edx, %r8d /* r8d is offset flag for exit tail */ 133*8d67ca89SAndroid Build Coastguard Worker xchg %ecx, %eax 134*8d67ca89SAndroid Build Coastguard Worker xchg %rsi, %rdi 135*8d67ca89SAndroid Build Coastguard WorkerL(bigger): 136*8d67ca89SAndroid Build Coastguard Worker lea 15(%rax), %r9 137*8d67ca89SAndroid Build Coastguard Worker sub %rcx, %r9 138*8d67ca89SAndroid Build Coastguard Worker lea L(unaligned_table)(%rip), %r10 139*8d67ca89SAndroid Build Coastguard Worker movslq (%r10, %r9,4), %r9 140*8d67ca89SAndroid Build Coastguard Worker lea (%r10, %r9), %r10 141*8d67ca89SAndroid Build Coastguard Worker jmp *%r10 /* jump to corresponding case */ 142*8d67ca89SAndroid Build Coastguard 
Worker 143*8d67ca89SAndroid Build Coastguard Worker/* 144*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_0 145*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 146*8d67ca89SAndroid Build Coastguard Worker * n(0~15) n(0~15) 15(15+ n-n) ashr_0 147*8d67ca89SAndroid Build Coastguard Worker */ 148*8d67ca89SAndroid Build Coastguard Worker .p2align 4 149*8d67ca89SAndroid Build Coastguard WorkerL(ashr_0): 150*8d67ca89SAndroid Build Coastguard Worker 151*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi), %xmm1 152*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ 153*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 /* Any null chars? */ 154*8d67ca89SAndroid Build Coastguard Worker pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ 155*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 /* packed sub of comparison results*/ 156*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %r9d 157*8d67ca89SAndroid Build Coastguard Worker shr %cl, %edx /* adjust 0xffff for offset */ 158*8d67ca89SAndroid Build Coastguard Worker shr %cl, %r9d /* adjust for 16-byte offset */ 159*8d67ca89SAndroid Build Coastguard Worker sub %r9d, %edx 160*8d67ca89SAndroid Build Coastguard Worker /* 161*8d67ca89SAndroid Build Coastguard Worker * edx must be the same with r9d if in left byte (16-rcx) is equal to 162*8d67ca89SAndroid Build Coastguard Worker * the start from (16-rax) and no null char was seen. 
163*8d67ca89SAndroid Build Coastguard Worker */ 164*8d67ca89SAndroid Build Coastguard Worker jne L(less32bytes) /* mismatch or null char */ 165*8d67ca89SAndroid Build Coastguard Worker UPDATE_STRNCMP_COUNTER 166*8d67ca89SAndroid Build Coastguard Worker mov $16, %rcx 167*8d67ca89SAndroid Build Coastguard Worker mov $16, %r9 168*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */ 169*8d67ca89SAndroid Build Coastguard Worker 170*8d67ca89SAndroid Build Coastguard Worker /* 171*8d67ca89SAndroid Build Coastguard Worker * Now both strings are aligned at 16-byte boundary. Loop over strings 172*8d67ca89SAndroid Build Coastguard Worker * checking 32-bytes per iteration. 173*8d67ca89SAndroid Build Coastguard Worker */ 174*8d67ca89SAndroid Build Coastguard Worker .p2align 4 175*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_0): 176*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 177*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 178*8d67ca89SAndroid Build Coastguard Worker 179*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 180*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 181*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 182*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 183*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 184*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) /* mismatch or null char seen */ 185*8d67ca89SAndroid Build Coastguard Worker 186*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 187*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 188*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 189*8d67ca89SAndroid Build Coastguard Worker#endif 190*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 191*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 192*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 193*8d67ca89SAndroid 
Build Coastguard Worker 194*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 195*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 196*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 197*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 198*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 199*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 200*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 201*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 202*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 203*8d67ca89SAndroid Build Coastguard Worker#endif 204*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 205*8d67ca89SAndroid Build Coastguard Worker jmp L(loop_ashr_0) 206*8d67ca89SAndroid Build Coastguard Worker 207*8d67ca89SAndroid Build Coastguard Worker/* 208*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_1 209*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 210*8d67ca89SAndroid Build Coastguard Worker * n(15) n -15 0(15 +(n-15) - n) ashr_1 211*8d67ca89SAndroid Build Coastguard Worker */ 212*8d67ca89SAndroid Build Coastguard Worker .p2align 4 213*8d67ca89SAndroid Build Coastguard WorkerL(ashr_1): 214*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 215*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm2 216*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi), %xmm1 217*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 /* Any null chars? 
*/ 218*8d67ca89SAndroid Build Coastguard Worker pslldq $15, %xmm2 /* shift first string to align with second */ 219*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */ 220*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm2 /* packed sub of comparison results*/ 221*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm2, %r9d 222*8d67ca89SAndroid Build Coastguard Worker shr %cl, %edx /* adjust 0xffff for offset */ 223*8d67ca89SAndroid Build Coastguard Worker shr %cl, %r9d /* adjust for 16-byte offset */ 224*8d67ca89SAndroid Build Coastguard Worker sub %r9d, %edx 225*8d67ca89SAndroid Build Coastguard Worker jnz L(less32bytes) /* mismatch or null char seen */ 226*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm3 227*8d67ca89SAndroid Build Coastguard Worker UPDATE_STRNCMP_COUNTER 228*8d67ca89SAndroid Build Coastguard Worker 229*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 230*8d67ca89SAndroid Build Coastguard Worker mov $16, %rcx /* index for loads*/ 231*8d67ca89SAndroid Build Coastguard Worker mov $1, %r9d /* byte position left over from less32bytes case */ 232*8d67ca89SAndroid Build Coastguard Worker /* 233*8d67ca89SAndroid Build Coastguard Worker * Setup %r10 value allows us to detect crossing a page boundary. 234*8d67ca89SAndroid Build Coastguard Worker * When %r10 goes positive we have crossed a page boundary and 235*8d67ca89SAndroid Build Coastguard Worker * need to do a nibble. 
236*8d67ca89SAndroid Build Coastguard Worker */ 237*8d67ca89SAndroid Build Coastguard Worker lea 1(%rdi), %r10 238*8d67ca89SAndroid Build Coastguard Worker and $0xfff, %r10 /* offset into 4K page */ 239*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 /* subtract 4K pagesize */ 240*8d67ca89SAndroid Build Coastguard Worker 241*8d67ca89SAndroid Build Coastguard Worker .p2align 4 242*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_1): 243*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 244*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_1) /* cross page boundary */ 245*8d67ca89SAndroid Build Coastguard Worker 246*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_1): 247*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 248*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 249*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 /* store for next cycle */ 250*8d67ca89SAndroid Build Coastguard Worker 251*8d67ca89SAndroid Build Coastguard Worker palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */ 252*8d67ca89SAndroid Build Coastguard Worker 253*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 254*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 255*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 256*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 257*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 258*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 259*8d67ca89SAndroid Build Coastguard Worker 260*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 261*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 262*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 263*8d67ca89SAndroid Build Coastguard Worker#endif 264*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 265*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 266*8d67ca89SAndroid Build Coastguard Worker 267*8d67ca89SAndroid 
Build Coastguard Worker add $16, %r10 268*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_1) /* cross page boundary */ 269*8d67ca89SAndroid Build Coastguard Worker 270*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 271*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 272*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 /* store for next cycle */ 273*8d67ca89SAndroid Build Coastguard Worker 274*8d67ca89SAndroid Build Coastguard Worker palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */ 275*8d67ca89SAndroid Build Coastguard Worker 276*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 277*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 278*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 279*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 280*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 281*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 282*8d67ca89SAndroid Build Coastguard Worker 283*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 284*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 285*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 286*8d67ca89SAndroid Build Coastguard Worker#endif 287*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 288*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 289*8d67ca89SAndroid Build Coastguard Worker jmp L(loop_ashr_1) 290*8d67ca89SAndroid Build Coastguard Worker 291*8d67ca89SAndroid Build Coastguard Worker /* 292*8d67ca89SAndroid Build Coastguard Worker * Nibble avoids loads across page boundary. This is to avoid a potential 293*8d67ca89SAndroid Build Coastguard Worker * access into unmapped memory. 
294*8d67ca89SAndroid Build Coastguard Worker */ 295*8d67ca89SAndroid Build Coastguard Worker .p2align 4 296*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_1): 297*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm3, %xmm0 /* check nibble for null char*/ 298*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm0, %edx 299*8d67ca89SAndroid Build Coastguard Worker test $0xfffe, %edx 300*8d67ca89SAndroid Build Coastguard Worker jnz L(ashr_1_exittail) /* find null char*/ 301*8d67ca89SAndroid Build Coastguard Worker 302*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 303*8d67ca89SAndroid Build Coastguard Worker cmp $14, %r11 304*8d67ca89SAndroid Build Coastguard Worker jbe L(ashr_1_exittail) 305*8d67ca89SAndroid Build Coastguard Worker#endif 306*8d67ca89SAndroid Build Coastguard Worker 307*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 308*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 /* substract 4K from %r10 */ 309*8d67ca89SAndroid Build Coastguard Worker jmp L(gobble_ashr_1) 310*8d67ca89SAndroid Build Coastguard Worker 311*8d67ca89SAndroid Build Coastguard Worker /* 312*8d67ca89SAndroid Build Coastguard Worker * Once find null char, determine if there is a string mismatch 313*8d67ca89SAndroid Build Coastguard Worker * before the null char. 
314*8d67ca89SAndroid Build Coastguard Worker */ 315*8d67ca89SAndroid Build Coastguard Worker .p2align 4 316*8d67ca89SAndroid Build Coastguard WorkerL(ashr_1_exittail): 317*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 318*8d67ca89SAndroid Build Coastguard Worker psrldq $1, %xmm0 319*8d67ca89SAndroid Build Coastguard Worker psrldq $1, %xmm3 320*8d67ca89SAndroid Build Coastguard Worker jmp L(aftertail) 321*8d67ca89SAndroid Build Coastguard Worker 322*8d67ca89SAndroid Build Coastguard Worker/* 323*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_2 324*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 325*8d67ca89SAndroid Build Coastguard Worker * n(14~15) n -14 1(15 +(n-14) - n) ashr_2 326*8d67ca89SAndroid Build Coastguard Worker */ 327*8d67ca89SAndroid Build Coastguard Worker .p2align 4 328*8d67ca89SAndroid Build Coastguard WorkerL(ashr_2): 329*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 330*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm2 331*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi), %xmm1 332*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 333*8d67ca89SAndroid Build Coastguard Worker pslldq $14, %xmm2 334*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm2 335*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm2 336*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm2, %r9d 337*8d67ca89SAndroid Build Coastguard Worker shr %cl, %edx 338*8d67ca89SAndroid Build Coastguard Worker shr %cl, %r9d 339*8d67ca89SAndroid Build Coastguard Worker sub %r9d, %edx 340*8d67ca89SAndroid Build Coastguard Worker jnz L(less32bytes) 341*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm3 342*8d67ca89SAndroid Build Coastguard Worker UPDATE_STRNCMP_COUNTER 343*8d67ca89SAndroid Build Coastguard Worker 344*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 
345*8d67ca89SAndroid Build Coastguard Worker mov $16, %rcx /* index for loads */ 346*8d67ca89SAndroid Build Coastguard Worker mov $2, %r9d /* byte position left over from less32bytes case */ 347*8d67ca89SAndroid Build Coastguard Worker /* 348*8d67ca89SAndroid Build Coastguard Worker * Setup %r10 value allows us to detect crossing a page boundary. 349*8d67ca89SAndroid Build Coastguard Worker * When %r10 goes positive we have crossed a page boundary and 350*8d67ca89SAndroid Build Coastguard Worker * need to do a nibble. 351*8d67ca89SAndroid Build Coastguard Worker */ 352*8d67ca89SAndroid Build Coastguard Worker lea 2(%rdi), %r10 353*8d67ca89SAndroid Build Coastguard Worker and $0xfff, %r10 /* offset into 4K page */ 354*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 /* subtract 4K pagesize */ 355*8d67ca89SAndroid Build Coastguard Worker 356*8d67ca89SAndroid Build Coastguard Worker .p2align 4 357*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_2): 358*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 359*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_2) 360*8d67ca89SAndroid Build Coastguard Worker 361*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_2): 362*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 363*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 364*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 365*8d67ca89SAndroid Build Coastguard Worker 366*8d67ca89SAndroid Build Coastguard Worker palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */ 367*8d67ca89SAndroid Build Coastguard Worker 368*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 369*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 370*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 371*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 372*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 373*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 
374*8d67ca89SAndroid Build Coastguard Worker 375*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 376*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 377*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 378*8d67ca89SAndroid Build Coastguard Worker#endif 379*8d67ca89SAndroid Build Coastguard Worker 380*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 381*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 382*8d67ca89SAndroid Build Coastguard Worker 383*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 384*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_2) /* cross page boundary */ 385*8d67ca89SAndroid Build Coastguard Worker 386*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 387*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 388*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 389*8d67ca89SAndroid Build Coastguard Worker 390*8d67ca89SAndroid Build Coastguard Worker palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */ 391*8d67ca89SAndroid Build Coastguard Worker 392*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 393*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 394*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 395*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 396*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 397*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 398*8d67ca89SAndroid Build Coastguard Worker 399*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 400*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 401*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 402*8d67ca89SAndroid Build Coastguard Worker#endif 403*8d67ca89SAndroid Build Coastguard Worker 404*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 405*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 406*8d67ca89SAndroid Build Coastguard Worker jmp L(loop_ashr_2) 
407*8d67ca89SAndroid Build Coastguard Worker 408*8d67ca89SAndroid Build Coastguard Worker .p2align 4 409*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_2): 410*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm3, %xmm0 /* check nibble for null char */ 411*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm0, %edx 412*8d67ca89SAndroid Build Coastguard Worker test $0xfffc, %edx 413*8d67ca89SAndroid Build Coastguard Worker jnz L(ashr_2_exittail) 414*8d67ca89SAndroid Build Coastguard Worker 415*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 416*8d67ca89SAndroid Build Coastguard Worker cmp $13, %r11 417*8d67ca89SAndroid Build Coastguard Worker jbe L(ashr_2_exittail) 418*8d67ca89SAndroid Build Coastguard Worker#endif 419*8d67ca89SAndroid Build Coastguard Worker 420*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 421*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 422*8d67ca89SAndroid Build Coastguard Worker jmp L(gobble_ashr_2) 423*8d67ca89SAndroid Build Coastguard Worker 424*8d67ca89SAndroid Build Coastguard Worker .p2align 4 425*8d67ca89SAndroid Build Coastguard WorkerL(ashr_2_exittail): 426*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 427*8d67ca89SAndroid Build Coastguard Worker psrldq $2, %xmm0 428*8d67ca89SAndroid Build Coastguard Worker psrldq $2, %xmm3 429*8d67ca89SAndroid Build Coastguard Worker jmp L(aftertail) 430*8d67ca89SAndroid Build Coastguard Worker 431*8d67ca89SAndroid Build Coastguard Worker/* 432*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_3 433*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 434*8d67ca89SAndroid Build Coastguard Worker * n(13~15) n -13 2(15 +(n-13) - n) ashr_3 435*8d67ca89SAndroid Build Coastguard Worker */ 436*8d67ca89SAndroid Build Coastguard Worker .p2align 4 437*8d67ca89SAndroid Build Coastguard WorkerL(ashr_3): 438*8d67ca89SAndroid Build Coastguard 
Worker pxor %xmm0, %xmm0 439*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm2 440*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi), %xmm1 441*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 442*8d67ca89SAndroid Build Coastguard Worker pslldq $13, %xmm2 443*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm2 444*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm2 445*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm2, %r9d 446*8d67ca89SAndroid Build Coastguard Worker shr %cl, %edx 447*8d67ca89SAndroid Build Coastguard Worker shr %cl, %r9d 448*8d67ca89SAndroid Build Coastguard Worker sub %r9d, %edx 449*8d67ca89SAndroid Build Coastguard Worker jnz L(less32bytes) 450*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm3 451*8d67ca89SAndroid Build Coastguard Worker 452*8d67ca89SAndroid Build Coastguard Worker UPDATE_STRNCMP_COUNTER 453*8d67ca89SAndroid Build Coastguard Worker 454*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 455*8d67ca89SAndroid Build Coastguard Worker mov $16, %rcx /* index for loads */ 456*8d67ca89SAndroid Build Coastguard Worker mov $3, %r9d /* byte position left over from less32bytes case */ 457*8d67ca89SAndroid Build Coastguard Worker /* 458*8d67ca89SAndroid Build Coastguard Worker * Setup %r10 value allows us to detect crossing a page boundary. 459*8d67ca89SAndroid Build Coastguard Worker * When %r10 goes positive we have crossed a page boundary and 460*8d67ca89SAndroid Build Coastguard Worker * need to do a nibble. 
461*8d67ca89SAndroid Build Coastguard Worker */ 462*8d67ca89SAndroid Build Coastguard Worker lea 3(%rdi), %r10 463*8d67ca89SAndroid Build Coastguard Worker and $0xfff, %r10 /* offset into 4K page */ 464*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 /* subtract 4K pagesize */ 465*8d67ca89SAndroid Build Coastguard Worker 466*8d67ca89SAndroid Build Coastguard Worker .p2align 4 467*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_3): 468*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 469*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_3) 470*8d67ca89SAndroid Build Coastguard Worker 471*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_3): 472*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 473*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 474*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 475*8d67ca89SAndroid Build Coastguard Worker 476*8d67ca89SAndroid Build Coastguard Worker palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */ 477*8d67ca89SAndroid Build Coastguard Worker 478*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 479*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 480*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 481*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 482*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 483*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 484*8d67ca89SAndroid Build Coastguard Worker 485*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 486*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 487*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 488*8d67ca89SAndroid Build Coastguard Worker#endif 489*8d67ca89SAndroid Build Coastguard Worker 490*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 491*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 492*8d67ca89SAndroid Build Coastguard Worker 493*8d67ca89SAndroid Build 
Coastguard Worker add $16, %r10 494*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_3) /* cross page boundary */ 495*8d67ca89SAndroid Build Coastguard Worker 496*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 497*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 498*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 499*8d67ca89SAndroid Build Coastguard Worker 500*8d67ca89SAndroid Build Coastguard Worker palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */ 501*8d67ca89SAndroid Build Coastguard Worker 502*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 503*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 504*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 505*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 506*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 507*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 508*8d67ca89SAndroid Build Coastguard Worker 509*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 510*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 511*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 512*8d67ca89SAndroid Build Coastguard Worker#endif 513*8d67ca89SAndroid Build Coastguard Worker 514*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 515*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 516*8d67ca89SAndroid Build Coastguard Worker jmp L(loop_ashr_3) 517*8d67ca89SAndroid Build Coastguard Worker 518*8d67ca89SAndroid Build Coastguard Worker .p2align 4 519*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_3): 520*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm3, %xmm0 /* check nibble for null char */ 521*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm0, %edx 522*8d67ca89SAndroid Build Coastguard Worker test $0xfff8, %edx 523*8d67ca89SAndroid Build Coastguard Worker jnz L(ashr_3_exittail) 524*8d67ca89SAndroid Build Coastguard Worker 525*8d67ca89SAndroid 
Build Coastguard Worker#ifdef USE_AS_STRNCMP 526*8d67ca89SAndroid Build Coastguard Worker cmp $12, %r11 527*8d67ca89SAndroid Build Coastguard Worker jbe L(ashr_3_exittail) 528*8d67ca89SAndroid Build Coastguard Worker#endif 529*8d67ca89SAndroid Build Coastguard Worker 530*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 531*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 532*8d67ca89SAndroid Build Coastguard Worker jmp L(gobble_ashr_3) 533*8d67ca89SAndroid Build Coastguard Worker 534*8d67ca89SAndroid Build Coastguard Worker .p2align 4 535*8d67ca89SAndroid Build Coastguard WorkerL(ashr_3_exittail): 536*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 537*8d67ca89SAndroid Build Coastguard Worker psrldq $3, %xmm0 538*8d67ca89SAndroid Build Coastguard Worker psrldq $3, %xmm3 539*8d67ca89SAndroid Build Coastguard Worker jmp L(aftertail) 540*8d67ca89SAndroid Build Coastguard Worker 541*8d67ca89SAndroid Build Coastguard Worker/* 542*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_4 543*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 544*8d67ca89SAndroid Build Coastguard Worker * n(12~15) n -12 3(15 +(n-12) - n) ashr_4 545*8d67ca89SAndroid Build Coastguard Worker */ 546*8d67ca89SAndroid Build Coastguard Worker .p2align 4 547*8d67ca89SAndroid Build Coastguard WorkerL(ashr_4): 548*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 549*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm2 550*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi), %xmm1 551*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 552*8d67ca89SAndroid Build Coastguard Worker pslldq $12, %xmm2 553*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm2 554*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm2 555*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm2, %r9d 556*8d67ca89SAndroid Build Coastguard Worker 
/*
 * L(ashr_4), continued: finish the check of the first, partially valid
 * 16-byte block.  %r9d holds the "equal and not NUL" byte mask computed just
 * above.  It and the reference mask in %edx are both shifted right by %cl
 * (the offset of %rsi, per the case table) so that bytes in front of the
 * string start are ignored.
 * NOTE(review): %edx is presumably preloaded with 0xffff by the dispatch
 * code, which is outside this chunk -- confirm.
 */
	shr	%cl, %edx
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	L(less32bytes)		/* mismatch or NUL inside the first block */
	movdqa	(%rdi), %xmm3		/* carry: bytes 4..15 are still uncompared */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx		/* index for loads */
	mov	$4, %r9d		/* byte position left over from less32bytes case */
	/*
	 * Set up %r10 so that a page crossing on the %rdi side can be detected:
	 * it starts at (page offset of %rdi + 4) - 4096 and grows by 16 per
	 * block.  Once it turns positive the next %rdi load would touch the
	 * following page, so the carried bytes are examined first (the
	 * "nibble").
	 */
	lea	4(%rdi), %r10
	and	$0xfff, %r10		/* offset into 4K page */
	sub	$0x1000, %r10		/* subtract 4K pagesize */

	.p2align 4
L(loop_ashr_4):
	add	$16, %r10
	jg	L(nibble_ashr_4)

L(gobble_ashr_4):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw block as the next carry */

	palignr	$4, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0		/* %xmm0 (zero on entry) = NUL bytes of %rsi block */
	pcmpeqb	%xmm2, %xmm1		/* %xmm1 = bytes that are equal */
	psubb	%xmm0, %xmm1		/* clear the sign bit where the byte is NUL */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 16 bytes are equal and non-NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3

	/* Second, unrolled copy of the loop body. */
	add	$16, %r10
	jg	L(nibble_ashr_4)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$4, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_4)

	.p2align 4
L(nibble_ashr_4):
	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
	pmovmskb %xmm0, %edx
	test	$0xfff0, %edx		/* only the 12 pending bytes 4..15 matter */
	jnz	L(ashr_4_exittail)

#ifdef USE_AS_STRNCMP
	/*
	 * 12 carried bytes are still pending in %xmm3.  If no more than 12
	 * bytes remain to be compared, the comparison ends inside them and the
	 * next page must not be read.  The previous threshold of 11 let a
	 * remaining count of exactly 12 fall through to the load in
	 * L(gobble_ashr_4).
	 * NOTE(review): relies on %r11 counting the bytes left from
	 * (%rsi,%rcx) and on L(aftertail)/L(exit) clamping the result against
	 * %r11; both are outside this chunk -- confirm.
	 */
	cmp	$12, %r11
	jbe	L(ashr_4_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %r10		/* re-arm the page-crossing counter */
	jmp	L(gobble_ashr_4)

	.p2align 4
L(ashr_4_exittail):
	/*
	 * Shift the NUL mask and the carried block down by 4 so the pending
	 * bytes line up with byte 0 of the %rsi block, then finish in
	 * L(aftertail).
	 */
	movdqa	(%rsi, %rcx), %xmm1
	psrldq	$4, %xmm0
	psrldq	$4, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_5
 *  rcx(offset of rsi)  rax(offset of rdi)   relative offset      corresponding case
 *        n(11~15)            n - 11         4(15 +(n-11) - n)         ashr_5
 */
	.p2align 4
L(ashr_5):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* NUL bytes of the %rsi block */
	pslldq	$11, %xmm2		/* shift the %rdi block up by 11 bytes to line up with %rsi */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
/*
 * L(ashr_5), continued: finish the check of the first, partially valid
 * 16-byte block.  %r9d holds the "equal and not NUL" byte mask computed just
 * above.  It and the reference mask in %edx are both shifted right by %cl
 * (the offset of %rsi, per the case table) so that bytes in front of the
 * string start are ignored.
 * NOTE(review): %edx is presumably preloaded with 0xffff by the dispatch
 * code, which is outside this chunk -- confirm.
 */
	shr	%cl, %edx
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	L(less32bytes)		/* mismatch or NUL inside the first block */
	movdqa	(%rdi), %xmm3		/* carry: bytes 5..15 are still uncompared */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx		/* index for loads */
	mov	$5, %r9d		/* byte position left over from less32bytes case */
	/*
	 * Set up %r10 so that a page crossing on the %rdi side can be detected:
	 * it starts at (page offset of %rdi + 5) - 4096 and grows by 16 per
	 * block.  Once it turns positive the next %rdi load would touch the
	 * following page, so the carried bytes are examined first (the
	 * "nibble").
	 */
	lea	5(%rdi), %r10
	and	$0xfff, %r10		/* offset into 4K page */
	sub	$0x1000, %r10		/* subtract 4K pagesize */

	.p2align 4
L(loop_ashr_5):
	add	$16, %r10
	jg	L(nibble_ashr_5)

L(gobble_ashr_5):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw block as the next carry */

	palignr	$5, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0		/* %xmm0 (zero on entry) = NUL bytes of %rsi block */
	pcmpeqb	%xmm2, %xmm1		/* %xmm1 = bytes that are equal */
	psubb	%xmm0, %xmm1		/* clear the sign bit where the byte is NUL */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 16 bytes are equal and non-NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3

	/* Second, unrolled copy of the loop body. */
	add	$16, %r10
	jg	L(nibble_ashr_5)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$5, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_5)

	.p2align 4
L(nibble_ashr_5):
	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
	pmovmskb %xmm0, %edx
	test	$0xffe0, %edx		/* only the 11 pending bytes 5..15 matter */
	jnz	L(ashr_5_exittail)

#ifdef USE_AS_STRNCMP
	/*
	 * 11 carried bytes are still pending in %xmm3.  If no more than 11
	 * bytes remain to be compared, the comparison ends inside them and the
	 * next page must not be read.  The previous threshold of 10 let a
	 * remaining count of exactly 11 fall through to the load in
	 * L(gobble_ashr_5).
	 * NOTE(review): relies on %r11 counting the bytes left from
	 * (%rsi,%rcx) and on L(aftertail)/L(exit) clamping the result against
	 * %r11; both are outside this chunk -- confirm.
	 */
	cmp	$11, %r11
	jbe	L(ashr_5_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %r10		/* re-arm the page-crossing counter */
	jmp	L(gobble_ashr_5)

	.p2align 4
L(ashr_5_exittail):
	/*
	 * Shift the NUL mask and the carried block down by 5 so the pending
	 * bytes line up with byte 0 of the %rsi block, then finish in
	 * L(aftertail).
	 */
	movdqa	(%rsi, %rcx), %xmm1
	psrldq	$5, %xmm0
	psrldq	$5, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_6
 *  rcx(offset of rsi)  rax(offset of rdi)   relative offset      corresponding case
 *        n(10~15)            n - 10         5(15 +(n-10) - n)         ashr_6
 */
	.p2align 4
L(ashr_6):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* NUL bytes of the %rsi block */
	pslldq	$10, %xmm2		/* shift the %rdi block up by 10 bytes to line up with %rsi */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
/*
 * L(ashr_6), continued: finish the check of the first, partially valid
 * 16-byte block.  %r9d holds the "equal and not NUL" byte mask computed just
 * above.  It and the reference mask in %edx are both shifted right by %cl
 * (the offset of %rsi, per the case table) so that bytes in front of the
 * string start are ignored.
 * NOTE(review): %edx is presumably preloaded with 0xffff by the dispatch
 * code, which is outside this chunk -- confirm.
 */
	shr	%cl, %edx
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	L(less32bytes)		/* mismatch or NUL inside the first block */
	movdqa	(%rdi), %xmm3		/* carry: bytes 6..15 are still uncompared */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx		/* index for loads */
	mov	$6, %r9d		/* byte position left over from less32bytes case */
	/*
	 * Set up %r10 so that a page crossing on the %rdi side can be detected:
	 * it starts at (page offset of %rdi + 6) - 4096 and grows by 16 per
	 * block.  Once it turns positive the next %rdi load would touch the
	 * following page, so the carried bytes are examined first (the
	 * "nibble").
	 */
	lea	6(%rdi), %r10
	and	$0xfff, %r10		/* offset into 4K page */
	sub	$0x1000, %r10		/* subtract 4K pagesize */

	.p2align 4
L(loop_ashr_6):
	add	$16, %r10
	jg	L(nibble_ashr_6)

L(gobble_ashr_6):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw block as the next carry */

	palignr	$6, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0		/* %xmm0 (zero on entry) = NUL bytes of %rsi block */
	pcmpeqb	%xmm2, %xmm1		/* %xmm1 = bytes that are equal */
	psubb	%xmm0, %xmm1		/* clear the sign bit where the byte is NUL */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 16 bytes are equal and non-NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3

	/* Second, unrolled copy of the loop body. */
	add	$16, %r10
	jg	L(nibble_ashr_6)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$6, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_6)

	.p2align 4
L(nibble_ashr_6):
	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
	pmovmskb %xmm0, %edx
	test	$0xffc0, %edx		/* only the 10 pending bytes 6..15 matter */
	jnz	L(ashr_6_exittail)

#ifdef USE_AS_STRNCMP
	/*
	 * 10 carried bytes are still pending in %xmm3.  If no more than 10
	 * bytes remain to be compared, the comparison ends inside them and the
	 * next page must not be read.  The previous threshold of 9 let a
	 * remaining count of exactly 10 fall through to the load in
	 * L(gobble_ashr_6).
	 * NOTE(review): relies on %r11 counting the bytes left from
	 * (%rsi,%rcx) and on L(aftertail)/L(exit) clamping the result against
	 * %r11; both are outside this chunk -- confirm.
	 */
	cmp	$10, %r11
	jbe	L(ashr_6_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %r10		/* re-arm the page-crossing counter */
	jmp	L(gobble_ashr_6)

	.p2align 4
L(ashr_6_exittail):
	/*
	 * Shift the NUL mask and the carried block down by 6 so the pending
	 * bytes line up with byte 0 of the %rsi block, then finish in
	 * L(aftertail).
	 */
	movdqa	(%rsi, %rcx), %xmm1
	psrldq	$6, %xmm0
	psrldq	$6, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_7
 *  rcx(offset of rsi)  rax(offset of rdi)   relative offset      corresponding case
 *        n(9~15)             n - 9          6(15 +(n - 9) - n)        ashr_7
 */
	.p2align 4
L(ashr_7):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* NUL bytes of the %rsi block */
	pslldq	$9, %xmm2		/* shift the %rdi block up by 9 bytes to line up with %rsi */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
/*
 * L(ashr_7), continued: finish the check of the first, partially valid
 * 16-byte block.  %r9d holds the "equal and not NUL" byte mask computed just
 * above.  It and the reference mask in %edx are both shifted right by %cl
 * (the offset of %rsi, per the case table) so that bytes in front of the
 * string start are ignored.
 * NOTE(review): %edx is presumably preloaded with 0xffff by the dispatch
 * code, which is outside this chunk -- confirm.
 */
	shr	%cl, %edx
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	L(less32bytes)		/* mismatch or NUL inside the first block */
	movdqa	(%rdi), %xmm3		/* carry: bytes 7..15 are still uncompared */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx		/* index for loads */
	mov	$7, %r9d		/* byte position left over from less32bytes case */
	/*
	 * Set up %r10 so that a page crossing on the %rdi side can be detected:
	 * it starts at (page offset of %rdi + 7) - 4096 and grows by 16 per
	 * block.  Once it turns positive the next %rdi load would touch the
	 * following page, so the carried bytes are examined first (the
	 * "nibble").
	 */
	lea	7(%rdi), %r10
	and	$0xfff, %r10		/* offset into 4K page */
	sub	$0x1000, %r10		/* subtract 4K pagesize */

	.p2align 4
L(loop_ashr_7):
	add	$16, %r10
	jg	L(nibble_ashr_7)

L(gobble_ashr_7):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw block as the next carry */

	palignr	$7, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0		/* %xmm0 (zero on entry) = NUL bytes of %rsi block */
	pcmpeqb	%xmm2, %xmm1		/* %xmm1 = bytes that are equal */
	psubb	%xmm0, %xmm1		/* clear the sign bit where the byte is NUL */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 16 bytes are equal and non-NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3

	/* Second, unrolled copy of the loop body. */
	add	$16, %r10
	jg	L(nibble_ashr_7)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_7)

	.p2align 4
L(nibble_ashr_7):
	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
	pmovmskb %xmm0, %edx
	test	$0xff80, %edx		/* only the 9 pending bytes 7..15 matter */
	jnz	L(ashr_7_exittail)

#ifdef USE_AS_STRNCMP
	/*
	 * 9 carried bytes are still pending in %xmm3.  If no more than 9
	 * bytes remain to be compared, the comparison ends inside them and the
	 * next page must not be read.  The previous threshold of 8 let a
	 * remaining count of exactly 9 fall through to the load in
	 * L(gobble_ashr_7).
	 * NOTE(review): relies on %r11 counting the bytes left from
	 * (%rsi,%rcx) and on L(aftertail)/L(exit) clamping the result against
	 * %r11; both are outside this chunk -- confirm.
	 */
	cmp	$9, %r11
	jbe	L(ashr_7_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %r10		/* re-arm the page-crossing counter */
	jmp	L(gobble_ashr_7)

	.p2align 4
L(ashr_7_exittail):
	/*
	 * Shift the NUL mask and the carried block down by 7 so the pending
	 * bytes line up with byte 0 of the %rsi block, then finish in
	 * L(aftertail).
	 */
	movdqa	(%rsi, %rcx), %xmm1
	psrldq	$7, %xmm0
	psrldq	$7, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_8
 *  rcx(offset of rsi)  rax(offset of rdi)   relative offset      corresponding case
 *        n(8~15)             n - 8          7(15 +(n - 8) - n)        ashr_8
 */
	.p2align 4
L(ashr_8):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* NUL bytes of the %rsi block */
	pslldq	$8, %xmm2		/* shift the %rdi block up by 8 bytes to line up with %rsi */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
shr %cl, %edx 997*8d67ca89SAndroid Build Coastguard Worker shr %cl, %r9d 998*8d67ca89SAndroid Build Coastguard Worker sub %r9d, %edx 999*8d67ca89SAndroid Build Coastguard Worker jnz L(less32bytes) 1000*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm3 1001*8d67ca89SAndroid Build Coastguard Worker 1002*8d67ca89SAndroid Build Coastguard Worker UPDATE_STRNCMP_COUNTER 1003*8d67ca89SAndroid Build Coastguard Worker 1004*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1005*8d67ca89SAndroid Build Coastguard Worker mov $16, %rcx /* index for loads */ 1006*8d67ca89SAndroid Build Coastguard Worker mov $8, %r9d /* byte position left over from less32bytes case */ 1007*8d67ca89SAndroid Build Coastguard Worker /* 1008*8d67ca89SAndroid Build Coastguard Worker * Setup %r10 value allows us to detect crossing a page boundary. 1009*8d67ca89SAndroid Build Coastguard Worker * When %r10 goes positive we have crossed a page boundary and 1010*8d67ca89SAndroid Build Coastguard Worker * need to do a nibble. 
1011*8d67ca89SAndroid Build Coastguard Worker */ 1012*8d67ca89SAndroid Build Coastguard Worker lea 8(%rdi), %r10 1013*8d67ca89SAndroid Build Coastguard Worker and $0xfff, %r10 /* offset into 4K page */ 1014*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 /* subtract 4K pagesize */ 1015*8d67ca89SAndroid Build Coastguard Worker 1016*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1017*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_8): 1018*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 1019*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_8) 1020*8d67ca89SAndroid Build Coastguard Worker 1021*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_8): 1022*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1023*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 1024*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 1025*8d67ca89SAndroid Build Coastguard Worker 1026*8d67ca89SAndroid Build Coastguard Worker palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */ 1027*8d67ca89SAndroid Build Coastguard Worker 1028*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1029*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 1030*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1031*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1032*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 1033*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 1034*8d67ca89SAndroid Build Coastguard Worker 1035*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1036*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 1037*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1038*8d67ca89SAndroid Build Coastguard Worker#endif 1039*8d67ca89SAndroid Build Coastguard Worker 1040*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 1041*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 1042*8d67ca89SAndroid Build Coastguard Worker 
/*
 * Second unrolled step of the ashr_8 loop: compare the next 16 bytes at
 * (%rsi, %rcx) with the %rdi stream realigned by 8 bytes.  %xmm3 holds the
 * previously loaded %rdi block and %xmm0 is all-zero on entry.
 */
	add	$16, %r10
	jg	L(nibble_ashr_8)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw %rdi block for the next step */

	palignr $8, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block has a null */
	pcmpeqb	%xmm2, %xmm1		/* 0xff where the two blocks agree */
	psubb	%xmm0, %xmm1		/* a null byte cancels its own match */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes match, none null */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_8)

/*
 * Reached when %r10 went positive, i.e. the next %rdi load would be on a
 * new page.  Before touching it, check whether the comparison can finish
 * inside the 8 bytes of %xmm3 (bytes 8..15) that are still uncompared.
 */
	.p2align 4
L(nibble_ashr_8):
	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
	pmovmskb %xmm0, %edx
	test	$0xff00, %edx		/* null among bytes 8..15? */
	jnz	L(ashr_8_exittail)

#ifdef USE_AS_STRNCMP
	/*
	 * 8 bytes are still pending in %xmm3.  If the remaining count fits
	 * in them (<= 8) the result is decided without the next %rdi block,
	 * so do not read across the page boundary.
	 */
	cmp	$8, %r11
	jbe	L(ashr_8_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %r10		/* re-arm the page-boundary detector */
	jmp	L(gobble_ashr_8)

/*
 * Tail: drop the 8 already-compared bytes from both the null mask (%xmm0)
 * and the pending %rdi data (%xmm3), then let L(aftertail) finish.
 */
	.p2align 4
L(ashr_8_exittail):
	movdqa	(%rsi, %rcx), %xmm1
	psrldq	$8, %xmm0
	psrldq	$8, %xmm3
	jmp	L(aftertail)

/*
 *  The following cases will be handled by ashr_9
 *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
 *        n(7~15)          n - 7		  8(15 +(n - 7) - n)         ashr_9
 */
	.p2align 4
L(ashr_9):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* null bytes in the first %rsi block */
	pslldq	$7, %xmm2		/* line the %rdi block up with %rsi */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	shr	%cl, %edx		/* discard bits below the %rsi offset (%cl) */
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	L(less32bytes)		/* mismatch or null in the first block */
	movdqa	(%rdi), %xmm3		/* unshifted block feeds the first palignr */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx	/* index for loads */
	mov	$9, %r9d	/* byte position left over from less32bytes case */
	/*
	 * Setup %r10 value allows us to detect crossing a page boundary.
	 * When %r10 goes positive we have crossed a page boundary and
	 * need to do a nibble.
	 */
	lea	9(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
L(loop_ashr_9):
	add	$16, %r10
	jg	L(nibble_ashr_9)

L(gobble_ashr_9):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw %rdi block for the next step */

	palignr $9, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
/*
 * Second unrolled step of the ashr_9 loop: compare the next 16 bytes at
 * (%rsi, %rcx) with the %rdi stream realigned by 9 bytes.  %xmm3 holds the
 * previously loaded %rdi block and %xmm0 is all-zero on entry.
 */
	add	$16, %r10
	jg	L(nibble_ashr_9)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw %rdi block for the next step */

	palignr $9, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block has a null */
	pcmpeqb	%xmm2, %xmm1		/* 0xff where the two blocks agree */
	psubb	%xmm0, %xmm1		/* a null byte cancels its own match */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes match, none null */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3		/* store for next cycle */
	jmp	L(loop_ashr_9)

/*
 * Reached when %r10 went positive, i.e. the next %rdi load would be on a
 * new page.  Before touching it, check whether the comparison can finish
 * inside the 7 bytes of %xmm3 (bytes 9..15) that are still uncompared.
 */
	.p2align 4
L(nibble_ashr_9):
	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
	pmovmskb %xmm0, %edx
	test	$0xfe00, %edx		/* null among bytes 9..15? */
	jnz	L(ashr_9_exittail)

#ifdef USE_AS_STRNCMP
	/*
	 * 7 bytes are still pending in %xmm3.  If the remaining count fits
	 * in them (<= 7) the result is decided without the next %rdi block,
	 * so do not read across the page boundary.
	 */
	cmp	$7, %r11
	jbe	L(ashr_9_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %r10		/* re-arm the page-boundary detector */
	jmp	L(gobble_ashr_9)

/*
 * Tail: drop the 9 already-compared bytes from both the null mask (%xmm0)
 * and the pending %rdi data (%xmm3), then let L(aftertail) finish.
 */
	.p2align 4
L(ashr_9_exittail):
	movdqa	(%rsi, %rcx), %xmm1
	psrldq	$9, %xmm0
	psrldq	$9, %xmm3
	jmp	L(aftertail)

/*
 *  The following cases will be handled by ashr_10
 *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
 *        n(6~15)          n - 6		  9(15 +(n - 6) - n)         ashr_10
 */
	.p2align 4
L(ashr_10):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* null bytes in the first %rsi block */
	pslldq	$6, %xmm2		/* line the %rdi block up with %rsi */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	shr	%cl, %edx		/* discard bits below the %rsi offset (%cl) */
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	L(less32bytes)		/* mismatch or null in the first block */
	movdqa	(%rdi), %xmm3		/* unshifted block feeds the first palignr */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx	/* index for loads */
	mov	$10, %r9d	/* byte position left over from less32bytes case */
	/*
	 * Setup %r10 value allows us to detect crossing a page boundary.
	 * When %r10 goes positive we have crossed a page boundary and
	 * need to do a nibble.
	 */
	lea	10(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
L(loop_ashr_10):
	add	$16, %r10
	jg	L(nibble_ashr_10)

L(gobble_ashr_10):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw %rdi block for the next step */

	palignr $10, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
/*
 * Second unrolled step of the ashr_10 loop: compare the next 16 bytes at
 * (%rsi, %rcx) with the %rdi stream realigned by 10 bytes.  %xmm3 holds the
 * previously loaded %rdi block and %xmm0 is all-zero on entry.
 */
	add	$16, %r10
	jg	L(nibble_ashr_10)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw %rdi block for the next step */

	palignr $10, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block has a null */
	pcmpeqb	%xmm2, %xmm1		/* 0xff where the two blocks agree */
	psubb	%xmm0, %xmm1		/* a null byte cancels its own match */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes match, none null */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_10)

/*
 * Reached when %r10 went positive, i.e. the next %rdi load would be on a
 * new page.  Before touching it, check whether the comparison can finish
 * inside the 6 bytes of %xmm3 (bytes 10..15) that are still uncompared.
 */
	.p2align 4
L(nibble_ashr_10):
	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
	pmovmskb %xmm0, %edx
	test	$0xfc00, %edx		/* null among bytes 10..15? */
	jnz	L(ashr_10_exittail)

#ifdef USE_AS_STRNCMP
	/*
	 * 6 bytes are still pending in %xmm3.  If the remaining count fits
	 * in them (<= 6) the result is decided without the next %rdi block,
	 * so do not read across the page boundary.
	 */
	cmp	$6, %r11
	jbe	L(ashr_10_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %r10		/* re-arm the page-boundary detector */
	jmp	L(gobble_ashr_10)

/*
 * Tail: drop the 10 already-compared bytes from both the null mask (%xmm0)
 * and the pending %rdi data (%xmm3), then let L(aftertail) finish.
 */
	.p2align 4
L(ashr_10_exittail):
	movdqa	(%rsi, %rcx), %xmm1
	psrldq	$10, %xmm0
	psrldq	$10, %xmm3
	jmp	L(aftertail)

/*
 *  The following cases will be handled by ashr_11
 *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
 *        n(5~15)          n - 5		  10(15 +(n - 5) - n)         ashr_11
 */
	.p2align 4
L(ashr_11):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* null bytes in the first %rsi block */
	pslldq	$5, %xmm2		/* line the %rdi block up with %rsi */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	shr	%cl, %edx		/* discard bits below the %rsi offset (%cl) */
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	L(less32bytes)		/* mismatch or null in the first block */
	movdqa	(%rdi), %xmm3		/* unshifted block feeds the first palignr */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx	/* index for loads */
	mov	$11, %r9d	/* byte position left over from less32bytes case */
	/*
	 * Setup %r10 value allows us to detect crossing a page boundary.
	 * When %r10 goes positive we have crossed a page boundary and
	 * need to do a nibble.
	 */
	lea	11(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
L(loop_ashr_11):
	add	$16, %r10
	jg	L(nibble_ashr_11)

L(gobble_ashr_11):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw %rdi block for the next step */

	palignr $11, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
/*
 * Second unrolled step of the ashr_11 loop: compare the next 16 bytes at
 * (%rsi, %rcx) with the %rdi stream realigned by 11 bytes.  %xmm3 holds the
 * previously loaded %rdi block and %xmm0 is all-zero on entry.
 */
	add	$16, %r10
	jg	L(nibble_ashr_11)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw %rdi block for the next step */

	palignr $11, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block has a null */
	pcmpeqb	%xmm2, %xmm1		/* 0xff where the two blocks agree */
	psubb	%xmm0, %xmm1		/* a null byte cancels its own match */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes match, none null */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_11)

/*
 * Reached when %r10 went positive, i.e. the next %rdi load would be on a
 * new page.  Before touching it, check whether the comparison can finish
 * inside the 5 bytes of %xmm3 (bytes 11..15) that are still uncompared.
 */
	.p2align 4
L(nibble_ashr_11):
	pcmpeqb	%xmm3, %xmm0		/* check nibble for null char */
	pmovmskb %xmm0, %edx
	test	$0xf800, %edx		/* null among bytes 11..15? */
	jnz	L(ashr_11_exittail)

#ifdef USE_AS_STRNCMP
	/*
	 * 5 bytes are still pending in %xmm3.  If the remaining count fits
	 * in them (<= 5) the result is decided without the next %rdi block,
	 * so do not read across the page boundary.
	 */
	cmp	$5, %r11
	jbe	L(ashr_11_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %r10		/* re-arm the page-boundary detector */
	jmp	L(gobble_ashr_11)

/*
 * Tail: drop the 11 already-compared bytes from both the null mask (%xmm0)
 * and the pending %rdi data (%xmm3), then let L(aftertail) finish.
 */
	.p2align 4
L(ashr_11_exittail):
	movdqa	(%rsi, %rcx), %xmm1
	psrldq	$11, %xmm0
	psrldq	$11, %xmm3
	jmp	L(aftertail)

/*
 *  The following cases will be handled by ashr_12
 *  rcx(offset of rsi)  rax(offset of rdi)        relative offset	 corresponding case
 *        n(4~15)          n - 4		  11(15 +(n - 4) - n)         ashr_12
 */
	.p2align 4
L(ashr_12):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* null bytes in the first %rsi block */
	pslldq	$4, %xmm2		/* line the %rdi block up with %rsi */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	shr	%cl, %edx		/* discard bits below the %rsi offset (%cl) */
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	L(less32bytes)		/* mismatch or null in the first block */
	movdqa	(%rdi), %xmm3		/* unshifted block feeds the first palignr */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx	/* index for loads */
	mov	$12, %r9d	/* byte position left over from less32bytes case */
	/*
	 * Setup %r10 value allows us to detect crossing a page boundary.
	 * When %r10 goes positive we have crossed a page boundary and
	 * need to do a nibble.
	 */
	lea	12(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
L(loop_ashr_12):
	add	$16, %r10
	jg	L(nibble_ashr_12)

L(gobble_ashr_12):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep the raw %rdi block for the next step */

	palignr $12, %xmm3, %xmm2	/* merge into one 16byte value */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	L(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
Worker 1483*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 1484*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_12) /* cross page boundary */ 1485*8d67ca89SAndroid Build Coastguard Worker 1486*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1487*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 1488*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 1489*8d67ca89SAndroid Build Coastguard Worker 1490*8d67ca89SAndroid Build Coastguard Worker palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */ 1491*8d67ca89SAndroid Build Coastguard Worker 1492*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1493*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 1494*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1495*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1496*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 1497*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 1498*8d67ca89SAndroid Build Coastguard Worker 1499*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1500*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 1501*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1502*8d67ca89SAndroid Build Coastguard Worker#endif 1503*8d67ca89SAndroid Build Coastguard Worker 1504*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 1505*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 1506*8d67ca89SAndroid Build Coastguard Worker jmp L(loop_ashr_12) 1507*8d67ca89SAndroid Build Coastguard Worker 1508*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1509*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_12): 1510*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm3, %xmm0 /* check nibble for null char */ 1511*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm0, %edx 1512*8d67ca89SAndroid Build Coastguard Worker test $0xf000, %edx 1513*8d67ca89SAndroid Build Coastguard Worker jnz 
L(ashr_12_exittail) 1514*8d67ca89SAndroid Build Coastguard Worker 1515*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1516*8d67ca89SAndroid Build Coastguard Worker cmp $3, %r11 1517*8d67ca89SAndroid Build Coastguard Worker jbe L(ashr_12_exittail) 1518*8d67ca89SAndroid Build Coastguard Worker#endif 1519*8d67ca89SAndroid Build Coastguard Worker 1520*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1521*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 1522*8d67ca89SAndroid Build Coastguard Worker jmp L(gobble_ashr_12) 1523*8d67ca89SAndroid Build Coastguard Worker 1524*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1525*8d67ca89SAndroid Build Coastguard WorkerL(ashr_12_exittail): 1526*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1527*8d67ca89SAndroid Build Coastguard Worker psrldq $12, %xmm0 1528*8d67ca89SAndroid Build Coastguard Worker psrldq $12, %xmm3 1529*8d67ca89SAndroid Build Coastguard Worker jmp L(aftertail) 1530*8d67ca89SAndroid Build Coastguard Worker 1531*8d67ca89SAndroid Build Coastguard Worker/* 1532*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_13 1533*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 1534*8d67ca89SAndroid Build Coastguard Worker * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13 1535*8d67ca89SAndroid Build Coastguard Worker */ 1536*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1537*8d67ca89SAndroid Build Coastguard WorkerL(ashr_13): 1538*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1539*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm2 1540*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi), %xmm1 1541*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1542*8d67ca89SAndroid Build Coastguard Worker pslldq $3, %xmm2 1543*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm2 1544*8d67ca89SAndroid Build Coastguard Worker psubb 
%xmm0, %xmm2 1545*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm2, %r9d 1546*8d67ca89SAndroid Build Coastguard Worker shr %cl, %edx 1547*8d67ca89SAndroid Build Coastguard Worker shr %cl, %r9d 1548*8d67ca89SAndroid Build Coastguard Worker sub %r9d, %edx 1549*8d67ca89SAndroid Build Coastguard Worker jnz L(less32bytes) 1550*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm3 1551*8d67ca89SAndroid Build Coastguard Worker 1552*8d67ca89SAndroid Build Coastguard Worker UPDATE_STRNCMP_COUNTER 1553*8d67ca89SAndroid Build Coastguard Worker 1554*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1555*8d67ca89SAndroid Build Coastguard Worker mov $16, %rcx /* index for loads */ 1556*8d67ca89SAndroid Build Coastguard Worker mov $13, %r9d /* byte position left over from less32bytes case */ 1557*8d67ca89SAndroid Build Coastguard Worker /* 1558*8d67ca89SAndroid Build Coastguard Worker * Setup %r10 value allows us to detect crossing a page boundary. 1559*8d67ca89SAndroid Build Coastguard Worker * When %r10 goes positive we have crossed a page boundary and 1560*8d67ca89SAndroid Build Coastguard Worker * need to do a nibble. 
1561*8d67ca89SAndroid Build Coastguard Worker */ 1562*8d67ca89SAndroid Build Coastguard Worker lea 13(%rdi), %r10 1563*8d67ca89SAndroid Build Coastguard Worker and $0xfff, %r10 /* offset into 4K page */ 1564*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 /* subtract 4K pagesize */ 1565*8d67ca89SAndroid Build Coastguard Worker 1566*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1567*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_13): 1568*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 1569*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_13) 1570*8d67ca89SAndroid Build Coastguard Worker 1571*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_13): 1572*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1573*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 1574*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 1575*8d67ca89SAndroid Build Coastguard Worker 1576*8d67ca89SAndroid Build Coastguard Worker palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */ 1577*8d67ca89SAndroid Build Coastguard Worker 1578*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1579*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 1580*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1581*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1582*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 1583*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 1584*8d67ca89SAndroid Build Coastguard Worker 1585*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1586*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 1587*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1588*8d67ca89SAndroid Build Coastguard Worker#endif 1589*8d67ca89SAndroid Build Coastguard Worker 1590*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 1591*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 1592*8d67ca89SAndroid Build Coastguard 
Worker 1593*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 1594*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_13) /* cross page boundary */ 1595*8d67ca89SAndroid Build Coastguard Worker 1596*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1597*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 1598*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 1599*8d67ca89SAndroid Build Coastguard Worker 1600*8d67ca89SAndroid Build Coastguard Worker palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */ 1601*8d67ca89SAndroid Build Coastguard Worker 1602*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1603*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 1604*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1605*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1606*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 1607*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 1608*8d67ca89SAndroid Build Coastguard Worker 1609*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1610*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 1611*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1612*8d67ca89SAndroid Build Coastguard Worker#endif 1613*8d67ca89SAndroid Build Coastguard Worker 1614*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 1615*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 1616*8d67ca89SAndroid Build Coastguard Worker jmp L(loop_ashr_13) 1617*8d67ca89SAndroid Build Coastguard Worker 1618*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1619*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_13): 1620*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm3, %xmm0 /* check nibble for null char */ 1621*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm0, %edx 1622*8d67ca89SAndroid Build Coastguard Worker test $0xe000, %edx 1623*8d67ca89SAndroid Build Coastguard Worker jnz 
L(ashr_13_exittail) 1624*8d67ca89SAndroid Build Coastguard Worker 1625*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1626*8d67ca89SAndroid Build Coastguard Worker cmp $2, %r11 1627*8d67ca89SAndroid Build Coastguard Worker jbe L(ashr_13_exittail) 1628*8d67ca89SAndroid Build Coastguard Worker#endif 1629*8d67ca89SAndroid Build Coastguard Worker 1630*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1631*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 1632*8d67ca89SAndroid Build Coastguard Worker jmp L(gobble_ashr_13) 1633*8d67ca89SAndroid Build Coastguard Worker 1634*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1635*8d67ca89SAndroid Build Coastguard WorkerL(ashr_13_exittail): 1636*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1637*8d67ca89SAndroid Build Coastguard Worker psrldq $13, %xmm0 1638*8d67ca89SAndroid Build Coastguard Worker psrldq $13, %xmm3 1639*8d67ca89SAndroid Build Coastguard Worker jmp L(aftertail) 1640*8d67ca89SAndroid Build Coastguard Worker 1641*8d67ca89SAndroid Build Coastguard Worker/* 1642*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_14 1643*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 1644*8d67ca89SAndroid Build Coastguard Worker * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14 1645*8d67ca89SAndroid Build Coastguard Worker */ 1646*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1647*8d67ca89SAndroid Build Coastguard WorkerL(ashr_14): 1648*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1649*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm2 1650*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi), %xmm1 1651*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1652*8d67ca89SAndroid Build Coastguard Worker pslldq $2, %xmm2 1653*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm2 1654*8d67ca89SAndroid Build Coastguard Worker psubb 
%xmm0, %xmm2 1655*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm2, %r9d 1656*8d67ca89SAndroid Build Coastguard Worker shr %cl, %edx 1657*8d67ca89SAndroid Build Coastguard Worker shr %cl, %r9d 1658*8d67ca89SAndroid Build Coastguard Worker sub %r9d, %edx 1659*8d67ca89SAndroid Build Coastguard Worker jnz L(less32bytes) 1660*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm3 1661*8d67ca89SAndroid Build Coastguard Worker 1662*8d67ca89SAndroid Build Coastguard Worker UPDATE_STRNCMP_COUNTER 1663*8d67ca89SAndroid Build Coastguard Worker 1664*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1665*8d67ca89SAndroid Build Coastguard Worker mov $16, %rcx /* index for loads */ 1666*8d67ca89SAndroid Build Coastguard Worker mov $14, %r9d /* byte position left over from less32bytes case */ 1667*8d67ca89SAndroid Build Coastguard Worker /* 1668*8d67ca89SAndroid Build Coastguard Worker * Setup %r10 value allows us to detect crossing a page boundary. 1669*8d67ca89SAndroid Build Coastguard Worker * When %r10 goes positive we have crossed a page boundary and 1670*8d67ca89SAndroid Build Coastguard Worker * need to do a nibble. 
1671*8d67ca89SAndroid Build Coastguard Worker */ 1672*8d67ca89SAndroid Build Coastguard Worker lea 14(%rdi), %r10 1673*8d67ca89SAndroid Build Coastguard Worker and $0xfff, %r10 /* offset into 4K page */ 1674*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 /* subtract 4K pagesize */ 1675*8d67ca89SAndroid Build Coastguard Worker 1676*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1677*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_14): 1678*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 1679*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_14) 1680*8d67ca89SAndroid Build Coastguard Worker 1681*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_14): 1682*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1683*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 1684*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 1685*8d67ca89SAndroid Build Coastguard Worker 1686*8d67ca89SAndroid Build Coastguard Worker palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */ 1687*8d67ca89SAndroid Build Coastguard Worker 1688*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1689*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 1690*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1691*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1692*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 1693*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 1694*8d67ca89SAndroid Build Coastguard Worker 1695*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1696*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 1697*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1698*8d67ca89SAndroid Build Coastguard Worker#endif 1699*8d67ca89SAndroid Build Coastguard Worker 1700*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 1701*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 1702*8d67ca89SAndroid Build Coastguard 
Worker 1703*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 1704*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_14) /* cross page boundary */ 1705*8d67ca89SAndroid Build Coastguard Worker 1706*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1707*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 1708*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 1709*8d67ca89SAndroid Build Coastguard Worker 1710*8d67ca89SAndroid Build Coastguard Worker palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */ 1711*8d67ca89SAndroid Build Coastguard Worker 1712*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1713*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 1714*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1715*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1716*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 1717*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 1718*8d67ca89SAndroid Build Coastguard Worker 1719*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1720*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 1721*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1722*8d67ca89SAndroid Build Coastguard Worker#endif 1723*8d67ca89SAndroid Build Coastguard Worker 1724*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 1725*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 1726*8d67ca89SAndroid Build Coastguard Worker jmp L(loop_ashr_14) 1727*8d67ca89SAndroid Build Coastguard Worker 1728*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1729*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_14): 1730*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm3, %xmm0 /* check nibble for null char */ 1731*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm0, %edx 1732*8d67ca89SAndroid Build Coastguard Worker test $0xc000, %edx 1733*8d67ca89SAndroid Build Coastguard Worker jnz 
L(ashr_14_exittail) 1734*8d67ca89SAndroid Build Coastguard Worker 1735*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1736*8d67ca89SAndroid Build Coastguard Worker cmp $1, %r11 1737*8d67ca89SAndroid Build Coastguard Worker jbe L(ashr_14_exittail) 1738*8d67ca89SAndroid Build Coastguard Worker#endif 1739*8d67ca89SAndroid Build Coastguard Worker 1740*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1741*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 1742*8d67ca89SAndroid Build Coastguard Worker jmp L(gobble_ashr_14) 1743*8d67ca89SAndroid Build Coastguard Worker 1744*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1745*8d67ca89SAndroid Build Coastguard WorkerL(ashr_14_exittail): 1746*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1747*8d67ca89SAndroid Build Coastguard Worker psrldq $14, %xmm0 1748*8d67ca89SAndroid Build Coastguard Worker psrldq $14, %xmm3 1749*8d67ca89SAndroid Build Coastguard Worker jmp L(aftertail) 1750*8d67ca89SAndroid Build Coastguard Worker 1751*8d67ca89SAndroid Build Coastguard Worker/* 1752*8d67ca89SAndroid Build Coastguard Worker * The following cases will be handled by ashr_15 1753*8d67ca89SAndroid Build Coastguard Worker * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 1754*8d67ca89SAndroid Build Coastguard Worker * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15 1755*8d67ca89SAndroid Build Coastguard Worker */ 1756*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1757*8d67ca89SAndroid Build Coastguard WorkerL(ashr_15): 1758*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1759*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm2 1760*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi), %xmm1 1761*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1762*8d67ca89SAndroid Build Coastguard Worker pslldq $1, %xmm2 1763*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm2 1764*8d67ca89SAndroid Build Coastguard Worker psubb 
%xmm0, %xmm2 1765*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm2, %r9d 1766*8d67ca89SAndroid Build Coastguard Worker shr %cl, %edx 1767*8d67ca89SAndroid Build Coastguard Worker shr %cl, %r9d 1768*8d67ca89SAndroid Build Coastguard Worker sub %r9d, %edx 1769*8d67ca89SAndroid Build Coastguard Worker jnz L(less32bytes) 1770*8d67ca89SAndroid Build Coastguard Worker 1771*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi), %xmm3 1772*8d67ca89SAndroid Build Coastguard Worker 1773*8d67ca89SAndroid Build Coastguard Worker UPDATE_STRNCMP_COUNTER 1774*8d67ca89SAndroid Build Coastguard Worker 1775*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1776*8d67ca89SAndroid Build Coastguard Worker mov $16, %rcx /* index for loads */ 1777*8d67ca89SAndroid Build Coastguard Worker mov $15, %r9d /* byte position left over from less32bytes case */ 1778*8d67ca89SAndroid Build Coastguard Worker /* 1779*8d67ca89SAndroid Build Coastguard Worker * Setup %r10 value allows us to detect crossing a page boundary. 1780*8d67ca89SAndroid Build Coastguard Worker * When %r10 goes positive we have crossed a page boundary and 1781*8d67ca89SAndroid Build Coastguard Worker * need to do a nibble. 
1782*8d67ca89SAndroid Build Coastguard Worker */ 1783*8d67ca89SAndroid Build Coastguard Worker lea 15(%rdi), %r10 1784*8d67ca89SAndroid Build Coastguard Worker and $0xfff, %r10 /* offset into 4K page */ 1785*8d67ca89SAndroid Build Coastguard Worker 1786*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 /* subtract 4K pagesize */ 1787*8d67ca89SAndroid Build Coastguard Worker 1788*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1789*8d67ca89SAndroid Build Coastguard WorkerL(loop_ashr_15): 1790*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 1791*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_15) 1792*8d67ca89SAndroid Build Coastguard Worker 1793*8d67ca89SAndroid Build Coastguard WorkerL(gobble_ashr_15): 1794*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1795*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 1796*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 1797*8d67ca89SAndroid Build Coastguard Worker 1798*8d67ca89SAndroid Build Coastguard Worker palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */ 1799*8d67ca89SAndroid Build Coastguard Worker 1800*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1801*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 1802*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1803*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1804*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 1805*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 1806*8d67ca89SAndroid Build Coastguard Worker 1807*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1808*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 1809*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1810*8d67ca89SAndroid Build Coastguard Worker#endif 1811*8d67ca89SAndroid Build Coastguard Worker 1812*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 1813*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, 
%xmm3 1814*8d67ca89SAndroid Build Coastguard Worker 1815*8d67ca89SAndroid Build Coastguard Worker add $16, %r10 1816*8d67ca89SAndroid Build Coastguard Worker jg L(nibble_ashr_15) /* cross page boundary */ 1817*8d67ca89SAndroid Build Coastguard Worker 1818*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1819*8d67ca89SAndroid Build Coastguard Worker movdqa (%rdi, %rcx), %xmm2 1820*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm2, %xmm4 1821*8d67ca89SAndroid Build Coastguard Worker 1822*8d67ca89SAndroid Build Coastguard Worker palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */ 1823*8d67ca89SAndroid Build Coastguard Worker 1824*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm1, %xmm0 1825*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm2, %xmm1 1826*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1827*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1828*8d67ca89SAndroid Build Coastguard Worker sub $0xffff, %edx 1829*8d67ca89SAndroid Build Coastguard Worker jnz L(exit) 1830*8d67ca89SAndroid Build Coastguard Worker 1831*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1832*8d67ca89SAndroid Build Coastguard Worker sub $16, %r11 1833*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1834*8d67ca89SAndroid Build Coastguard Worker#endif 1835*8d67ca89SAndroid Build Coastguard Worker 1836*8d67ca89SAndroid Build Coastguard Worker add $16, %rcx 1837*8d67ca89SAndroid Build Coastguard Worker movdqa %xmm4, %xmm3 1838*8d67ca89SAndroid Build Coastguard Worker jmp L(loop_ashr_15) 1839*8d67ca89SAndroid Build Coastguard Worker 1840*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1841*8d67ca89SAndroid Build Coastguard WorkerL(nibble_ashr_15): 1842*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm3, %xmm0 /* check nibble for null char */ 1843*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm0, %edx 1844*8d67ca89SAndroid Build Coastguard Worker test $0x8000, %edx 1845*8d67ca89SAndroid 
Build Coastguard Worker jnz L(ashr_15_exittail) 1846*8d67ca89SAndroid Build Coastguard Worker 1847*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1848*8d67ca89SAndroid Build Coastguard Worker test %r11, %r11 1849*8d67ca89SAndroid Build Coastguard Worker je L(ashr_15_exittail) 1850*8d67ca89SAndroid Build Coastguard Worker#endif 1851*8d67ca89SAndroid Build Coastguard Worker 1852*8d67ca89SAndroid Build Coastguard Worker pxor %xmm0, %xmm0 1853*8d67ca89SAndroid Build Coastguard Worker sub $0x1000, %r10 1854*8d67ca89SAndroid Build Coastguard Worker jmp L(gobble_ashr_15) 1855*8d67ca89SAndroid Build Coastguard Worker 1856*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1857*8d67ca89SAndroid Build Coastguard WorkerL(ashr_15_exittail): 1858*8d67ca89SAndroid Build Coastguard Worker movdqa (%rsi, %rcx), %xmm1 1859*8d67ca89SAndroid Build Coastguard Worker psrldq $15, %xmm3 1860*8d67ca89SAndroid Build Coastguard Worker psrldq $15, %xmm0 1861*8d67ca89SAndroid Build Coastguard Worker 1862*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1863*8d67ca89SAndroid Build Coastguard WorkerL(aftertail): 1864*8d67ca89SAndroid Build Coastguard Worker pcmpeqb %xmm3, %xmm1 1865*8d67ca89SAndroid Build Coastguard Worker psubb %xmm0, %xmm1 1866*8d67ca89SAndroid Build Coastguard Worker pmovmskb %xmm1, %edx 1867*8d67ca89SAndroid Build Coastguard Worker not %edx 1868*8d67ca89SAndroid Build Coastguard Worker 1869*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1870*8d67ca89SAndroid Build Coastguard WorkerL(exit): 1871*8d67ca89SAndroid Build Coastguard Worker lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */ 1872*8d67ca89SAndroid Build Coastguard WorkerL(less32bytes): 1873*8d67ca89SAndroid Build Coastguard Worker lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */ 1874*8d67ca89SAndroid Build Coastguard Worker lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */ 1875*8d67ca89SAndroid Build Coastguard Worker test 
%r8d, %r8d 1876*8d67ca89SAndroid Build Coastguard Worker jz L(ret) 1877*8d67ca89SAndroid Build Coastguard Worker xchg %rsi, %rdi /* recover original order according to flag(%r8d) */ 1878*8d67ca89SAndroid Build Coastguard Worker 1879*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1880*8d67ca89SAndroid Build Coastguard WorkerL(ret): 1881*8d67ca89SAndroid Build Coastguard WorkerL(less16bytes): 1882*8d67ca89SAndroid Build Coastguard Worker bsf %rdx, %rdx /* find and store bit index in %rdx */ 1883*8d67ca89SAndroid Build Coastguard Worker 1884*8d67ca89SAndroid Build Coastguard Worker#ifdef USE_AS_STRNCMP 1885*8d67ca89SAndroid Build Coastguard Worker sub %rdx, %r11 1886*8d67ca89SAndroid Build Coastguard Worker jbe L(strcmp_exitz) 1887*8d67ca89SAndroid Build Coastguard Worker#endif 1888*8d67ca89SAndroid Build Coastguard Worker movzbl (%rsi, %rdx), %ecx 1889*8d67ca89SAndroid Build Coastguard Worker movzbl (%rdi, %rdx), %eax 1890*8d67ca89SAndroid Build Coastguard Worker 1891*8d67ca89SAndroid Build Coastguard Worker sub %ecx, %eax 1892*8d67ca89SAndroid Build Coastguard Worker ret 1893*8d67ca89SAndroid Build Coastguard Worker 1894*8d67ca89SAndroid Build Coastguard WorkerL(strcmp_exitz): 1895*8d67ca89SAndroid Build Coastguard Worker xor %eax, %eax 1896*8d67ca89SAndroid Build Coastguard Worker ret 1897*8d67ca89SAndroid Build Coastguard Worker 1898*8d67ca89SAndroid Build Coastguard Worker .p2align 4 1899*8d67ca89SAndroid Build Coastguard WorkerL(Byte0): 1900*8d67ca89SAndroid Build Coastguard Worker movzbl (%rsi), %ecx 1901*8d67ca89SAndroid Build Coastguard Worker movzbl (%rdi), %eax 1902*8d67ca89SAndroid Build Coastguard Worker 1903*8d67ca89SAndroid Build Coastguard Worker sub %ecx, %eax 1904*8d67ca89SAndroid Build Coastguard Worker ret 1905*8d67ca89SAndroid Build Coastguard WorkerEND (STRCMP) 1906*8d67ca89SAndroid Build Coastguard Worker 1907*8d67ca89SAndroid Build Coastguard Worker .section .rodata,"a",@progbits 1908*8d67ca89SAndroid Build Coastguard Worker .p2align 3 
/*
 * Table of sixteen 32-bit entries. Each entry is the distance from
 * L(unaligned_table) to one of the L(ashr_N) labels, i.e. a label
 * difference rather than an absolute address.
 *
 * Slots 0..14 refer to L(ashr_1)..L(ashr_15); the last slot (15) refers
 * to L(ashr_0).
 *
 * NOTE(review): the code that indexes this table is not visible in this
 * part of the file -- presumably it loads an entry and adds the table
 * address back to form the branch target; confirm at the dispatch site.
 */
L(unaligned_table):
	.long	L(ashr_1) - L(unaligned_table)		/* slot 0 */
	.long	L(ashr_2) - L(unaligned_table)		/* slot 1 */
	.long	L(ashr_3) - L(unaligned_table)		/* slot 2 */
	.long	L(ashr_4) - L(unaligned_table)		/* slot 3 */
	.long	L(ashr_5) - L(unaligned_table)		/* slot 4 */
	.long	L(ashr_6) - L(unaligned_table)		/* slot 5 */
	.long	L(ashr_7) - L(unaligned_table)		/* slot 6 */
	.long	L(ashr_8) - L(unaligned_table)		/* slot 7 */
	.long	L(ashr_9) - L(unaligned_table)		/* slot 8 */
	.long	L(ashr_10) - L(unaligned_table)		/* slot 9 */
	.long	L(ashr_11) - L(unaligned_table)		/* slot 10 */
	.long	L(ashr_12) - L(unaligned_table)		/* slot 11 */
	.long	L(ashr_13) - L(unaligned_table)		/* slot 12 */
	.long	L(ashr_14) - L(unaligned_table)		/* slot 13 */
	.long	L(ashr_15) - L(unaligned_table)		/* slot 14 */
	.long	L(ashr_0) - L(unaligned_table)		/* slot 15: wraps to the zero-shift case */