/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
#define ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_

#include "asm_support_arm.S"

/*
 * Optimized memcmp16() for ARM9.
 * This would not be optimal on XScale or ARM11, where more prefetching
 * and use of pld will be needed.
 * The 2 major optimizations here are
 * (1) The main loops compare whole 32-bit words: 16 half-words per
 *     iteration when both pointers share the same word alignment, and
 *     8 half-words per iteration otherwise.
 * (2) The loads are scheduled in a way they won't stall.
 *
 * __memcmp16
 *   In:    r0 = first buffer
 *          r1 = second buffer
 *          r2 = number of 16-bit units (half-words) to compare
 *   Out:   r0 = 0 when all compared half-words are equal, otherwise
 *               (first-buffer half-word) - (second-buffer half-word) for
 *               the first pair that differs
 *   Clobb: r1, r2, r3, ip, condition flags
 *          (r4 and lr are saved on the stack and restored on the fast path)
 *
 * Both pointers are accessed with ldrh, so they are expected to be at least
 * 2-byte aligned.
 */

ARM_ENTRY __memcmp16
        pld         [r0, #0]
        pld         [r1, #0]

        /* Take care of the case where the length is zero or the buffers are
         * the same: cmpne only executes when the pointers differ, so Z is set
         * here if r0 == r1 or r2 == 0.
         */
        cmp         r0, r1
        cmpne       r2, #0
        moveq       r0, #0
        bxeq        lr

        /* Since r0 holds the result, move the first source pointer
         * somewhere else.
         */
        mov         r3, r0

        /* Make sure we have at least 12 half-words; this simplifies things
         * below and avoids some overhead for small blocks.
         */
        cmp         r2, #12
        bpl         0f

        /* Small blocks (fewer than 12 half-words): plain half-word loop. */
        pld         [r0, #32]
        pld         [r1, #32]

1:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip              /* r0 = difference; Z set when equal */
        bxne        lr                      /* mismatch: return the difference */
        subs        r2, r2, #1
        bne         1b
        bx          lr                      /* all equal: r0 is 0 from the last subs */


        /* Save registers. lr is needed as scratch below.
         * NOTE(review): r4 is never used in this routine; presumably it is
         * pushed alongside lr to keep sp 8-byte aligned - confirm.
         */
0:      push        {r4, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

        /* Align the first pointer to a word boundary. */
        tst         r3, #2
        beq         0f

        /* Compare one leading half-word so that r3 becomes word aligned. */
        ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        sub         r2, r2, #1
        subs        r0, r0, ip
        /* Mismatch: restore registers and return the difference. */
        popne       {r4, lr}
        bxne        lr


0:      /* Here the first pointer is word aligned, and at least 11 half-words
         * remain (>= 12 on entry to the fast path, at most one consumed above).
         */

        /* See if the pointers are congruent (same alignment within a word). */
        eor         r0, r3, r1
        ands        r0, r0, #2
        bne         5f

        /* Congruent case, 16 half-words per iteration.
         * We need to make sure there are at least 16+2 half-words left
         * because we effectively read ahead one long word, and we could
         * read past the buffer (and segfault) if we're not careful.
         */

        ldr         ip, [r1]                /* preload first word of second buffer */
        subs        r2, r2, #(16 + 2)       /* r2 = remaining - 18 from here on */
        bmi         1f

        /* Invariant at the top of each step: the word of the second buffer
         * matching the next word of the first buffer is already loaded, held
         * alternately in ip and lr. Once a step finds a difference (Z clear),
         * every following conditional instruction is skipped, so r3 and r1
         * stop just past the differing word.
         */
0:
        pld         [r3, #64]
        pld         [r1, #64]
        ldr         r0, [r3], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        bne         2f
        subs        r2, r2, #16
        bhs         0b

        /* Do we have at least 2 half-words left?
         * r2 is (remaining - 18) here; after the add it is (remaining - 2).
         */
1:      adds        r2, r2, #(16 - 2 + 2)
        bmi         4f

        /* Finish off 2 half-words (one word) at a time. */
3:      ldr         r0, [r3], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #2
        bhs         3b

        /* Are we done? Undo the bias: r2 = half-words still to compare. */
4:      adds        r2, r2, #2
        bne         8f
        /* Nothing left: restore registers and return 0. */
        mov         r0, #0
        pop         {r4, pc}

2:      /* The last word compared differs. r3 and r1 point just past it, so
         * redo it as two half-word compares to produce the signed difference
         * of the first differing half-word.
         */
        ldrh        r0, [r3, #-4]
        ldrh        ip, [r1, #-4]
        subs        r0, r0, ip
        ldrheq      r0, [r3, #-2]
        ldrheq      ip, [r1, #-2]
        subseq      r0, r0, ip
        /* Restore registers and return. */
        pop         {r4, pc}

        /* Process the last few half-words one at a time (r2 > 0 on entry). */
8:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* Restore registers and return; r0 already holds the result. */
        pop         {r4, pc}

5:      /*************** non-congruent case ***************/

        /* r3 is word aligned but r1 is only half-word aligned. Round r1 down
         * to a word boundary and rebuild each word of the second buffer from
         * the upper half of the previous aligned word (kept in lr) and the
         * lower half of the next one.
         * NOTE(review): the shift directions assume little-endian data - confirm.
         */
        bic         r1, r1, #3
        ldr         lr, [r1], #4
        sub         r2, r2, #8              /* r2 = remaining - 8 from here on */

        /* 8 half-words per iteration. */
6:
        pld         [r3, #64]
        pld         [r1, #64]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r3], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b
        sub         r1, r1, #2              /* r1 ran 2 bytes ahead; point it at the next half-word */
        /* Are we done? Undo the bias: r2 = half-words still to compare. */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b
        /* Finish off the remaining half-words. */
        b           8b

7:      /* Fix up r1 so it lines up with r3 again, then reuse the mismatch
         * handler of the congruent case.
         */
        sub         r1, r1, #2
        b           2b
END __memcmp16


#endif  // ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_