/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#ifndef ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
#define ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_

#include "asm_support_arm64.S"

/* Parameters and result.  */
#define src1            x0
#define src2            x1
#define limit           x2
#define result          x0

/* Internal variables.  (has_nul and tmp3 are not referenced in this file.)  */
#define data1           x3
#define data1w          w3
#define data2           x4
#define data2w          w4
#define has_nul         x5
#define diff            x6
#define endloop         x7
#define tmp1            x8
#define tmp2            x9
#define tmp3            x10
#define limit_wd        x12
#define mask            x13

// WARNING: If you change this code to use x14 and x15, you must also change
//          art_quick_string_compareto, which relies on these temps being unused.

/*
 * __memcmp16: compare two buffers of 16-bit units.
 *
 * In:     x0 (src1)  = address of the first buffer
 *         x1 (src2)  = address of the second buffer
 *         x2 (limit) = number of half-words (16-bit units) to compare
 * Out:    x0 (result) = 0 if limit is 0 or all compared half-words are equal;
 *         otherwise (first differing half-word of src1) minus (the matching
 *         half-word of src2), both zero-extended before the 64-bit subtract.
 * Writes: x0-x4, x6-x9, x12, x13 and the NZCV flags.  Leaf routine: makes no
 *         calls and does not touch the stack.
 *
 * Notes:
 *  - Only the little-endian byte order is handled (see .Lmutual_align and the
 *    rev/clz sequence after .Lnot_limit).
 *  - The 8-byte-at-a-time path rounds addresses down / loads whole double
 *    words, so it may read bytes of the enclosing aligned double word that lie
 *    outside the compared range; those bytes are masked before use.
 *  - NOTE(review): the 8-byte path selects the differing half-word on a 16-bit
 *    lane of the loaded double word, so it presumably expects both pointers to
 *    be 2-byte aligned -- confirm against callers.
 */
ENTRY __memcmp16
  cbz     limit, .Lret0
  lsl     limit, limit, #1  /* Half-words to bytes.  */
  eor     tmp1, src1, src2
  tst     tmp1, #7
  b.ne    .Lmisaligned8     /* Low 3 address bits differ: no common 8-byte alignment.  */
  ands    tmp1, src1, #7
  b.ne    .Lmutual_align    /* Same offset within a double word, but not at a boundary.  */
  add     limit_wd, limit, #7
  lsr     limit_wd, limit_wd, #3  /* Double words to process, rounded up.  */
  /* Start of performance-critical section  -- one 64B cache line.  */
.Lloop_aligned:
  ldr     data1, [src1], #8
  ldr     data2, [src2], #8
.Lstart_realigned:
  subs    limit_wd, limit_wd, #1
  eor     diff, data1, data2  /* Non-zero if differences found.  */
  /* endloop = diff while double words remain (ne), all-ones on the last one.  */
  csinv   endloop, diff, xzr, ne  /* Last Dword or differences.  */
  cbz     endloop, .Lloop_aligned
  /* End of performance-critical section  -- one 64B cache line.  */

  /* Not reached the limit, must have found a diff.  */
  cbnz    limit_wd, .Lnot_limit

  /* Last double word.  If limit % 8 == 0, all of its bytes are significant.  */
  ands    limit, limit, #7
  b.eq    .Lnot_limit

  /* Otherwise clear the bytes past the limit in both data words.  */
  lsl     limit, limit, #3  /* Bytes -> bits.  */
  mov     mask, #~0
  lsl     mask, mask, limit
  bic     data1, data1, mask
  bic     data2, data2, mask

.Lnot_limit:

  // Swap the byte order of diff. Exact reverse is not important, as we only need to detect
  // the half-word.
  rev     diff, diff
  // The most significant bit of DIFF marks the least significant bit of change between DATA1/2
  clz     diff, diff
  // Clear the low 4 bits so the bit index becomes a multiple of 16, i.e. the shift amount of
  // the half-word holding the first difference (bfi from xzr stands in for a BIC-immediate).
  // If diff was zero the index is 64, which the register shifts below treat as 0.
  bfi     diff, xzr, #0, #4
  // Create a 16b mask
  mov     mask, #0xFFFF
  // Shift to the right half-word.
  lsr     data1, data1, diff
  lsr     data2, data2, diff
  // Mask the lowest half-word.
  and     data1, data1, mask
  and     data2, data2, mask
  // Compute difference.
  sub     result, data1, data2
  ret

.Lmutual_align:
  /* Sources are mutually aligned, but are not currently at an
     alignment boundary.  Round down the addresses and then mask off
     the bytes that precede the start point.  */
  bic     src1, src1, #7
  bic     src2, src2, #7
  add     limit, limit, tmp1  /* Adjust the limit for the extra.  */
  lsl     tmp1, tmp1, #3    /* Bytes beyond alignment -> bits.  */
  ldr     data1, [src1], #8
  neg     tmp1, tmp1    /* Bits to alignment -64.  */
  ldr     data2, [src2], #8
  mov     tmp2, #~0
  /* Little-endian.  Early bytes are at LSB.  */
  lsr     tmp2, tmp2, tmp1  /* Shift (tmp1 & 63).  */
  add     limit_wd, limit, #7
  /* Force the bytes before the start point to 0xFF in both words so they compare equal.  */
  orr     data1, data1, tmp2
  orr     data2, data2, tmp2
  lsr     limit_wd, limit_wd, #3
  b       .Lstart_realigned

.Lret0:
  mov     result, #0
  ret

  .p2align 6
.Lmisaligned8:
  /* limit is an even byte count; bias it by -1 so that the subs below leaves
     the carry set exactly while at least one more half-word remains.  */
  sub     limit, limit, #1
1:
  /* Perhaps we can do better than this.  */
  ldrh    data1w, [src1], #2
  ldrh    data2w, [src2], #2
  subs    limit, limit, #2
  /* More to do (cs): compare the half-words.  Otherwise force "ne" to leave the loop.  */
  ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
  b.eq    1b
  /* ldrh zero-extends, so the 64-bit subtract sees clean half-word values.  */
  sub     result, data1, data2
  ret
END __memcmp16

#endif  // ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_