xref: /aosp_15_r20/art/runtime/arch/arm64/memcmp16_arm64.S (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1*795d594fSAndroid Build Coastguard Worker/*
2*795d594fSAndroid Build Coastguard Worker * Copyright (C) 2014 The Android Open Source Project
3*795d594fSAndroid Build Coastguard Worker *
4*795d594fSAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*795d594fSAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*795d594fSAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*795d594fSAndroid Build Coastguard Worker *
8*795d594fSAndroid Build Coastguard Worker *      http://www.apache.org/licenses/LICENSE-2.0
9*795d594fSAndroid Build Coastguard Worker *
10*795d594fSAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*795d594fSAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*795d594fSAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*795d594fSAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*795d594fSAndroid Build Coastguard Worker * limitations under the License.
15*795d594fSAndroid Build Coastguard Worker */
16*795d594fSAndroid Build Coastguard Worker
17*795d594fSAndroid Build Coastguard Worker/* Assumptions:
18*795d594fSAndroid Build Coastguard Worker *
19*795d594fSAndroid Build Coastguard Worker * ARMv8-a, AArch64, little-endian
20*795d594fSAndroid Build Coastguard Worker */
21*795d594fSAndroid Build Coastguard Worker
22*795d594fSAndroid Build Coastguard Worker#ifndef ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
23*795d594fSAndroid Build Coastguard Worker#define ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
24*795d594fSAndroid Build Coastguard Worker
25*795d594fSAndroid Build Coastguard Worker#include "asm_support_arm64.S"
26*795d594fSAndroid Build Coastguard Worker
27*795d594fSAndroid Build Coastguard Worker/* Parameters and result.  */
28*795d594fSAndroid Build Coastguard Worker#define src1        x0  /* First buffer; post-incremented as it is read.  */
29*795d594fSAndroid Build Coastguard Worker#define src2        x1  /* Second buffer; post-incremented as it is read.  */
30*795d594fSAndroid Build Coastguard Worker#define limit       x2  /* Length: half-words on entry, converted to bytes by __memcmp16.  */
31*795d594fSAndroid Build Coastguard Worker#define result      x0  /* Return value; same register as src1.  */
32*795d594fSAndroid Build Coastguard Worker
33*795d594fSAndroid Build Coastguard Worker/* Internal variables.  */
34*795d594fSAndroid Build Coastguard Worker#define data1       x3  /* Data loaded from src1.  */
35*795d594fSAndroid Build Coastguard Worker#define data1w      w3  /* 32-bit view of data1, used by the half-word loop.  */
36*795d594fSAndroid Build Coastguard Worker#define data2       x4  /* Data loaded from src2.  */
37*795d594fSAndroid Build Coastguard Worker#define data2w      w4  /* 32-bit view of data2, used by the half-word loop.  */
38*795d594fSAndroid Build Coastguard Worker#define has_nul     x5  /* Not referenced by __memcmp16.  */
39*795d594fSAndroid Build Coastguard Worker#define diff        x6  /* data1 ^ data2; later reused as a shift amount.  */
40*795d594fSAndroid Build Coastguard Worker#define endloop     x7  /* Non-zero when the 8-byte loop must stop.  */
41*795d594fSAndroid Build Coastguard Worker#define tmp1        x8  /* Scratch: alignment tests and offsets.  */
42*795d594fSAndroid Build Coastguard Worker#define tmp2        x9  /* Scratch: mask for the bytes preceding the start.  */
43*795d594fSAndroid Build Coastguard Worker#define tmp3        x10  /* Not referenced by __memcmp16.  */
44*795d594fSAndroid Build Coastguard Worker#define limit_wd    x12  /* Number of 8-byte words left to examine.  */
45*795d594fSAndroid Build Coastguard Worker#define mask        x13  /* Tail mask, then the 0xFFFF half-word mask.  */
46*795d594fSAndroid Build Coastguard Worker
47*795d594fSAndroid Build Coastguard Worker// WARNING: If you change this code to use x14 or x15, you must also change
48*795d594fSAndroid Build Coastguard Worker//          art_quick_string_compareto, which relies on these temps being unused.
49*795d594fSAndroid Build Coastguard Worker
/*
 * __memcmp16 -- compare two buffers one 16-bit unit at a time.
 *
 * In:     x0 (src1), x1 (src2)  addresses of the two buffers
 *         x2 (limit)            number of half-words to compare
 * Out:    x0 (result)           0 when limit is 0 or no half-word differs within
 *                               limit; otherwise the half-word from src1 minus
 *                               the half-word from src2 (both zero-extended) at
 *                               the first position where they differ
 * Writes: x0-x4, x6-x9, x12, x13 and the condition flags.  No stack accesses
 *         and no calls.
 *
 * Paths:
 *  - src1 and src2 have the same offset within an 8-byte word: compare 8 bytes
 *    per iteration (.Lloop_aligned), entering through .Lmutual_align when that
 *    offset is non-zero.  The 8-byte loads can cover bytes before the start and
 *    past the end of the buffers (inside the enclosing aligned words); those
 *    bytes are masked so that they never affect the result.
 *  - otherwise: compare one half-word per iteration (.Lmisaligned8).
 *
 * The mask and shift directions used here assume little-endian data.
 */
50*795d594fSAndroid Build Coastguard WorkerENTRY __memcmp16
51*795d594fSAndroid Build Coastguard Worker  cbz     limit, .Lret0  /* Nothing to compare.  */
52*795d594fSAndroid Build Coastguard Worker  lsl     limit, limit, #1  /* Half-words to bytes.  */
53*795d594fSAndroid Build Coastguard Worker  eor     tmp1, src1, src2
54*795d594fSAndroid Build Coastguard Worker  tst     tmp1, #7
55*795d594fSAndroid Build Coastguard Worker  b.ne    .Lmisaligned8  /* Offsets within an 8-byte word differ.  */
56*795d594fSAndroid Build Coastguard Worker  ands    tmp1, src1, #7  /* tmp1 = offset of src1 within its 8-byte word.  */
57*795d594fSAndroid Build Coastguard Worker  b.ne    .Lmutual_align
58*795d594fSAndroid Build Coastguard Worker  add     limit_wd, limit, #7
59*795d594fSAndroid Build Coastguard Worker  lsr     limit_wd, limit_wd, #3  /* limit_wd = limit / 8, rounded up.  */
60*795d594fSAndroid Build Coastguard Worker  /* Start of performance-critical section  -- one 64B cache line.  */
61*795d594fSAndroid Build Coastguard Worker.Lloop_aligned:
62*795d594fSAndroid Build Coastguard Worker  ldr     data1, [src1], #8
63*795d594fSAndroid Build Coastguard Worker  ldr     data2, [src2], #8
64*795d594fSAndroid Build Coastguard Worker.Lstart_realigned:
65*795d594fSAndroid Build Coastguard Worker  subs    limit_wd, limit_wd, #1  /* Z set when this is the last Dword.  */
66*795d594fSAndroid Build Coastguard Worker  eor     diff, data1, data2  /* Non-zero if differences found.  */
67*795d594fSAndroid Build Coastguard Worker  csinv   endloop, diff, xzr, ne  /* endloop = diff, or ~0 on the last Dword.  */
68*795d594fSAndroid Build Coastguard Worker  cbz     endloop, .Lloop_aligned
69*795d594fSAndroid Build Coastguard Worker  /* End of performance-critical section  -- one 64B cache line.  */
70*795d594fSAndroid Build Coastguard Worker
71*795d594fSAndroid Build Coastguard Worker  /* Not reached the limit, must have found a diff.  */
72*795d594fSAndroid Build Coastguard Worker  cbnz    limit_wd, .Lnot_limit
73*795d594fSAndroid Build Coastguard Worker
74*795d594fSAndroid Build Coastguard Worker  /* Limit % 8 == 0 => all bytes significant.  */
75*795d594fSAndroid Build Coastguard Worker  ands    limit, limit, #7
76*795d594fSAndroid Build Coastguard Worker  b.eq    .Lnot_limit
77*795d594fSAndroid Build Coastguard Worker
78*795d594fSAndroid Build Coastguard Worker  lsl     limit, limit, #3  /* Bytes -> bits.  */
79*795d594fSAndroid Build Coastguard Worker  mov     mask, #~0
80*795d594fSAndroid Build Coastguard Worker  lsl     mask, mask, limit  /* Ones above the significant bits.  */
81*795d594fSAndroid Build Coastguard Worker  bic     data1, data1, mask  /* Zero the bytes past the limit ...  */
82*795d594fSAndroid Build Coastguard Worker  bic     data2, data2, mask  /* ... in both words.  */
  /* DIFF is not recomputed after masking: if the first differing byte lies
     past the limit, the half-word extracted below comes from the region just
     cleared in both words, so the result is 0.
     NOTE(review): this relies on the limit falling on a half-word boundary,
     i.e. on src1 being 2-byte aligned -- confirm against callers.  */
83*795d594fSAndroid Build Coastguard Worker
84*795d594fSAndroid Build Coastguard Worker.Lnot_limit:
85*795d594fSAndroid Build Coastguard Worker
86*795d594fSAndroid Build Coastguard Worker  // Swap the byte order of diff. Exact reverse is not important, as we only need to detect
87*795d594fSAndroid Build Coastguard Worker  // the half-word.
88*795d594fSAndroid Build Coastguard Worker  rev     diff, diff
89*795d594fSAndroid Build Coastguard Worker  // The most significant set bit of DIFF now lies in the lowest-addressed byte that differs.
90*795d594fSAndroid Build Coastguard Worker  clz     diff, diff  // diff = 8 * (index of that byte) + (0..7), or 64 if DIFF was 0.
91*795d594fSAndroid Build Coastguard Worker  // Mask off 0xF to have shift amount. Why does ARM64 not have BIC with immediate?!?!
92*795d594fSAndroid Build Coastguard Worker  bfi     diff, xzr, #0, #4  // diff = 16 * (index of the differing half-word).
93*795d594fSAndroid Build Coastguard Worker  // Create a 16b mask
94*795d594fSAndroid Build Coastguard Worker  mov     mask, #0xFFFF
95*795d594fSAndroid Build Coastguard Worker  // Shift to the right half-word. Register shifts use the amount modulo 64, so 64 shifts by 0.
96*795d594fSAndroid Build Coastguard Worker  lsr     data1, data1, diff
97*795d594fSAndroid Build Coastguard Worker  lsr     data2, data2, diff
98*795d594fSAndroid Build Coastguard Worker  // Mask the lowest half-word.
99*795d594fSAndroid Build Coastguard Worker  and     data1, data1, mask
100*795d594fSAndroid Build Coastguard Worker  and     data2, data2, mask
101*795d594fSAndroid Build Coastguard Worker  // Compute difference.
102*795d594fSAndroid Build Coastguard Worker  sub     result, data1, data2
103*795d594fSAndroid Build Coastguard Worker  ret
104*795d594fSAndroid Build Coastguard Worker
105*795d594fSAndroid Build Coastguard Worker.Lmutual_align:
106*795d594fSAndroid Build Coastguard Worker  /* Sources are mutually aligned, but are not currently at an
107*795d594fSAndroid Build Coastguard Worker     alignment boundary.  Round down the addresses and then mask off
108*795d594fSAndroid Build Coastguard Worker     the bytes that precede the start point.  */
109*795d594fSAndroid Build Coastguard Worker  bic     src1, src1, #7
110*795d594fSAndroid Build Coastguard Worker  bic     src2, src2, #7
111*795d594fSAndroid Build Coastguard Worker  add     limit, limit, tmp1  /* Adjust the limit for the extra.  */
112*795d594fSAndroid Build Coastguard Worker  lsl     tmp1, tmp1, #3    /* Bytes beyond alignment -> bits.  */
113*795d594fSAndroid Build Coastguard Worker  ldr     data1, [src1], #8
114*795d594fSAndroid Build Coastguard Worker  neg     tmp1, tmp1    /* Bits to alignment -64.  */
115*795d594fSAndroid Build Coastguard Worker  ldr     data2, [src2], #8
116*795d594fSAndroid Build Coastguard Worker  mov     tmp2, #~0
117*795d594fSAndroid Build Coastguard Worker  /* Little-endian.  Early bytes are at LSB.  */
118*795d594fSAndroid Build Coastguard Worker  lsr     tmp2, tmp2, tmp1  /* Shift (tmp1 & 63).  */
119*795d594fSAndroid Build Coastguard Worker  add     limit_wd, limit, #7
120*795d594fSAndroid Build Coastguard Worker  orr     data1, data1, tmp2  /* Force the bytes before the start to ones ...  */
121*795d594fSAndroid Build Coastguard Worker  orr     data2, data2, tmp2  /* ... in both words, so they compare equal.  */
122*795d594fSAndroid Build Coastguard Worker  lsr     limit_wd, limit_wd, #3  /* limit_wd = adjusted limit / 8, rounded up.  */
123*795d594fSAndroid Build Coastguard Worker  b       .Lstart_realigned  /* First Dword is already loaded; skip the loads.  */
124*795d594fSAndroid Build Coastguard Worker
125*795d594fSAndroid Build Coastguard Worker.Lret0:
126*795d594fSAndroid Build Coastguard Worker  mov     result, #0
127*795d594fSAndroid Build Coastguard Worker  ret
128*795d594fSAndroid Build Coastguard Worker
129*795d594fSAndroid Build Coastguard Worker  .p2align 6
130*795d594fSAndroid Build Coastguard Worker.Lmisaligned8:
131*795d594fSAndroid Build Coastguard Worker  sub     limit, limit, #1  /* Bias so the subs below borrows on the last half-word.  */
132*795d594fSAndroid Build Coastguard Worker1:
133*795d594fSAndroid Build Coastguard Worker  /* Perhaps we can do better than this.  */
134*795d594fSAndroid Build Coastguard Worker  ldrh    data1w, [src1], #2
135*795d594fSAndroid Build Coastguard Worker  ldrh    data2w, [src2], #2
136*795d594fSAndroid Build Coastguard Worker  subs    limit, limit, #2  /* C stays set while more half-words remain after this one.  */
137*795d594fSAndroid Build Coastguard Worker  ccmp    data1w, data2w, #0, cs  /* Compare if C set; else NZCV = 0b0000 (ne) ends the loop.  */
138*795d594fSAndroid Build Coastguard Worker  b.eq    1b
139*795d594fSAndroid Build Coastguard Worker  sub     result, data1, data2  /* Difference of the last half-words loaded.  */
140*795d594fSAndroid Build Coastguard Worker  ret
141*795d594fSAndroid Build Coastguard WorkerEND __memcmp16
142*795d594fSAndroid Build Coastguard Worker
143*795d594fSAndroid Build Coastguard Worker#endif  // ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
144