xref: /aosp_15_r20/art/runtime/arch/arm/memcmp16_arm.S (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1*795d594fSAndroid Build Coastguard Worker/*
2*795d594fSAndroid Build Coastguard Worker * Copyright (C) 2014 The Android Open Source Project
3*795d594fSAndroid Build Coastguard Worker *
4*795d594fSAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*795d594fSAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*795d594fSAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*795d594fSAndroid Build Coastguard Worker *
8*795d594fSAndroid Build Coastguard Worker *      http://www.apache.org/licenses/LICENSE-2.0
9*795d594fSAndroid Build Coastguard Worker *
10*795d594fSAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*795d594fSAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*795d594fSAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*795d594fSAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*795d594fSAndroid Build Coastguard Worker * limitations under the License.
15*795d594fSAndroid Build Coastguard Worker */
16*795d594fSAndroid Build Coastguard Worker
17*795d594fSAndroid Build Coastguard Worker#ifndef ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
18*795d594fSAndroid Build Coastguard Worker#define ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
19*795d594fSAndroid Build Coastguard Worker
20*795d594fSAndroid Build Coastguard Worker#include "asm_support_arm.S"
21*795d594fSAndroid Build Coastguard Worker
/*
 * Optimized memcmp16() for ARM9.
 * This would not be optimal on XScale or ARM11, where more prefetching
 * and use of pld will be needed.
 * The 2 major optimizations here are
 * (1) The main loop compares 16 half-words (32 bytes) at a time
 * (2) The loads are scheduled in a way they won't stall
 *
 * Register contract, as established by the code below:
 *   In:      r0 = first buffer, r1 = second buffer,
 *            r2 = number of 16-bit half-words to compare
 *   Out:     r0 = 0 when the pointers are identical, the count is zero, or
 *            all half-words match; otherwise the difference
 *            (half-word from first buffer) - (half-word from second buffer)
 *            at the lowest-addressed mismatch.
 *   Scratch: r3 (walking copy of the first pointer), ip, flags.
 *            r4 and lr are pushed and restored on the large-block path.
 *
 * NOTE(review): only bit 1 of each pointer is ever tested, so both buffers
 * are presumably at least 2-byte aligned -- confirm against callers.
 * NOTE(review): the non-congruent path assembles a word from the upper half
 * of one load and the lower half of the next, which matches a little-endian
 * layout -- confirm no big-endian build uses this file.
 */

ARM_ENTRY __memcmp16
        pld         [r0, #0]
        pld         [r1, #0]

        /* take care of the case where the length is zero or the buffers
         * are the same: nothing to compare, the result is 0
         */
        cmp         r0, r1
        cmpne       r2, #0
        moveq       r0, #0
        bxeq        lr

        /* since r0 holds the result, move the first source
         * pointer somewhere else
         */

        mov         r3, r0

        /* make sure we have at least 12 half-words, this simplifies things
         * below and avoids some overhead for small blocks
         */

        cmp         r2, #12
        bpl         0f

        /* small blocks (fewer than 12 half-words): plain half-word loop.
         * r2 is non-zero here because of the early-out above.
         */
        pld         [r0, #32]
        pld         [r1, #32]

1:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip
        bxne        lr                  /* mismatch: r0 is the difference */
        subs        r2, r2, #1
        bne         1b
        bx          lr                  /* all equal: r0 is 0 from the last subs */


        /* save registers */
0:      push        {r4, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

        /* align first pointer to word boundary */
        tst         r3, #2
        beq         0f

        /* compare the single leading half-word */
        ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        sub         r2, r2, #1
        subs        r0, r0, ip
        /* on a mismatch restore registers and return; popping straight into
         * pc matches every other exit of this function
         */
        popne       {r4, pc}


0:      /* here the first pointer is word aligned, and we have at least
         * 11 half-words left to process (12, minus at most one consumed
         * by the alignment step above).
         */

        /* see if the pointers are congruent (same alignment modulo 4) */
        eor         r0, r3, r1
        ands        r0, r0, #2
        bne         5f

        /* congruent case, 16 half-words per iteration
         * We need to make sure there are at least 16+2 half-words left
         * because we effectively read ahead one long word, and we could
         * read past the buffer (and segfault) if we're not careful.
         */

        ldr         ip, [r1]
        subs        r2, r2, #(16 + 2)   /* r2 = remaining - 18 */
        bmi         1f

        /* Main loop: ip and lr alternate as the read-ahead word of the
         * second buffer. Once a comparison fails, Z is clear and every
         * following "eq" instruction is skipped, so r3 and r1 both stay
         * 4 bytes past the mismatching word.
         */
0:
        pld         [r3, #64]
        pld         [r1, #64]
        ldr         r0, [r3], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        bne         2f
        subs        r2, r2, #16
        bhs         0b                  /* loop while remaining - 18 >= 0 */

        /* do we have at least 2 half-words (one word) left?
         * after the add, r2 = remaining - 2
         */
1:      adds        r2, r2, #(16 - 2 + 2)
        bmi         4f

        /* finish off 2 half-words (one word) at a time */
3:      ldr         r0, [r3], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #2
        bhs         3b

        /* are we done? after the add, r2 = remaining (0 or 1) */
4:      adds        r2, r2, #2
        bne         8f
        /* restore registers and return */
        mov         r0, #0
        pop         {r4, pc}

2:      /* the last 2 half-words compared as a word are different; redo
         * them one half-word at a time, lower address first, to compute
         * the return value
         */
        ldrh        r0, [r3, #-4]
        ldrh        ip, [r1, #-4]
        subs        r0, r0, ip
        ldrheq      r0, [r3, #-2]
        ldrheq      ip, [r1, #-2]
        subseq      r0, r0, ip
        /* restore registers and return */
        pop         {r4, pc}

        /* process the last few half-words one at a time */
8:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return (r0 already holds the result) */
        pop         {r4, pc}

5:      /*************** non-congruent case ***************/

        /* r3 is word aligned but r1 is only half-word aligned: round r1
         * down to a word boundary and rebuild each word of the second
         * buffer from two consecutive aligned loads. lr always holds the
         * most recently loaded aligned word.
         */
        bic         r1, r1, #3
        ldr         lr, [r1], #4
        sub         r2, r2, #8          /* r2 = remaining - 8 */

        /* 8 half-words (4 words) per iteration */
6:
        pld         [r3, #64]
        pld         [r1, #64]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r3], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b
        /* undo the read-ahead so r1 points at the next unread half-word */
        sub         r1, r1, #2
        /* are we done? after the add, r2 = remaining */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b
        /* finish off the remaining half-words */
        b           8b

7:      /* mismatch: undo the read-ahead on r1 so that [r1, #-4] lines up
         * with [r3, #-4], then reuse the half-word recheck at 2:
         */
        sub         r1, r1, #2
        b           2b
END __memcmp16
221*795d594fSAndroid Build Coastguard Worker
222*795d594fSAndroid Build Coastguard Worker
223*795d594fSAndroid Build Coastguard Worker#endif  // ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
224