/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 */

/*
 * Overview of this file:
 *   - Three assembler macros (ARM_DIV_BODY, ARM_DIV2_ORDER, ARM_MOD_BODY)
 *     implement the shift-and-subtract cores. Each has a CLZ based variant
 *     selected when __COREBOOT_ARM_ARCH__ >= 5 and a shift-loop variant
 *     otherwise.
 *   - The entry points take the dividend in r0 and the divisor in r1 and
 *     return through "mov pc, lr".
 *   - A zero divisor branches to Ldiv0, which calls __div0 and returns 0.
 */

#if defined __GNUC__

#include <arch/asm.h>

.syntax unified

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *   dividend : in, clobbered (reduced as quotient bits are produced)
 *   divisor  : in, clobbered (shifted up, then shifted back down)
 *   result   : out, receives the quotient
 *   curbit   : scratch, the quotient bit matching the current divisor shift
 *
 * The entry points in this file only expand this macro after they have
 * handled divisor == 0, divisor == 1, dividend <= divisor and power-of-two
 * divisors. None of them reads the dividend register afterwards, and the
 * final value of that register should not be relied on as a remainder:
 * the unrolled loop may subtract shifted divisors whose quotient bit has
 * already been shifted out of curbit.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __COREBOOT_ARM_ARCH__ >= 5

	@ shift = clz(divisor) - clz(dividend): align the top set bit of the
	@ divisor with the top set bit of the dividend in a single step.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result	@ curbit = 1 << shift
	mov	\result, #0			@ quotient accumulator starts at 0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000		@ Z set when the top 3 bits are clear
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend		@ only compared while a 4 bit shift is safe
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0			@ quotient accumulator starts at 0

#endif

	@ Division loop
	@ Four compare/subtract steps per pass, using the divisor shifted
	@ right by 0, 1, 2 and 3 and the matching curbit shifts.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4	@ (flags still come from the movs above)
	bne	1b				@ loop until dividend or curbit is 0

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Computes the bit index of the highest set bit of divisor into order.
 * The entry points in this file only expand it for power-of-two divisors,
 * where that index is log2(divisor), and then shift the dividend right
 * by order. The shift-loop variant clobbers divisor; the CLZ variant
 * leaves it untouched.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __COREBOOT_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Narrow down the set bit 16, 8 and 4 bits at a time, shifting the
	@ divisor down and accumulating the shift amount in order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ Final step on the remaining low nibble: above 4 adds 3, otherwise
	@ divisor >> 1 is added (1 adds 0, 2 adds 1, 4 adds 2).
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core.
 *   dividend : in/out, reduced in place to dividend mod divisor
 *   divisor  : in, clobbered
 *   order    : scratch, number of bit positions the divisor was shifted up
 *   spare    : scratch, written only by the CLZ variant
 *
 * The entry points in this file only expand this macro when the dividend
 * is greater than the divisor and the divisor is neither 0, 1 nor a power
 * of two.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __COREBOOT_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend); align the divisor with the
	@ top set bit of the dividend.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f				@ fewer than 4 steps needed: skip the batch loop

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ GE (signed) when dividend >= 1
	mov	\divisor, \divisor, lsr #4	@ plain mov: flags from the cmp are kept
	subsge	\order, \order, #4		@ count down only while dividend is non-zero
	bge	1b

	@ Finished when no partial batch is pending (low two bits of order
	@ clear) or when the dividend has already reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2			@ flags from order + 2
	blt	4f				@ order < -2: one step left
	beq	3f				@ order == -2: two steps left
	cmp	\dividend, \divisor		@ otherwise: three steps left
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *   Writes r1, r2, r3 and the flags.
 *   A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
.global __aeabi_uidiv
__aeabi_uidiv:

	subs	r2, r1, #1			@ r2 = divisor - 1
	moveq	pc, lr				@ divisor == 1: quotient is the dividend
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor: quotient is 0 or 1
	tst	r1, r2
	beq	12f				@ divisor & (divisor - 1) == 0: power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2				@ quotient was accumulated in r2
	mov	pc, lr

	@ Flags still come from "cmp r0, r1": equal gives 1, lower gives 0.
11:	moveq	r0, #1
	movne	r0, #0
	mov	pc, lr

	@ Power-of-two divisor: shift right by the bit index of the divisor.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	mov	pc, lr

.type __aeabi_uidiv, %function
.size __aeabi_uidiv, .-__aeabi_uidiv
ENDPROC(__udivsi3)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = dividend mod divisor
 *   Writes r1, r2, r3 and the flags.
 *   A zero divisor branches to Ldiv0.
 *
 * The conditional chain below resolves every easy case without a branch:
 *   divisor == 1 or dividend == divisor -> r0 = 0
 *   dividend < divisor                  -> r0 unchanged
 *   divisor is a power of 2             -> r0 &= divisor - 1
 * and returns through movls for all of them.
 */
ENTRY(__umodsi3)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	movls	pc, lr

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *   Writes r1, r2, r3, ip and the flags.
 *   A zero divisor branches to Ldiv0.
 *
 * ip holds dividend EOR divisor, so its sign bit is the sign of the result.
 * The magnitude is computed on absolute values (r3 = |dividend|,
 * r1 = |divisor|) and negated at the end when ip is negative.
 */
ENTRY(__divsi3)
.global __aeabi_idiv
__aeabi_idiv:

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ |dividend| <= |divisor|
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negate when the operand signs differed
	mov	pc, lr

	@ Divisor was 1 or -1. ip EOR r0 is the original divisor, so N is set
	@ exactly when the divisor was negative and the dividend is negated.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Flags still come from "cmp r3, r1": lower gives 0, equal gives
	@ +1 or -1 depending on the sign kept in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	mov	pc, lr

	@ Power-of-two divisor: shift |dividend| right, then apply the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ plain mov: flags from the cmp are kept
	rsbmi	r0, r0, #0
	mov	pc, lr

.type __aeabi_idiv, %function
.size __aeabi_idiv, .-__aeabi_idiv
ENDPROC(__divsi3)

/*
 * __modsi3: signed 32-bit remainder.
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = remainder, carrying the sign of the dividend
 *   Writes r1, r2, r3, ip and the flags.
 *   A zero divisor branches to Ldiv0.
 *
 * Works on absolute values using the same conditional chain as __umodsi3,
 * then negates the result when the original dividend (kept in ip) was
 * negative.
 */
ENTRY(__modsi3)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f				@ easy case resolved: only fix up the sign

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

ENDPROC(__modsi3)

/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient, r1 = remainder
 *
 * Saves the operands on the stack, calls __aeabi_uidiv, then rebuilds the
 * remainder as dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = saved dividend, r2 = saved divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient, r1 = remainder
 *
 * Same scheme as __aeabi_uidivmod, built on __aeabi_idiv.
 */
ENTRY(__aeabi_idivmod)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = saved dividend, r2 = saved divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_idivmod)


/*
 * Ldiv0: common division-by-zero path for the entry points above.
 * Saves lr, calls __div0 (not defined in the visible source) and then
 * returns 0 in r0 to the original caller.
 * NOTE(review): the 8 byte stack adjustment for a single 4 byte store
 * presumably keeps sp 8-byte aligned across the call - confirm.
 */
Ldiv0:
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc
ENDPROC(Ldiv0)

#endif