xref: /aosp_15_r20/external/compiler-rt/lib/builtins/arm/udivmodsi4.S (revision 7c3d14c8b49c529e04be81a3ce6f5cc23712e4c6)
1*7c3d14c8STreehugger Robot/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
2*7c3d14c8STreehugger Robot *
3*7c3d14c8STreehugger Robot *                     The LLVM Compiler Infrastructure
4*7c3d14c8STreehugger Robot *
5*7c3d14c8STreehugger Robot * This file is dual licensed under the MIT and the University of Illinois Open
6*7c3d14c8STreehugger Robot * Source Licenses. See LICENSE.TXT for details.
7*7c3d14c8STreehugger Robot *
8*7c3d14c8STreehugger Robot *===----------------------------------------------------------------------===//
9*7c3d14c8STreehugger Robot *
10*7c3d14c8STreehugger Robot * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
11*7c3d14c8STreehugger Robot * modulus) function for the ARM 32-bit architecture.
12*7c3d14c8STreehugger Robot *
13*7c3d14c8STreehugger Robot *===----------------------------------------------------------------------===*/
14*7c3d14c8STreehugger Robot
15*7c3d14c8STreehugger Robot#include "../assembly.h"
16*7c3d14c8STreehugger Robot
17*7c3d14c8STreehugger Robot	.syntax unified
18*7c3d14c8STreehugger Robot	.text
19*7c3d14c8STreehugger Robot
20*7c3d14c8STreehugger Robot#if __ARM_ARCH_ISA_THUMB == 2
21*7c3d14c8STreehugger Robot	.thumb
22*7c3d14c8STreehugger Robot#endif
23*7c3d14c8STreehugger Robot
24*7c3d14c8STreehugger Robot@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
25*7c3d14c8STreehugger Robot@                           unsigned int *remainder)
26*7c3d14c8STreehugger Robot@   Calculate the quotient and remainder of the (unsigned) division.  The return
27*7c3d14c8STreehugger Robot@   value is the quotient; the remainder is stored through the remainder pointer.
28*7c3d14c8STreehugger Robot
29*7c3d14c8STreehugger Robot	.p2align 2
30*7c3d14c8STreehugger Robot#if __ARM_ARCH_ISA_THUMB == 2
31*7c3d14c8STreehugger RobotDEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4)
32*7c3d14c8STreehugger Robot#else
33*7c3d14c8STreehugger RobotDEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
34*7c3d14c8STreehugger Robot#endif	/* On entry: r0 = dividend, r1 = divisor, r2 = pointer that receives the remainder; the quotient is returned in r0. */
35*7c3d14c8STreehugger Robot#if __ARM_ARCH_EXT_IDIV__	/* Hardware-divide path: one udiv plus one mls. */
36*7c3d14c8STreehugger Robot	tst     r1, r1	/* Z is set when the divisor is zero. */
37*7c3d14c8STreehugger Robot	beq     LOCAL_LABEL(divby0)
38*7c3d14c8STreehugger Robot	mov 	r3, r0	/* keep the dividend; r0 is about to be overwritten */
39*7c3d14c8STreehugger Robot	udiv	r0, r3, r1	/* r0 = dividend / divisor */
40*7c3d14c8STreehugger Robot	mls 	r1, r0, r1, r3	/* r1 = dividend - quotient * divisor, i.e. the remainder */
41*7c3d14c8STreehugger Robot	str 	r1, [r2]	/* store the remainder through the pointer argument */
42*7c3d14c8STreehugger Robot	bx  	lr
43*7c3d14c8STreehugger Robot#else	/* Software path: special cases first, then unrolled binary long division. */
44*7c3d14c8STreehugger Robot	cmp	r1, #1
45*7c3d14c8STreehugger Robot	bcc	LOCAL_LABEL(divby0)	/* unsigned r1 < 1, so the divisor is 0 */
46*7c3d14c8STreehugger Robot	beq	LOCAL_LABEL(divby1)	/* divisor is exactly 1 */
47*7c3d14c8STreehugger Robot	cmp	r0, r1
48*7c3d14c8STreehugger Robot	bcc	LOCAL_LABEL(quotient0)	/* dividend < divisor: quotient 0, remainder = dividend */
49*7c3d14c8STreehugger Robot	/*
50*7c3d14c8STreehugger Robot	 * Implement division using binary long division algorithm.
51*7c3d14c8STreehugger Robot	 *
52*7c3d14c8STreehugger Robot	 * r0 is the numerator, r1 the denominator.
53*7c3d14c8STreehugger Robot	 *
54*7c3d14c8STreehugger Robot	 * The code before JMP computes the correct shift I, so that
55*7c3d14c8STreehugger Robot	 * r0 and (r1 << I) have the highest bit set in the same position.
56*7c3d14c8STreehugger Robot	 * At the time of JMP, ip := .Ldiv0block - 12 * I (14 * I in THUMB mode).
57*7c3d14c8STreehugger Robot	 * This depends on the fixed instruction size of block.
58*7c3d14c8STreehugger Robot	 * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
59*7c3d14c8STreehugger Robot	 *
60*7c3d14c8STreehugger Robot	 * block(shift) implements the test-and-update-quotient core.
61*7c3d14c8STreehugger Robot	 * It assumes (r1 << shift) can be computed without overflow and
62*7c3d14c8STreehugger Robot	 * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
63*7c3d14c8STreehugger Robot	 */
64*7c3d14c8STreehugger Robot
65*7c3d14c8STreehugger Robot#  ifdef __ARM_FEATURE_CLZ
66*7c3d14c8STreehugger Robot	clz	ip, r0
67*7c3d14c8STreehugger Robot	clz	r3, r1
68*7c3d14c8STreehugger Robot	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
69*7c3d14c8STreehugger Robot	sub	r3, r3, ip	/* r3 = I, the shift that lines up the top bits of r1 and r0 */
70*7c3d14c8STreehugger Robot#    if __ARM_ARCH_ISA_THUMB == 2
71*7c3d14c8STreehugger Robot	adr	ip, LOCAL_LABEL(div0block) + 1	/* + 1 sets bit 0 of the address used by the bx below */
72*7c3d14c8STreehugger Robot	sub	ip, ip, r3, lsl #1	/* extra 2 * I here: 2 + 4 + 8 = 14 bytes per block */
73*7c3d14c8STreehugger Robot#    else
74*7c3d14c8STreehugger Robot	adr	ip, LOCAL_LABEL(div0block)
75*7c3d14c8STreehugger Robot#    endif
76*7c3d14c8STreehugger Robot	sub	ip, ip, r3, lsl #2	/* ip -= 4 * I */
77*7c3d14c8STreehugger Robot	sub	ip, ip, r3, lsl #3	/* ip -= 8 * I, so 12 * I in total for ARM mode */
78*7c3d14c8STreehugger Robot	mov	r3, #0	/* r3 is reused as the quotient accumulator */
79*7c3d14c8STreehugger Robot	bx	ip	/* enter the block ladder at block(I) */
80*7c3d14c8STreehugger Robot#  else	/* No CLZ: find the entry block with a 16/8/4/2/1 binary search. */
81*7c3d14c8STreehugger Robot#    if __ARM_ARCH_ISA_THUMB == 2
82*7c3d14c8STreehugger Robot#    error THUMB mode requires CLZ or UDIV
83*7c3d14c8STreehugger Robot#    endif
84*7c3d14c8STreehugger Robot	str	r4, [sp, #-8]!	/* save r4; sp is pre-decremented by 8 */
85*7c3d14c8STreehugger Robot
86*7c3d14c8STreehugger Robot	mov	r4, r0	/* r4 = working copy of the numerator, shifted down as the search narrows */
87*7c3d14c8STreehugger Robot	adr	ip, LOCAL_LABEL(div0block)
88*7c3d14c8STreehugger Robot
89*7c3d14c8STreehugger Robot	lsr	r3, r4, #16
90*7c3d14c8STreehugger Robot	cmp	r3, r1
91*7c3d14c8STreehugger Robot	movhs	r4, r3	/* (r4 >> 16) >= r1: keep the shifted value ... */
92*7c3d14c8STreehugger Robot	subhs	ip, ip, #(16 * 12)	/* ... and move the entry point back by 16 blocks */
93*7c3d14c8STreehugger Robot
94*7c3d14c8STreehugger Robot	lsr	r3, r4, #8
95*7c3d14c8STreehugger Robot	cmp	r3, r1
96*7c3d14c8STreehugger Robot	movhs	r4, r3
97*7c3d14c8STreehugger Robot	subhs	ip, ip, #(8 * 12)	/* back by 8 blocks */
98*7c3d14c8STreehugger Robot
99*7c3d14c8STreehugger Robot	lsr	r3, r4, #4
100*7c3d14c8STreehugger Robot	cmp	r3, r1
101*7c3d14c8STreehugger Robot	movhs	r4, r3
102*7c3d14c8STreehugger Robot	subhs	ip, #(4 * 12)	/* back by 4 blocks (two-operand form of the same subtraction) */
103*7c3d14c8STreehugger Robot
104*7c3d14c8STreehugger Robot	lsr	r3, r4, #2
105*7c3d14c8STreehugger Robot	cmp	r3, r1
106*7c3d14c8STreehugger Robot	movhs	r4, r3
107*7c3d14c8STreehugger Robot	subhs	ip, ip, #(2 * 12)	/* back by 2 blocks */
108*7c3d14c8STreehugger Robot
109*7c3d14c8STreehugger Robot	/* Last block, no need to update r3 or r4. */
110*7c3d14c8STreehugger Robot	cmp	r1, r4, lsr #1
111*7c3d14c8STreehugger Robot	subls	ip, ip, #(1 * 12)	/* r1 <= (r4 >> 1): back by one more block */
112*7c3d14c8STreehugger Robot
113*7c3d14c8STreehugger Robot	ldr	r4, [sp], #8	/* restore r4, we are done with it. */
114*7c3d14c8STreehugger Robot	mov	r3, #0	/* r3 is reused as the quotient accumulator */
115*7c3d14c8STreehugger Robot
116*7c3d14c8STreehugger Robot	JMP(ip)	/* JMP comes from assembly.h (not visible here); presumably an indirect branch to ip */
117*7c3d14c8STreehugger Robot#  endif
118*7c3d14c8STreehugger Robot
119*7c3d14c8STreehugger Robot#define	IMM	#
120*7c3d14c8STreehugger Robot
121*7c3d14c8STreehugger Robot#define block(shift)                                                           \
122*7c3d14c8STreehugger Robot	cmp	r0, r1, lsl IMM shift;                                         \
123*7c3d14c8STreehugger Robot	ITT(hs);                                                               \
124*7c3d14c8STreehugger Robot	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
125*7c3d14c8STreehugger Robot	WIDE(subhs)	r0, r0, r1, lsl IMM shift
126*7c3d14c8STreehugger Robot	/* block(shift): if r0 >= (r1 << shift), add (1 << shift) to the quotient in r3 and subtract (r1 << shift) from r0. ITT and WIDE come from assembly.h (not visible here); presumably they keep every block the same size in THUMB mode. Execution falls through from block(I) down to block(0). */
127*7c3d14c8STreehugger Robot	block(31)
128*7c3d14c8STreehugger Robot	block(30)
129*7c3d14c8STreehugger Robot	block(29)
130*7c3d14c8STreehugger Robot	block(28)
131*7c3d14c8STreehugger Robot	block(27)
132*7c3d14c8STreehugger Robot	block(26)
133*7c3d14c8STreehugger Robot	block(25)
134*7c3d14c8STreehugger Robot	block(24)
135*7c3d14c8STreehugger Robot	block(23)
136*7c3d14c8STreehugger Robot	block(22)
137*7c3d14c8STreehugger Robot	block(21)
138*7c3d14c8STreehugger Robot	block(20)
139*7c3d14c8STreehugger Robot	block(19)
140*7c3d14c8STreehugger Robot	block(18)
141*7c3d14c8STreehugger Robot	block(17)
142*7c3d14c8STreehugger Robot	block(16)
143*7c3d14c8STreehugger Robot	block(15)
144*7c3d14c8STreehugger Robot	block(14)
145*7c3d14c8STreehugger Robot	block(13)
146*7c3d14c8STreehugger Robot	block(12)
147*7c3d14c8STreehugger Robot	block(11)
148*7c3d14c8STreehugger Robot	block(10)
149*7c3d14c8STreehugger Robot	block(9)
150*7c3d14c8STreehugger Robot	block(8)
151*7c3d14c8STreehugger Robot	block(7)
152*7c3d14c8STreehugger Robot	block(6)
153*7c3d14c8STreehugger Robot	block(5)
154*7c3d14c8STreehugger Robot	block(4)
155*7c3d14c8STreehugger Robot	block(3)
156*7c3d14c8STreehugger Robot	block(2)
157*7c3d14c8STreehugger Robot	block(1)
158*7c3d14c8STreehugger RobotLOCAL_LABEL(div0block):
159*7c3d14c8STreehugger Robot	block(0)
160*7c3d14c8STreehugger Robot
161*7c3d14c8STreehugger Robot	str	r0, [r2]	/* what is left in r0 is the remainder */
162*7c3d14c8STreehugger Robot	mov	r0, r3	/* return the accumulated quotient */
163*7c3d14c8STreehugger Robot	JMP(lr)
164*7c3d14c8STreehugger Robot
165*7c3d14c8STreehugger RobotLOCAL_LABEL(quotient0):	/* dividend < divisor */
166*7c3d14c8STreehugger Robot	str	r0, [r2]	/* remainder = dividend */
167*7c3d14c8STreehugger Robot	mov	r0, #0	/* quotient = 0 */
168*7c3d14c8STreehugger Robot	JMP(lr)
169*7c3d14c8STreehugger Robot
170*7c3d14c8STreehugger RobotLOCAL_LABEL(divby1):	/* divisor == 1: r0 already holds the quotient (the dividend) */
171*7c3d14c8STreehugger Robot	mov	r3, #0
172*7c3d14c8STreehugger Robot	str	r3, [r2]	/* remainder = 0 */
173*7c3d14c8STreehugger Robot	JMP(lr)
174*7c3d14c8STreehugger Robot#endif /* __ARM_ARCH_EXT_IDIV__ */
175*7c3d14c8STreehugger Robot
176*7c3d14c8STreehugger RobotLOCAL_LABEL(divby0):	/* divisor == 0; note that nothing is stored through r2 on this path */
177*7c3d14c8STreehugger Robot	mov	r0, #0
178*7c3d14c8STreehugger Robot#ifdef __ARM_EABI__
179*7c3d14c8STreehugger Robot	b	__aeabi_idiv0	/* branch (no link) to the external __aeabi_idiv0 with r0 = 0 */
180*7c3d14c8STreehugger Robot#else
181*7c3d14c8STreehugger Robot	JMP(lr)	/* non-EABI: return with r0 = 0 */
182*7c3d14c8STreehugger Robot#endif
183*7c3d14c8STreehugger Robot
184*7c3d14c8STreehugger RobotEND_COMPILERRT_FUNCTION(__udivmodsi4)
185*7c3d14c8STreehugger Robot
186*7c3d14c8STreehugger RobotNO_EXEC_STACK_DIRECTIVE
187*7c3d14c8STreehugger Robot
188