/* xref: /aosp_15_r20/external/compiler-rt/lib/builtins/arm/udivsi3.S (revision 7c3d14c8b49c529e04be81a3ce6f5cc23712e4c6) */
/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivsi3 (32-bit unsigned integer divide)
 * function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

	/* Unified (UAL) syntax: one source text for both ARM and Thumb-2. */
	.syntax unified
	.text

	/* Assemble as Thumb when the target is built for Thumb-2. */
#if __ARM_ARCH_ISA_THUMB == 2
	.thumb
#endif

	/* Align the function entry that follows to a 4-byte boundary. */
	.p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)

@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the quotient of the (unsigned) division.
@
@   In:      r0 = dividend, r1 = divisor
@   Out:     r0 = quotient
@   Scratch: condition flags; in the software path also r3 and ip, plus r2
@            when CLZ is not available. r1 is only read.
@   Divisor == 0: r0 is set to 0, then control is transferred to
@            __aeabi_idiv0 on EABI targets; otherwise 0 is returned.

#if __ARM_ARCH_ISA_THUMB == 2
DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3)
#else
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#endif
#if __ARM_ARCH_EXT_IDIV__
	/* Hardware divider: only a zero divisor needs to be filtered out. */
	tst     r1, r1
	beq     LOCAL_LABEL(divby0)
	udiv	r0, r0, r1
	bx  	lr
#else
	/*
	 * Trivial cases first. After "cmp r1, #1":
	 *   cc (r1 < 1)  -> divisor is zero,
	 *   eq (r1 == 1) -> quotient is the dividend, r0 is already correct.
	 */
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	IT(eq)
	JMPc(lr, eq)
	/* dividend < divisor: the quotient is 0. */
	cmp	r0, r1
	ITT(cc)
	movcc	r0, #0
	JMPc(lr, cc)
	/*
	 * Implement division using binary long division algorithm.
	 *
	 * r0 is the numerator, r1 the denominator.
	 *
	 * The code before JMP computes the correct shift I, so that
	 * r0 and (r1 << I) have the highest bit set in the same position.
	 * At the time of JMP, ip := .Ldiv0block - BLOCKSIZE * I, which is the
	 * address of block(I). This depends on the fixed instruction size of
	 * block: BLOCKSIZE is 12 Bytes in ARM mode and 14 Bytes in THUMB mode.
	 *
	 * block(shift) implements the test-and-update-quotient core.
	 * It assumes (r1 << shift) can be computed without overflow and
	 * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
	 */

#  ifdef __ARM_FEATURE_CLZ
	/* I = clz(r1) - clz(r0), left in r3. */
	clz	ip, r0
	clz	r3, r1
	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
	sub	r3, r3, ip
#    if __ARM_ARCH_ISA_THUMB == 2
	/*
	 * The + 1 sets bit 0 of the branch target so that bx stays in Thumb
	 * state. Thumb blocks are 14 bytes: subtract I * (2 + 4 + 8).
	 */
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	/* ARM blocks are 12 bytes: subtract I * (4 + 8). */
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #2
	sub	ip, ip, r3, lsl #3
	/* Clear the quotient accumulator and enter the chain at block(I). */
	mov	r3, #0
	bx	ip
#  else
#    if __ARM_ARCH_ISA_THUMB == 2
#    error THUMB mode requires CLZ or UDIV
#    endif
	/*
	 * No CLZ: find I by binary search (steps of 16, 8, 4, 2, 1).
	 * r2 is a copy of the numerator that is shifted right as the search
	 * proceeds; whenever (r2 >> k) >= r1 the step k is taken, i.e. r2 is
	 * replaced by r2 >> k and ip moves back by k blocks of 12 bytes.
	 */
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 12)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 12)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 12)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 12)

	/* Last block, no need to update r2 or r3. */
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 12)

	/* Clear the quotient accumulator and enter the chain at block(I). */
	mov	r3, #0

	JMP(ip)
#  endif

/*
 * IMM supplies the '#' immediate prefix inside block(): a literal '#' in a
 * function-like macro body would be taken as the preprocessor stringify
 * operator.
 */
#define	IMM	#

/*
 * block(shift): one step of the long division.
 *   if (r0 >= (r1 << shift)) { r3 += 1 << shift; r0 -= r1 << shift; }
 * Every expansion must have the same size, because the code above jumps
 * into the chain by address arithmetic. NOTE(review): ITT() and WIDE() come
 * from assembly.h; WIDE() is presumably what pins the Thumb encodings to a
 * fixed width - confirm against that header before touching this macro.
 */
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	ITT(hs);                                                               \
	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	/* Fully unrolled chain, entered at block(I) and falling through to block(0). */
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	/* The quotient was accumulated in r3; return it in r0. */
	mov	r0, r3
	JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

/* Division by zero: r0 = 0, then defer to __aeabi_idiv0 on EABI targets. */
LOCAL_LABEL(divby0):
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivsi3)

NO_EXEC_STACK_DIRECTIVE
