xref: /aosp_15_r20/external/coreboot/src/arch/arm/libgcc/lib1funcs.S (revision b9411a12aaaa7e1e6a6fb7c5e057f44ee179a49c)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 */
5
6
/* The rest of the file is assembled only when __GNUC__ is defined. */
#if defined __GNUC__

#include <arch/asm.h>

/* The conditional mnemonics below are written in unified syntax. */
.syntax unified
12
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned 32-bit shift-and-subtract division core.
 *
 *   dividend  in: numerator. Overwritten by the loop; no caller in this
 *             file uses the value left behind.
 *   divisor   in: denominator. Overwritten (shifted left, then right).
 *   result    out: quotient.
 *   curbit    scratch: quotient bit that matches the current divisor shift.
 *
 * Condition flags are clobbered. The macro has no handling of its own for
 * divisor == 0 or dividend <= divisor; every expansion in this file is
 * reached only after those cases (and power-of-two divisors) have been
 * dispatched elsewhere.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __COREBOOT_ARM_ARCH__ >= 5

	@ Line the divisor up with the dividend in one step: the shift
	@ count is the difference between their leading-zero counts.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled four times: each pass tries the divisor
	@ at shifts 0 to 3 and records the matching quotient bits.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm
77
78
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute the bit index of a power-of-two divisor so that the division
 * can be carried out as a right shift.
 *
 *   divisor   in: value with exactly one bit set (both call sites in this
 *             file test "divisor AND (divisor - 1)" before expanding the
 *             macro). Left intact when __COREBOOT_ARM_ARCH__ >= 5,
 *             overwritten otherwise.
 *   order     out: log2(divisor).
 *
 * Condition flags are clobbered only on the #else path.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __COREBOOT_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - leading zeros

#else

	@ Binary search for the set bit: whenever it lies in the upper
	@ part of the remaining range, shift it down and add the shift
	@ distance to the running order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ For a power-of-two input the divisor is now 1, 2, 4 or 8:
	@ add 3 for 8, otherwise add divisor / 2 (0, 1 or 2).
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
108
109
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned 32-bit remainder core: subtracts successively smaller shifted
 * copies of the divisor from the dividend.
 *
 *   dividend  in: numerator; out: remainder.
 *   divisor   in: denominator. Overwritten.
 *   order     scratch: number of bit positions the divisor was shifted
 *             left; order + 1 compare/subtract steps are then needed.
 *   spare     scratch; written only when __COREBOOT_ARM_ARCH__ >= 5.
 *
 * Condition flags are clobbered. Both expansions in this file are reached
 * only when dividend > divisor > 0 and the divisor is not a power of two.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __COREBOOT_ARM_ARCH__ >= 5

	@ Shift count = difference between the leading-zero counts.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f				@ fewer than 4 steps in total

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ lt here means dividend is 0
	mov	\divisor, \divisor, lsr #4	@ plain mov: flags are kept
	subsge	\order, \order, #4		@ skipped once dividend is 0
	bge	1b

	@ Finished if no steps are left over or the dividend is already 0.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order is -1, -2 or -3 here, meaning 3, 2 or 1 steps respectively.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
179
180
/*
 * unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
 * The same code is also exported under the name __aeabi_uidiv.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient
 *   Clobbers: r1, r2, r3, condition flags
 *
 * A zero divisor branches (without linking) to Ldiv0, so lr still points
 * at the caller of this routine when Ldiv0 runs.
 */
ENTRY(__udivsi3)
.global __aeabi_uidiv
__aeabi_uidiv:

	subs	r2, r1, #1			@ r2 = divisor - 1
	moveq	pc, lr				@ divisor == 1: r0 is the quotient
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2				@ quotient was built in r2
	mov	pc, lr

	@ Flags still come from the cmp above.
11:	moveq	r0, #1				@ dividend == divisor
	movne	r0, #0				@ dividend < divisor
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2			@ divide by shifting
	mov	pc, lr

.type __aeabi_uidiv, %function
.size __aeabi_uidiv, .-__aeabi_uidiv
ENDPROC(__udivsi3)
210
/*
 * unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = remainder
 *   Clobbers: r1, r2, r3, condition flags
 *
 * The opening sequence is one chain of conditional instructions; each
 * step relies on the flags left by the previous one, so the order matters.
 * A zero divisor branches (without linking) to Ldiv0.
 */
ENTRY(__umodsi3)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0				@ borrow: divisor == 0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor == 1 or equal operands
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ keep the bits below the divisor
	movls	pc, lr				@ done: r0 already holds the answer

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

ENDPROC(__umodsi3)
226
/*
 * int __divsi3(int dividend, int divisor)
 * The same code is also exported under the name __aeabi_idiv.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient
 *   Clobbers: r1, r2, r3, ip, condition flags
 *
 * The division is done on the magnitudes of both operands; the quotient
 * is negated at the end when the operand signs differ (bit 31 of ip).
 * A zero divisor branches (without linking) to Ldiv0.
 */
ENTRY(__divsi3)
.global __aeabi_idiv
__aeabi_idiv:

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0				@ divisor == 0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ magnitude of dividend <= divisor
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ operand signs differed
	mov	pc, lr

	@ Divisor was 1 or -1. ip eor r0 gives back the original divisor,
	@ so N is set exactly when that divisor was negative.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Flags still come from the cmp r3, r1 above.
11:	movlo	r0, #0				@ smaller magnitude: quotient 0
	moveq	r0, ip, asr #31			@ equal magnitude: 0 or -1 ...
	orreq	r0, r0, #1			@ ... turned into 1 or -1
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ shift the magnitude
	rsbmi	r0, r0, #0			@ operand signs differed
	mov	pc, lr

.type __aeabi_idiv, %function
.size __aeabi_idiv, .-__aeabi_idiv
ENDPROC(__divsi3)
269
/*
 * int __modsi3(int dividend, int divisor)
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = remainder, carrying the sign of the dividend
 *   Clobbers: r1, r2, r3, ip, condition flags
 *
 * The remainder is computed on the magnitudes of both operands and is
 * negated at the end when the original dividend (kept in ip) was negative.
 * A zero divisor branches (without linking) to Ldiv0.
 */
ENTRY(__modsi3)

	cmp	r1, #0
	beq	Ldiv0				@ divisor == 0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor == 1 or equal magnitudes
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ keep the bits below the divisor
	bls	10f				@ r0 already holds the magnitude

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ dividend was negative
	mov	pc, lr

ENDPROC(__modsi3)
291
/*
 * __aeabi_uidivmod - unsigned division returning quotient and remainder.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient, r1 = remainder
 *   Clobbers: r2, r3, condition flags (ip and lr are saved and restored)
 *
 * The quotient comes from __aeabi_uidiv; the remainder is then rebuilt as
 * dividend - quotient * divisor from the operands saved on the stack.
 */
ENTRY(__aeabi_uidivmod)

	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands for later
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_uidivmod)
302
/*
 * __aeabi_idivmod - signed division returning quotient and remainder.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient, r1 = remainder
 *   Clobbers: r2, r3, condition flags (ip and lr are saved and restored)
 *
 * The quotient comes from __aeabi_idiv; the remainder is then rebuilt as
 * dividend - quotient * divisor from the operands saved on the stack.
 */
ENTRY(__aeabi_idivmod)
	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands for later
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_idivmod)
312
313
/*
 * Ldiv0 - shared divide-by-zero exit for the division routines above.
 *
 * It is entered with a plain branch, never a call, so lr still holds the
 * return address of whoever called the division routine. The code calls
 * __div0 (not defined in this listing), then hands 0 back in r0 to that
 * original caller.
 */
Ldiv0:
	str	lr, [sp, #-8]!		@ push lr; sp moves down by 8 bytes
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc
ENDPROC(Ldiv0)
320
#endif /* defined __GNUC__ */
322