/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on linux/arch/arm/lib/memset.S
 *
 * ASM optimised string functions
 */

#include <arch/asm.h>
#include "asmlib.h"

.syntax unified

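/*
 * AAPCS arguments: r0 = dest, r1 = fill byte, r2 = byte count.
 * ip is used as the running destination pointer so that r0 can be
 * returned unmodified.
 */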
ENTRY(memset)
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
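/*
 * Replicate the fill byte across the word: e.g. r1 = 0x000000ab
 * becomes 0xabababab, so every word store below writes four copies
 * of the byte at once.
 */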
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
	cmp	r2, #16
	blt	4f

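/*
 * CALGN() comes from asmlib.h and, as in the Linux original, is
 * expected to expand to its argument only on cores that benefit
 * from cache-line-aligned stores; it selects one of the two bulk
 * loops below.
 */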
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

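/*
 * Each stmia below stores four registers (16 bytes), so one
 * iteration writes 64 bytes.  The "ge" condition skips the stores
 * once the count has gone negative.
 */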
2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
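/*
 * r2 is negative here, but its low bits still equal the remaining
 * count: bit 5 selects a 32-byte store pair, bit 4 a single
 * 16-byte store.
 */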
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

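/*
 * Runs over 96 bytes that are not yet 32-byte aligned get here.
 * r8 = bytes needed to reach a 32-byte boundary (4..28, a multiple
 * of 4).  lsl #28 moves bit 4 of that count into C and bit 3 into
 * N, so the conditional stores below consume 16, 8 and 4 bytes as
 * required.
 */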
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}	@ C set: store 16 bytes
	stmiami	ip!, {r4, r5}		@ N set: store 8 bytes
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4		@ bit 2 set: store 4 bytes

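/*
 * Same 64-byte loop as above, but each stmia now stores eight
 * registers (32 bytes), i.e. a whole line on cores with 32-byte
 * cache lines.
 */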
3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

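/*
 * As above: bits 5 and 4 of the remaining count select 32- and
 * 16-byte tail stores.
 */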
	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

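/*
 * Fewer than 16 bytes left: bits 3 and 2 of the count select an
 * 8-byte and a 4-byte store.
 */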
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	mov	pc, lr

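/*
 * Unaligned entry: r3 = dest & 3.  Store 4 - r3 bytes (1, 2 or 3)
 * to reach a word boundary, fix up the count and rejoin the
 * aligned path at 1b.
 */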
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)
