/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on linux/arch/arm/lib/memset.S
 *
 * ASM optimised string functions
 */

#include <arch/asm.h>
#include "asmlib.h"

.syntax unified

/*
 * memset - fill a memory region with a byte value.
 *
 * C equivalent: void *memset(void *s, int c, size_t n)
 *
 * In:    r0 = destination pointer (any alignment)
 *        r1 = fill value; only the low 8 bits are used
 *        r2 = number of bytes to fill
 * Out:   r0 = destination pointer (never modified)
 * Clobb: r1, r2, r3, ip, condition flags
 * Stack: r8 and lr (r4-r8 and lr in the CALGN variant) are pushed around
 *        the bulk loop and restored before returning.
 *
 * Register roles inside the routine:
 *        ip = running write pointer
 *        r1 = fill byte replicated into all four byte lanes
 *        r2 = bytes remaining; once it has gone negative only its low
 *             bits are inspected, and those still equal the residual count
 *        r3 = (dest & 3) on entry, afterwards a copy of r1
 */
ENTRY(memset)
	and	r1, r1, #255		@ treat the value as unsigned char
	ands	r3, r0, #3		@ r3 = misalignment of dest (0..3)
	mov	ip, r0			@ write through ip, keep r0 for return
	bne	6f			@ unaligned: store head bytes first
/*
 * The pointer in ip is aligned to a word boundary from here on.
 */
1:	orr	r1, r1, r1, lsl #8	@ replicate byte into a halfword
	orr	r1, r1, r1, lsl #16	@ ... and into the full word
	mov	r3, r1
	cmp	r2, #16
	blt	4f			@ fewer than 16 bytes: tail only

#if ! CALGN(1)+0

/*
 * Two more registers are needed for the 16-byte stores: use r8 and lr.
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes per iteration
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ exactly done: restore and return
/*
 * Fewer than 64 bytes remain. r2 is negative here, but there is no need
 * to correct it: only individual bits are tested from now on.
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ only align when more than 96 bytes
	tstgt	ip, #31			@ ... and ip is not 32-byte aligned
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to the 32-byte boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4 (16), N = bit 3 (8)
	stmiacs	ip!, {r4, r5, r6, r7}	@ 16 bytes if bit 4 was set
	stmiami	ip!, {r4, r5}		@ 8 bytes if bit 3 was set
	tst	r8, #(1 << 30)		@ original bit 2 (4)
	mov	r8, r1			@ r8 is a fill register again
	strne	r1, [ip], #4

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}	@ 64 bytes per iteration
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}	@ exactly done: restore and return

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, fewer than 4 bytes are left to set. The pointer
 * may be unaligned as well (when entered from label 6 below).
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	mov	pc, lr

/*
 * Unaligned destination: r3 = dest & 3 (1..3). Store 4 - r3 bytes to
 * reach a word boundary, then restart at the aligned path.
 */
6:	subs	r2, r2, #4		@ are there enough bytes to align with?
	blt	5b			@ no: low bits of r2 still hold the count
	cmp	r3, #2
	strblt	r1, [ip], #1		@ r3 == 1: three bytes in total
	strble	r1, [ip], #1		@ r3 <= 2: at least two bytes
	strb	r1, [ip], #1		@ always at least one byte
	add	r2, r2, r3		@ r2 = r2 - (4 - r3)
	b	1b
ENDPROC(memset)