/*
 * memset - fill memory with a constant
 *
 * Copyright (c) 2010-2021, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This memset routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.

 */

        .syntax unified
        .arch armv7-a

@ 2011-08-30 david.gilbert@linaro.org
@    Extracted from local git 2f11b436

@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
        .thumb

@ ---------------------------------------------------------------------------
        .thumb_func
        .align 2
        .p2align 4,,15
        .global __memset_arm
        .type __memset_arm,%function
__memset_arm:
        @ r0 = address
        @ r1 = character
        @ r2 = count
        @ returns original address in r0

        mov     r3, r0          @ Leave r0 alone
        cbz     r2, 10f         @ Exit if 0 length

        tst     r0, #7
        beq     2f              @ Already aligned

        @ OK, so we're misaligned here
1:
        strb    r1, [r3], #1
        subs    r2, r2, #1
        tst     r3, #7
        cbz     r2, 10f         @ Exit if we hit the end
        bne     1b              @ go round again if still misaligned

2:
        @ OK, so we're aligned
        push    {r4,r5,r6,r7}
        bics    r4, r2, #15     @ if less than 16 bytes then need to finish it off
        beq     5f

3:
        @ POSIX says that ch is cast to an unsigned char.  A uxtb is two
        @ bytes and takes two cycles, whereas an AND is four bytes but one
        @ cycle.
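        @ Broadcast the byte across a full 32-bit word, then copy it into
        @ r5-r7 so the main loop below can store 16 bytes per STMIA.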
        and     r1, #0xFF
        orr     r1, r1, r1, lsl #8      @ Same character into all bytes
        orr     r1, r1, r1, lsl #16
        mov     r5, r1
        mov     r6, r1
        mov     r7, r1

4:
        subs    r4, r4, #16
        stmia   r3!, {r1,r5,r6,r7}
        bne     4b
        and     r2, r2, #15

        @ At this point we're still aligned and have up to 15 bytes left to write;
        @ avoid some of the byte-at-a-time work by storing an 8-byte chunk first.
        tst     r2, #8
        itt     ne
        subne   r2, r2, #8
        stmiane r3!, {r1,r5}

5:
        pop     {r4,r5,r6,r7}
        cbz     r2, 10f

        @ Store the last few (< 16) bytes one at a time
6:
        subs    r2, r2, #1
        strb    r1, [r3], #1
        bne     6b

10:
        bx      lr              @ goodbye
        .size   __memset_arm, . - __memset_arm