/* Copyright (c) 2012, Linaro Limited
   All rights reserved.
   Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Unaligned accesses
 *
 */
#include <private/bionic_asm.h>

/* Register roles.  Everything below is a scratch register; only
 * dstin (x0) is left untouched by the routine.  */
#define dstin		x0	/* In: start of the buffer; also the return value.  */
#define val		w1	/* In: fill value, only the low 8 bits are used.  */
#define count		x2	/* In: number of bytes; consumed as the fill progresses.  */
#define tmp1		x3
#define tmp1w		w3
#define tmp2		x4
#define tmp2w		w4
#define zva_len_x	x5	/* DC ZVA block size in bytes (64-bit view).  */
#define zva_len		w5
#define zva_bits_x	x6	/* zva_len_x - 1, used as an alignment mask.  */
#define A_l		x7	/* Fill byte replicated into all 8 bytes.  */
#define A_lw		w7
#define dst		x8	/* Running write pointer.  */
#define tmp3w		w9
#define tmp4		x10
/* Threshold for switching to non-temporal stores, expressed in KiB:
 * the code compares count against (SMALL_BUFFER_SIZE << 10) bytes.  */
#define SMALL_BUFFER_SIZE	96

/*
 * __memset_aarch64_nt
 *
 * Fills count bytes starting at dstin with the low byte of val.
 * NOTE(review): register usage matches a memset(void *, int, size_t)
 * style prototype; confirm against the C declaration.
 *
 * In:    x0 = dstin, w1 = val, x2 = count
 * Out:   x0 is never written, so it still holds dstin on return.
 * Clobb: x2-x10 and the NZCV flags.
 *
 * Strategy:
 *   - val == 0 and a large enough count: zero whole blocks with DC ZVA
 *     (unless DCZID_EL0 reports it as prohibited).
 *   - count > SMALL_BUFFER_SIZE KiB: 64-byte loop of STNP (non-temporal).
 *   - otherwise: 64-byte loop of STP, with overlapping stores for tails.
 *
 * count is compared with signed conditions throughout (b.ge/b.le/b.lt),
 * so a count with bit 63 set is handled as "short" rather than as a
 * huge unsigned length.
 */
ENTRY(__memset_aarch64_nt)
	mov	dst, dstin		/* Preserve return value.  */
	ands	A_lw, val, #255
	b.eq	.Lzero_mem		/* Use DC ZVA instruction if the val = 0 */
	/* Replicate the fill byte across all 64 bits of A_l.  */
	orr	A_lw, A_lw, A_lw, lsl #8
	orr	A_lw, A_lw, A_lw, lsl #16
	orr	A_l, A_l, A_l, lsl #32

.Ltail_maybe_long:
	cmp	count, #64
	b.ge	.Lnot_short

.Ltail_maybe_tiny:
	cmp	count, #15
	b.le	.Ltail15tiny

.Ltail63:
	/* Only bits [5:0] of count are examined from here on, so this is
	 * also safe to enter with the biased count left by the loops.  */
	ands	tmp1, count, #0x30	/* Number of whole 16-byte chunks, times 16.  */
	b.eq	.Ltail15
	add	dst, dst, tmp1
	cmp	tmp1w, #0x20
	b.eq	1f			/* 32 bytes: two stores.  */
	b.lt	2f			/* 16 bytes: one store.  */
	stp	A_l, A_l, [dst, #-48]	/* 48 bytes: three stores.  */
1:
	stp	A_l, A_l, [dst, #-32]
2:
	stp	A_l, A_l, [dst, #-16]

.Ltail15:
	and	count, count, #15
	add	dst, dst, count
	stp	A_l, A_l, [dst, #-16]	/* Repeat some/all of last store. */
	ret

.Ltail15tiny:
	/* Set up to 15 bytes.  Does not assume earlier memory
	 * being set.  Each bit of count selects one power-of-two store.  */
	tbz	count, #3, 1f
	str	A_l, [dst], #8
1:
	tbz	count, #2, 1f
	str	A_lw, [dst], #4
1:
	tbz	count, #1, 1f
	strh	A_lw, [dst], #2
1:
	tbz	count, #0, 1f
	strb	A_lw, [dst]
1:
	ret

	/* Start the bulk path at a 64-byte boundary.  Note that the size
	 * check placed at the head of .Lnot_short pushes the STP loop to
	 * byte offset 52, so the loop itself spans two 64-byte lines.  */
	.p2align 6
.Lnot_short:
	mov	tmp4, #SMALL_BUFFER_SIZE
	cmp	count, tmp4, lsl #10
	/* Use non-temporal stores if count > SMALL_BUFFER_SIZE KiB
	 * (i.e. more than SMALL_BUFFER_SIZE << 10 bytes).  */
	b.gt	.Lnot_short_nt
	neg	tmp2, dst
	ands	tmp2, tmp2, #15		/* tmp2 = bytes needed to reach 16-byte alignment.  */
	b.eq	2f
	/* Bring DST to 128-bit (16-byte) alignment.  We know that there's
	 * more than that to set, so we simply store 16 bytes and advance by
	 * the amount required to reach alignment.  */
	sub	count, count, tmp2
	stp	A_l, A_l, [dst]
	add	dst, dst, tmp2
	/* There may be less than 63 bytes to go now.  */
	cmp	count, #63
	b.le	.Ltail63
2:
	sub	dst, dst, #16		/* Pre-bias.  */
	sub	count, count, #64	/* Bias count so the loop can test its sign.  */
1:
	stp	A_l, A_l, [dst, #16]
	stp	A_l, A_l, [dst, #32]
	stp	A_l, A_l, [dst, #48]
	stp	A_l, A_l, [dst, #64]!
	subs	count, count, #64
	b.ge	1b
	tst	count, #0x3f		/* Any bytes left over?  */
	add	dst, dst, #16		/* Undo the pre-bias.  */
	b.ne	.Ltail63
	ret

.Lnot_short_nt:
	/* Same structure as the STP path above, using STNP.  */
	neg	tmp2, dst
	ands	tmp2, tmp2, #15
	b.eq	2f
	/* Bring DST to 128-bit (16-byte) alignment.  We know that there's
	 * more than that to set, so we simply store 16 bytes and advance by
	 * the amount required to reach alignment.  */
	sub	count, count, tmp2
	stnp	A_l, A_l, [dst]
	add	dst, dst, tmp2
	/* There may be less than 63 bytes to go now.  */
	cmp	count, #63
	b.le	.Ltail63
2:
	sub	dst, dst, #16		/* Pre-bias.  */
	sub	count, count, #64
1:
	stnp	A_l, A_l, [dst, #16]
	stnp	A_l, A_l, [dst, #32]
	stnp	A_l, A_l, [dst, #48]
	stnp	A_l, A_l, [dst, #64]
	add	dst, dst, #64		/* Explicit advance; the STP loop uses writeback instead.  */
	subs	count, count, #64
	b.ge	1b
	tst	count, #0x3f
	add	dst, dst, #16		/* Undo the pre-bias.  */
	b.ne	.Ltail63
	ret

.Lzero_mem:
	mov	A_l, #0
	cmp	count, #63
	b.le	.Ltail_maybe_tiny
	neg	tmp2, dst
	ands	tmp2, tmp2, #15
	b.eq	1f
	/* Reach 16-byte alignment with one overlapping 16-byte store.  */
	sub	count, count, tmp2
	stp	A_l, A_l, [dst]
	add	dst, dst, tmp2
	cmp	count, #63
	b.le	.Ltail63
1:
	/* For zeroing small amounts of memory, it's not worth setting up
	 * the line-clear code.  */
	cmp	count, #128
	b.lt	.Lnot_short
	mrs	tmp1, dczid_el0
	tbnz	tmp1, #4, .Lnot_short	/* Bit 4 set: fall back to ordinary stores.  */
	mov	tmp3w, #4
	and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
	lsl	zva_len, tmp3w, zva_len	/* zva_len = 4 << (low 4 bits of DCZID_EL0).  */

.Lzero_by_line:
	/* Compute how far we need to go to become suitably aligned.  We're
	 * already at quad-word alignment.  */
	cmp	count, zva_len_x
	b.lt	.Lnot_short		/* Not enough to reach alignment.  */
	sub	zva_bits_x, zva_len_x, #1
	neg	tmp2, dst
	ands	tmp2, tmp2, zva_bits_x
	b.eq	1f			/* Already aligned.  */
	/* Not aligned, check that there's enough to copy after alignment.  */
	sub	tmp1, count, tmp2
	cmp	tmp1, #64
	ccmp	tmp1, zva_len_x, #8, ge	/* NZCV=0b1000 */
	b.lt	.Lnot_short
	/* We know that there's at least 64 bytes to zero and that it's safe
	 * to overrun by 64 bytes.  */
	mov	count, tmp1
2:
	stp	A_l, A_l, [dst]
	stp	A_l, A_l, [dst, #16]
	stp	A_l, A_l, [dst, #32]
	subs	tmp2, tmp2, #64
	stp	A_l, A_l, [dst, #48]
	add	dst, dst, #64
	b.ge	2b
	/* We've overrun a bit, so adjust dst downwards.  */
	add	dst, dst, tmp2
1:
	sub	count, count, zva_len_x	/* Bias count so the loop can test its sign.  */
3:
	dc	zva, dst
	add	dst, dst, zva_len_x
	subs	count, count, zva_len_x
	b.ge	3b
	ands	count, count, zva_bits_x	/* Bytes left after the last whole block.  */
	b.ne	.Ltail_maybe_long
	ret
END(__memset_aarch64_nt)