/*
 * memcpy - copy memory area
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

/* Incoming arguments.  dstin (x0) is never written, so it still holds the
   destination pointer when the routine returns.  */
#define dstin	x0
#define src	x1
#define count	x2

/* Working pointers: dst is the 16-byte aligned store cursor used by the large
   copy path; srcend/dstend point one byte past the end of each buffer.  */
#define dst	x3
#define srcend	x4
#define dstend	x5

/* Data registers, used in low/high pairs by ldp/stp.  The *_lw names are the
   32-bit views used by the word and byte sized small copies.  */
#define A_l	x6
#define A_lw	w6
#define A_h	x7
#define B_l	x8
#define B_lw	w8
#define B_h	x9
#define C_l	x10
#define C_lw	w10
#define C_h	x11
#define D_l	x12
#define D_h	x13
#define E_l	x14
#define E_h	x15
#define F_l	x16
#define F_h	x17

/* G and H reuse count/dst/src/srcend as data registers.  They are only used
   in the 97..128 byte path, after the last point where those values are
   needed.  */
#define G_l	count
#define G_h	dst
#define H_l	src
#define H_h	srcend

/* tmp1 shares x14 with E_l; the two are never live at the same time.  */
#define tmp1	x14

/* This implementation of memcpy uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies.  Small and medium copies load
   all of their data before storing any of it.  No overlap check is performed:
   the large copy loop always walks forward through memory, so the source and
   destination buffers must not overlap.

   Large copies use a software pipelined loop processing 64 bytes per iteration.
   The destination pointer is 16-byte aligned to minimize unaligned accesses.
   The loop tail is handled by always copying 64 bytes from the end.
*/

.global memcpy
.type memcpy,%function
memcpy:
	add	srcend, src, count
	add	dstend, dstin, count
	cmp	count, 128
	b.hi	.Lcopy_long
	cmp	count, 32
	b.hi	.Lcopy32_128

	/* Small copies: 0..32 bytes.  */
	cmp	count, 16
	b.lo	.Lcopy16
	/* 16..32 bytes: one 16-byte block from the start and one from the end;
	   the two blocks overlap when count < 32.  */
	ldp	A_l, A_h, [src]
	ldp	D_l, D_h, [srcend, -16]
	stp	A_l, A_h, [dstin]
	stp	D_l, D_h, [dstend, -16]
	ret

	/* Copy 8-15 bytes.  Bit 3 of count is set exactly when count >= 8 here,
	   since count is already known to be below 16.  */
.Lcopy16:
	tbz	count, 3, .Lcopy8
	ldr	A_l, [src]
	ldr	A_h, [srcend, -8]
	str	A_l, [dstin]
	str	A_h, [dstend, -8]
	ret

	.p2align 3
	/* Copy 4-7 bytes.  */
.Lcopy8:
	tbz	count, 2, .Lcopy4
	ldr	A_lw, [src]
	ldr	B_lw, [srcend, -4]
	str	A_lw, [dstin]
	str	B_lw, [dstend, -4]
	ret

	/* Copy 0..3 bytes using a branchless sequence.  For 1..3 bytes the
	   first, middle (index count/2) and last byte are copied; for counts
	   of 1 and 2 some of these refer to the same byte.  */
.Lcopy4:
	cbz	count, .Lcopy0
	lsr	tmp1, count, 1
	ldrb	A_lw, [src]
	ldrb	C_lw, [srcend, -1]
	ldrb	B_lw, [src, tmp1]
	strb	A_lw, [dstin]
	strb	B_lw, [dstin, tmp1]
	strb	C_lw, [dstend, -1]
.Lcopy0:
	ret

	.p2align 4
	/* Medium copies: 33..128 bytes.  The first 32 and the last 32 bytes
	   are always loaded; that covers everything up to 64 bytes.  */
.Lcopy32_128:
	ldp	A_l, A_h, [src]
	ldp	B_l, B_h, [src, 16]
	ldp	C_l, C_h, [srcend, -32]
	ldp	D_l, D_h, [srcend, -16]
	cmp	count, 64
	b.hi	.Lcopy128
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy 65..128 bytes.  */
.Lcopy128:
	ldp	E_l, E_h, [src, 32]
	ldp	F_l, F_h, [src, 48]
	cmp	count, 96
	b.ls	.Lcopy96
	/* 97..128 bytes: also copy the 32 bytes ending 32 bytes before the
	   end.  These loads overwrite count, dst, src and srcend (aliased as
	   G_l, G_h, H_l, H_h); the remaining source data is already held in
	   registers, so only dstin and dstend are needed from here on.  */
	ldp	G_l, G_h, [srcend, -64]
	ldp	H_l, H_h, [srcend, -48]
	stp	G_l, G_h, [dstend, -64]
	stp	H_l, H_h, [dstend, -48]
.Lcopy96:
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	E_l, E_h, [dstin, 32]
	stp	F_l, F_h, [dstin, 48]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy more than 128 bytes.  */
.Lcopy_long:

	/* Copy 16 bytes and then align dst to 16-byte alignment.  */

	ldp	D_l, D_h, [src]
	and	tmp1, dstin, 15
	bic	dst, dstin, 15
	/* Move src back by the same amount dst was rounded down, so that
	   equal offsets from src and dst address corresponding bytes.  */
	sub	src, src, tmp1
	add	count, count, tmp1	/* Count is now 16 too large.  */
	ldp	A_l, A_h, [src, 16]
	stp	D_l, D_h, [dstin]
	ldp	B_l, B_h, [src, 32]
	ldp	C_l, C_h, [src, 48]
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 128 + 16	/* Test and readjust count.  */
	b.ls	.Lcopy64_from_end

	/* Software pipelined loop: each step stores the 16 bytes loaded on the
	   previous pass and reloads the register pair with the next block.
	   The final store/load pair advances dst and src by 64.  */
.Lloop64:
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [src, 16]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [src, 32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [src, 48]
	stp	D_l, D_h, [dst, 64]!
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 64
	b.hi	.Lloop64

	/* Write the last iteration and copy 64 bytes from the end.  */
.Lcopy64_from_end:
	ldp	E_l, E_h, [srcend, -64]
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [srcend, -48]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [srcend, -16]
	stp	D_l, D_h, [dst, 64]
	stp	E_l, E_h, [dstend, -64]
	stp	A_l, A_h, [dstend, -48]
	stp	B_l, B_h, [dstend, -32]
	stp	C_l, C_h, [dstend, -16]
	ret

.size memcpy,.-memcpy