/* xref: /aosp_15_r20/external/musl/src/string/aarch64/memcpy.S (revision c9945492fdd68bbe62686c5b452b4dc1be3f8453) */
/*
 * memcpy - copy memory area
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

/* Register aliases used by memcpy.

   Arguments.  dstin is never written by memcpy, so x0 still holds the
   destination pointer when the function returns.  */
#define dstin   x0
#define src     x1
#define count   x2

/* Derived pointers.  srcend and dstend point one past the last byte of the
   source and destination.  dst is only used for large copies, where it is
   dstin rounded down to a 16-byte boundary.  */
#define dst     x3
#define srcend  x4
#define dstend  x5

/* Data registers.  Each X_l/X_h pair carries 16 bytes: X_l is the 8 bytes at
   the lower address, X_h the 8 bytes at the higher address.  The _lw names
   are the 32-bit views of the same registers, used for the 4-byte and
   single-byte accesses.  */
#define A_l     x6
#define A_lw    w6
#define A_h     x7
#define B_l     x8
#define B_lw    w8
#define B_h     x9
#define C_l     x10
#define C_lw    w10
#define C_h     x11
#define D_l     x12
#define D_h     x13
#define E_l     x14
#define E_h     x15
#define F_l     x16
#define F_h     x17

/* G and H reuse the registers of count, dst, src and srcend.  They are only
   loaded on the 97..128 byte path, where none of those values is read again
   once the two loads have been issued.  */
#define G_l     count
#define G_h     dst
#define H_l     src
#define H_h     srcend

/* tmp1 shares x14 with E_l.  tmp1 is used on the 0..3 byte path and while
   aligning dst for large copies; E is used on the 65..128 byte path and in
   the large-copy tail, so the two are never live at the same time.  */
#define tmp1    x14

/* This implementation of memcpy uses unaligned accesses and branchless
   sequences to keep the code small and simple and to improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies.  No overlap check is
   performed.  Small and medium copies load all of the source data before
   storing any of it; large copies interleave loads and stores and rely on
   the source and destination not overlapping, as memcpy requires.

   Large copies use a software pipelined loop processing 64 bytes per iteration.
   The destination pointer is 16-byte aligned to minimize unaligned accesses.
   The loop tail is handled by always copying 64 bytes from the end.
*/

/* memcpy - copy count bytes from src to dstin.

   C prototype: void *memcpy (void *dest, const void *src, size_t n)

   In:     x0 = dstin, x1 = src, x2 = count (bytes).
   Out:    x0 = dstin.  x0 is never written.
   Uses:   x1-x17 and the condition flags as scratch.  The stack is not
           touched and no other function is called.

   Every size class copies a block from the start of the buffer and a block
   from the end of the buffer; the two blocks overlap when count is not a
   multiple of the block size, which removes the need for a byte-wise tail.  */
.global memcpy
.type memcpy,%function
memcpy:
	/* One-past-the-end pointers, used by every path below.  */
	add     srcend, src, count
	add     dstend, dstin, count
	cmp     count, 128
	b.hi    .Lcopy_long
	cmp     count, 32
	b.hi    .Lcopy32_128

	/* Small copies: 0..32 bytes.  */
	cmp     count, 16
	b.lo    .Lcopy16
	/* 16..32 bytes: first 16 and last 16 bytes.  Both loads precede both
	   stores.  */
	ldp     A_l, A_h, [src]
	ldp     D_l, D_h, [srcend, -16]
	stp     A_l, A_h, [dstin]
	stp     D_l, D_h, [dstend, -16]
	ret

	/* Copy 8-15 bytes.  */
.Lcopy16:
	/* count < 16 here, so a clear bit 3 means count < 8.  */
	tbz     count, 3, .Lcopy8
	ldr     A_l, [src]
	ldr     A_h, [srcend, -8]
	str     A_l, [dstin]
	str     A_h, [dstend, -8]
	ret

	.p2align 3
	/* Copy 4-7 bytes.  */
.Lcopy8:
	/* count < 8 here, so a clear bit 2 means count < 4.  */
	tbz     count, 2, .Lcopy4
	ldr     A_lw, [src]
	ldr     B_lw, [srcend, -4]
	str     A_lw, [dstin]
	str     B_lw, [dstend, -4]
	ret

	/* Copy 0..3 bytes using a branchless sequence.  */
.Lcopy4:
	cbz     count, .Lcopy0
	/* tmp1 = count / 2.  The first byte, the byte at offset tmp1 and the
	   last byte are copied; for count 1 or 2 some of them coincide.  */
	lsr     tmp1, count, 1
	ldrb    A_lw, [src]
	ldrb    C_lw, [srcend, -1]
	ldrb    B_lw, [src, tmp1]
	strb    A_lw, [dstin]
	strb    B_lw, [dstin, tmp1]
	strb    C_lw, [dstend, -1]
.Lcopy0:
	ret

	.p2align 4
	/* Medium copies: 33..128 bytes.  */
.Lcopy32_128:
	/* Load the first 32 and the last 32 bytes.  For 33..64 bytes that
	   covers the whole buffer.  */
	ldp     A_l, A_h, [src]
	ldp     B_l, B_h, [src, 16]
	ldp     C_l, C_h, [srcend, -32]
	ldp     D_l, D_h, [srcend, -16]
	cmp     count, 64
	b.hi    .Lcopy128
	stp     A_l, A_h, [dstin]
	stp     B_l, B_h, [dstin, 16]
	stp     C_l, C_h, [dstend, -32]
	stp     D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy 65..128 bytes.  */
.Lcopy128:
	/* A-D already hold the first 32 and the last 32 bytes; add source
	   bytes 32..63.  */
	ldp     E_l, E_h, [src, 32]
	ldp     F_l, F_h, [src, 48]
	cmp     count, 96
	b.ls    .Lcopy96
	/* 97..128 bytes: also copy the 32 bytes starting at srcend - 64.
	   G and H live in the count, dst, src and srcend registers, so only
	   dstin and dstend are valid after these two loads.  The second load
	   overwrites srcend, its own base register; there is no writeback,
	   so the address is formed from the old value.  */
	ldp     G_l, G_h, [srcend, -64]
	ldp     H_l, H_h, [srcend, -48]
	stp     G_l, G_h, [dstend, -64]
	stp     H_l, H_h, [dstend, -48]
.Lcopy96:
	stp     A_l, A_h, [dstin]
	stp     B_l, B_h, [dstin, 16]
	stp     E_l, E_h, [dstin, 32]
	stp     F_l, F_h, [dstin, 48]
	stp     C_l, C_h, [dstend, -32]
	stp     D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy more than 128 bytes.  */
.Lcopy_long:

	/* Copy 16 bytes and then align dst to 16-byte alignment.  */

	/* tmp1 = dstin & 15 and dst = dstin & ~15.  src is moved back by the
	   same tmp1 bytes, so that [src, N] and [dst, N] always refer to
	   corresponding bytes.  After the add, count = dstend - dst.  */
	ldp     D_l, D_h, [src]
	and     tmp1, dstin, 15
	bic     dst, dstin, 15
	sub     src, src, tmp1
	add     count, count, tmp1      /* Count is now 16 too large.  */
	ldp     A_l, A_h, [src, 16]
	stp     D_l, D_h, [dstin]
	/* Preload the 64 bytes for [dst, 16] .. [dst, 79]; the writeback on
	   the last load advances src by 64.  */
	ldp     B_l, B_h, [src, 32]
	ldp     C_l, C_h, [src, 48]
	ldp     D_l, D_h, [src, 64]!
	/* If dstend - dst <= 128 + 16, the 64 bytes held in A-D plus the last
	   64 bytes of the buffer cover everything that is still unwritten.  */
	subs    count, count, 128 + 16  /* Test and readjust count.  */
	b.ls    .Lcopy64_from_end

	/* Each iteration stores the 64 bytes loaded by the previous one and
	   loads the next 64.  Every register pair is stored before it is
	   reloaded, and dst and src advance by 64 through the writeback on
	   the D pair.  */
.Lloop64:
	stp     A_l, A_h, [dst, 16]
	ldp     A_l, A_h, [src, 16]
	stp     B_l, B_h, [dst, 32]
	ldp     B_l, B_h, [src, 32]
	stp     C_l, C_h, [dst, 48]
	ldp     C_l, C_h, [src, 48]
	stp     D_l, D_h, [dst, 64]!
	ldp     D_l, D_h, [src, 64]!
	subs    count, count, 64
	b.hi    .Lloop64

	/* Write the last iteration and copy 64 bytes from the end.  */
.Lcopy64_from_end:
	/* A-D still hold 64 bytes that have not been stored.  Each pair is
	   stored at its dst offset before being reloaded from the last 64
	   source bytes; E takes the first 16 of those, because all of A-D
	   are still occupied at that point.  */
	ldp     E_l, E_h, [srcend, -64]
	stp     A_l, A_h, [dst, 16]
	ldp     A_l, A_h, [srcend, -48]
	stp     B_l, B_h, [dst, 32]
	ldp     B_l, B_h, [srcend, -32]
	stp     C_l, C_h, [dst, 48]
	ldp     C_l, C_h, [srcend, -16]
	stp     D_l, D_h, [dst, 64]
	stp     E_l, E_h, [dstend, -64]
	stp     A_l, A_h, [dstend, -48]
	stp     B_l, B_h, [dstend, -32]
	stp     C_l, C_h, [dstend, -16]
	ret

.size memcpy,.-memcpy