1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ppc64 || ppc64le
6
7#include "textflag.h"
8
9// See memmove Go doc for important implementation constraints.
10
11// func memmove(to, from unsafe.Pointer, n uintptr)
12
13// target (destination) address
14#define TGT R3
15// source address
16#define SRC R4
17// length to move, in bytes
18#define LEN R5
19// number of doublewords (initially LEN >> 3; reduced as doublewords are copied)
20#define DWORDS R6
21// number of bytes < 8 (LEN & 7)
22#define BYTES R7
23// const 16 used as index
24#define IDX16 R8
25// temp used for copies, etc.
26#define TMP R9
27// number of 32 byte chunks (DWORDS >> 2), used by the backward copy loop
28#define QWORDS R10
29// const 32 and 48 used as indexes by the 64 byte forward copy loop
30#define IDX32 R14
31#define IDX48 R15
// number of 64 byte chunks (DWORDS >> 3), used by the forward copy loop
32#define OCTWORDS R16
33
34TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
35	// R3 = TGT = to
36	// R4 = SRC = from
37	// R5 = LEN = n
	//
	// Copies LEN bytes from SRC to TGT.
	//
	// Direction: if (TGT - SRC), compared as unsigned, is less than LEN the
	// copy runs backward (from the end of both buffers); otherwise it runs
	// forward.
	//
	// Condition register fields set up at mcopy and relied on later:
	//   CR0 - result of ANDCC $7, LEN (EQ if BYTES == 0); read by backward
	//   CR3 - copy of that CR0 result; read at checkbytes
	//   CR1 - DWORDS compared with 0; read before each doubleword phase
	//   CR2 - unsigned compare of (TGT - SRC) with LEN; selects direction
38
39	// Determine if there are doublewords to
40	// copy so a more efficient move can be done
41check:
42#ifdef GOPPC64_power10
	// LEN <= 16: one LXVL/STXVL pair moves everything. The byte count is
	// taken from the most significant byte of TMP, hence the shift by 56.
	// All bytes are loaded into V0 before any are stored.
43	CMP	LEN, $16
44	BGT	mcopy
45	SLD	$56, LEN, TMP
46	LXVL	SRC, TMP, V0
47	STXVL	V0, TGT, TMP
48	RET
49#endif
50mcopy:
51	ANDCC	$7, LEN, BYTES	// R7: bytes to copy (LEN & 7); sets CR0
52	SRD	$3, LEN, DWORDS	// R6: double words to copy
53	MOVFL	CR0, CR3	// save CR from ANDCC
54	CMP	DWORDS, $0, CR1	// CR1[EQ] set if no double words to copy
55
56	// Determine overlap by subtracting dest - src and comparing against the
57	// length.  This catches the cases where src and dest are in different types
58	// of storage such as stack and static to avoid doing backward move when not
59	// necessary.
60
61	SUB	SRC, TGT, TMP	// dest - src
62	CMPU	TMP, LEN, CR2	// < len? (unsigned compare)
63	BC	12, 8, backward // BLT CR2 backward
64
65	// Copying forward if no overlap.
66
67	BC	12, 6, checkbytes	// BEQ CR1, checkbytes
68	SRDCC	$3, DWORDS, OCTWORDS	// 64 byte chunks? Sets CR0.
69	MOVD	$16, IDX16
70	BEQ	lt64gt8			// < 64 bytes
71
72	// Prepare for moves of 64 bytes at a time.
73
74forward64setup:
75	DCBTST	(TGT)			// prepare data cache
76	DCBT	(SRC)
77	MOVD	OCTWORDS, CTR		// Number of 64 byte chunks
78	MOVD	$32, IDX32
79	MOVD	$48, IDX48
80	PCALIGN	$16
81
82forward64:
83	LXVD2X	(R0)(SRC), VS32		// load 64 bytes
84	LXVD2X	(IDX16)(SRC), VS33
85	LXVD2X	(IDX32)(SRC), VS34
86	LXVD2X	(IDX48)(SRC), VS35
87	ADD	$64, SRC
88	STXVD2X	VS32, (R0)(TGT)		// store 64 bytes
89	STXVD2X	VS33, (IDX16)(TGT)
90	STXVD2X	VS34, (IDX32)(TGT)
91	STXVD2X VS35, (IDX48)(TGT)
92	ADD	$64,TGT			// bump up for next set
93	BC	16, 0, forward64	// bdnz: decrement CTR, loop while CTR != 0
94	ANDCC	$7, DWORDS		// remaining doublewords (0-7); sets CR0
95	BEQ	checkbytes		// only bytes remain
96
97lt64gt8:
	// Fewer than 8 doublewords remain. Move 32 bytes if possible.
98	CMP	DWORDS, $4
99	BLT	lt32gt8
100	LXVD2X	(R0)(SRC), VS32
101	LXVD2X	(IDX16)(SRC), VS33
102	ADD	$-4, DWORDS
103	STXVD2X	VS32, (R0)(TGT)
104	STXVD2X	VS33, (IDX16)(TGT)
105	ADD	$32, SRC
106	ADD	$32, TGT
107
108lt32gt8:
109	// At this point >= 8 and < 32
110	// Move 16 bytes if possible
111	CMP     DWORDS, $2
112	BLT     lt16
113	LXVD2X	(R0)(SRC), VS32
114	ADD	$-2, DWORDS
115	STXVD2X	VS32, (R0)(TGT)
116	ADD     $16, SRC
117	ADD     $16, TGT
118
119lt16:	// Move 8 bytes if possible
120	CMP     DWORDS, $1
121	BLT     checkbytes
122#ifdef GOPPC64_power10
	// Exactly one doubleword remains: move it together with the BYTES
	// tail (8 + BYTES bytes in total) using a single LXVL/STXVL pair.
123	ADD	$8, BYTES
124	SLD	$56, BYTES, TMP
125	LXVL	SRC, TMP, V0
126	STXVL	V0, TGT, TMP
127	RET
128#endif
129
130	MOVD    0(SRC), TMP
131	ADD	$8, SRC
132	MOVD    TMP, 0(TGT)
133	ADD     $8, TGT
134checkbytes:
135	BC	12, 14, LR		// BEQ CR3, lr: return if BYTES == 0
136#ifdef GOPPC64_power10
	// Move the 1-7 remaining bytes with a single LXVL/STXVL pair.
137	SLD	$56, BYTES, TMP
138	LXVL	SRC, TMP, V0
139	STXVL	V0, TGT, TMP
140	RET
141#endif
142lt8:	// Move word if possible
143	CMP BYTES, $4
144	BLT lt4
145	MOVWZ 0(SRC), TMP
146	ADD $-4, BYTES
147	MOVW TMP, 0(TGT)
148	ADD $4, SRC
149	ADD $4, TGT
150lt4:	// Move halfword if possible
151	CMP BYTES, $2
152	BLT lt2
153	MOVHZ 0(SRC), TMP
154	ADD $-2, BYTES
155	MOVH TMP, 0(TGT)
156	ADD $2, SRC
157	ADD $2, TGT
158lt2:	// Move last byte if 1 left
159	CMP BYTES, $1
160	BC 12, 0, LR	// bltlr, return if BYTES == 0
161	MOVBZ 0(SRC), TMP
162	MOVBZ TMP, 0(TGT)
163	RET
164
165backward:
166	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
167	// R3 and R4 are advanced to the end of the destination/source buffers
168	// respectively and moved back as we copy.
169
170	ADD	LEN, SRC, SRC		// end of source
171	ADD	TGT, LEN, TGT		// end of dest
172
173	BEQ	nobackwardtail		// CR0 still holds the ANDCC result from mcopy: skip if BYTES == 0
174
175	MOVD	BYTES, CTR			// bytes to move
176
177backwardtailloop:
178	MOVBZ 	-1(SRC), TMP		// load the last byte not yet copied
179	SUB	$1,SRC
180	MOVBZ 	TMP, -1(TGT)
181	SUB	$1,TGT
182	BDNZ	backwardtailloop
183
184nobackwardtail:
185	BC	4, 5, LR		// blelr cr1, return if DWORDS == 0
186	SRDCC	$2,DWORDS,QWORDS	// Compute number of 32B blocks and compare to 0
187	BNE	backward32setup		// If QWORDS != 0, start the 32B copy loop.
188
189backward24:
190	// DWORDS is a value between 1-3.
	// The CMP result in CR0 is not changed by the MOVD loads and stores
	// below, so it is tested twice.
191	CMP	DWORDS, $2
192
193	MOVD 	-8(SRC), TMP
194	MOVD 	TMP, -8(TGT)
195	BC	12, 0, LR		// bltlr, return if DWORDS == 1
196
197	MOVD 	-16(SRC), TMP
198	MOVD 	TMP, -16(TGT)
199	BC	12, 2, LR		// beqlr, return if DWORDS == 2
200
201	MOVD 	-24(SRC), TMP
202	MOVD 	TMP, -24(TGT)
203	RET
204
205backward32setup:
206	ANDCC   $3,DWORDS		// Compute remaining DWORDS and compare to 0; CR0 is tested after the loop
207	MOVD	QWORDS, CTR		// set up loop ctr
208	MOVD	$16, IDX16		// 32 bytes at a time
209	PCALIGN	$16
210
211backward32loop:
212	SUB	$32, TGT
213	SUB	$32, SRC
214	LXVD2X	(R0)(SRC), VS32		// load 16x2 bytes
215	LXVD2X	(IDX16)(SRC), VS33
216	STXVD2X	VS32, (R0)(TGT)		// store 16x2 bytes
217	STXVD2X	VS33, (IDX16)(TGT)
218	BDNZ	backward32loop
219	BC	12, 2, LR		// beqlr, return if DWORDS == 0
220	BR	backward24		// 1-3 doublewords remain
221