1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ignore
6
7// runtime·duffzero is a Duff's device for zeroing memory.
8// The compiler jumps to computed addresses within
9// the routine to zero chunks of memory.
10// Do not change duffzero without also
11// changing the uses in cmd/compile/internal/*/*.go.
12
13// runtime·duffcopy is a Duff's device for copying memory.
14// The compiler jumps to computed addresses within
15// the routine to copy chunks of memory.
16// Source and destination must not overlap.
17// Do not change duffcopy without also
18// changing the uses in cmd/compile/internal/*/*.go.
19
20// See the zero* and copy* generators below
21// for architecture-specific comments.
22
23// mkduff generates duff_*.s.
24package main
25
26import (
27	"bytes"
28	"fmt"
29	"io"
30	"log"
31	"os"
32)
33
34func main() {
35	gen("amd64", notags, zeroAMD64, copyAMD64)
36	gen("386", notags, zero386, copy386)
37	gen("arm", notags, zeroARM, copyARM)
38	gen("arm64", notags, zeroARM64, copyARM64)
39	gen("loong64", notags, zeroLOONG64, copyLOONG64)
40	gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
41	gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
42	gen("riscv64", notags, zeroRISCV64, copyRISCV64)
43}
44
// gen builds the complete text of duff_<arch>.s in memory and then writes it,
// with mode 0644, to a file of that name relative to the current directory.
//
// The file consists of a fixed three-line header, whatever tags emits, the
// textflag.h include, a blank line, the output of zero, another blank line
// and finally the output of copy. If the file cannot be written the error is
// reported through log.Fatalln.
//
// Note that the copy parameter shadows the builtin of the same name inside
// this function.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	out := new(bytes.Buffer)

	header := []string{
		"// Code generated by mkduff.go; DO NOT EDIT.",
		"// Run go generate from src/runtime to update.",
		"// See mkduff.go for comments.",
	}
	for _, line := range header {
		fmt.Fprintln(out, line)
	}

	tags(out)
	fmt.Fprintln(out, `#include "textflag.h"`)
	fmt.Fprintln(out)

	zero(out)
	fmt.Fprintln(out)
	copy(out)

	name := "duff_" + arch + ".s"
	err := os.WriteFile(name, out.Bytes(), 0644)
	if err != nil {
		log.Fatalln(err)
	}
}
62
// notags is the tags writer used for architectures that main lists without a
// build constraint; all it emits is one empty line.
func notags(w io.Writer) {
	fmt.Fprint(w, "\n")
}
64
// zeroAMD64 writes the amd64 runtime·duffzero routine to w.
func zeroAMD64(w io.Writer) {
	// Register contract of the emitted code:
	//   X15 holds zero.
	//   DI points at the memory to be zeroed and is updated as a side effect.
	const blocks = 16

	// One block stores X15 at offsets 0, 16, 32 and 48 from DI and then moves
	// DI forward by 64. LEAQ is used instead of an add so that the flags are
	// not clobbered. The empty entry produces the blank separator line.
	block := []string{
		"\tMOVUPS\tX15,(DI)",
		"\tMOVUPS\tX15,16(DI)",
		"\tMOVUPS\tX15,32(DI)",
		"\tMOVUPS\tX15,48(DI)",
		"\tLEAQ\t64(DI),DI",
		"",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for n := blocks; n > 0; n-- {
		for _, line := range block {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
80
// copyAMD64 writes the amd64 runtime·duffcopy routine to w.
func copyAMD64(w io.Writer) {
	// Register contract of the emitted code:
	//   SI points at the source memory.
	//   DI points at the destination memory.
	//   SI and DI are both updated as a side effect.
	//   X0 carries each chunk from source to destination.
	//
	// A sequence of MOVSQ would be equivalent, but for some reason that is
	// 3.5x slower than this code.
	const steps = 64

	step := [...]string{
		"\tMOVUPS\t(SI), X0",
		"\tADDQ\t$16, SI",
		"\tMOVUPS\tX0, (DI)",
		"\tADDQ\t$16, DI",
		"", // blank line after every step
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for done := 0; done != steps; done++ {
		for _, text := range step {
			fmt.Fprintln(w, text)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
98
// zero386 writes the 386 runtime·duffzero routine to w: a run of 128 STOSL
// instructions followed by RET.
func zero386(w io.Writer) {
	// Register contract of the emitted code:
	//   AX holds zero.
	//   DI points at the memory to be zeroed and is updated as a side effect.
	const (
		stores = 128
		store  = "\tSTOSL"
	)

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for left := stores; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
109
// copy386 writes the 386 runtime·duffcopy routine to w.
func copy386(w io.Writer) {
	// Register contract of the emitted code:
	//   SI points at the source memory.
	//   DI points at the destination memory.
	//   SI and DI are both updated as a side effect.
	//   CX holds each word while it travels from source to destination.
	//
	// A sequence of MOVSL would be equivalent, but for some reason MOVSL is
	// really slow.
	const words = 128

	word := []string{
		"\tMOVL\t(SI), CX",
		"\tADDL\t$4, SI",
		"\tMOVL\tCX, (DI)",
		"\tADDL\t$4, DI",
		"", // blank line after every word
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := 1; n <= words; n++ {
		for _, insn := range word {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
127
// zeroARM writes the arm runtime·duffzero routine to w: 128 identical MOVW.P
// stores followed by RET.
func zeroARM(w io.Writer) {
	// Register contract of the emitted code:
	//   R0 holds zero.
	//   R1 points at the memory to be zeroed and is updated as a side effect.
	const (
		stores = 128
		store  = "\tMOVW.P\tR0, 4(R1)"
	)

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for left := stores; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
138
// copyARM writes the arm runtime·duffcopy routine to w.
func copyARM(w io.Writer) {
	// Register contract of the emitted code:
	//   R0 is scratch space.
	//   R1 points at the source memory.
	//   R2 points at the destination memory.
	//   R1 and R2 are both updated as a side effect.
	const words = 128

	word := [...]string{
		"\tMOVW.P\t4(R1), R0",
		"\tMOVW.P\tR0, 4(R2)",
		"", // blank line after every load/store pair
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := words; n > 0; n-- {
		for _, insn := range word {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
152
// zeroARM64 writes the arm64 runtime·duffzero routine to w.
func zeroARM64(w io.Writer) {
	// Register contract of the emitted code:
	//   ZR is always zero.
	//   R20 points at the memory to be zeroed.
	//
	// The routine consists of 64 pair stores. All but the last use the .P
	// form with a 16-byte step; the last is a plain STP through (R20), so
	// on return R20 still holds the address used by that final store.
	const pairs = 64

	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for k := 1; k < pairs; k++ {
		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R20)")
	}
	for _, tail := range []string{"\tSTP\t(ZR, ZR), (R20)", "\tRET"} {
		fmt.Fprintln(w, tail)
	}
}
164
// copyARM64 writes the arm64 runtime·duffcopy routine to w.
func copyARM64(w io.Writer) {
	// Register contract of the emitted code:
	//   R20 points at the source memory.
	//   R21 points at the destination memory.
	//   R26 and R27 (aka REGTMP) are scratch space.
	//   R20 and R21 are both updated as a side effect.
	const pairs = 64

	pair := []string{
		"\tLDP.P\t16(R20), (R26, R27)",
		"\tSTP.P\t(R26, R27), 16(R21)",
		"", // blank line after every load/store pair
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for n := pairs; n > 0; n-- {
		for _, insn := range pair {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
179
// zeroLOONG64 writes the loong64 runtime·duffzero routine to w: 128
// store-and-advance steps followed by RET.
func zeroLOONG64(w io.Writer) {
	// R0: always zero
	// R20: ptr to memory to be zeroed
	// R20 is advanced by 8 after every store, so on return it points
	// just past the last zeroed dword.
	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVV\tR0, (R20)")
		fmt.Fprintln(w, "\tADDV\t$8, R20")
	}
	fmt.Fprintln(w, "\tRET")
}
191
// copyLOONG64 writes the loong64 runtime·duffcopy routine to w.
func copyLOONG64(w io.Writer) {
	// Registers used by the emitted code:
	//   R20 is the address each load reads from; advanced by 8 per step.
	//   R21 is the address each store writes to; advanced by 8 per step.
	//   R30 holds the value between the load and the store.
	const steps = 128

	step := []string{
		"\tMOVV\t(R20), R30",
		"\tADDV\t$8, R20",
		"\tMOVV\tR30, (R21)",
		"\tADDV\t$8, R21",
		"", // blank line after every step
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for n := steps; n > 0; n-- {
		for _, insn := range step {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
203
// tagsPPC64x emits the build constraint that restricts the generated file to
// ppc64 and ppc64le, with one blank line before it and one after.
func tagsPPC64x(w io.Writer) {
	lines := [...]string{
		"",
		"//go:build ppc64 || ppc64le",
		"",
	}
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
}
209
// zeroPPC64x writes the ppc64/ppc64le runtime·duffzero routine to w: 128
// identical MOVDU stores followed by RET.
func zeroPPC64x(w io.Writer) {
	// R0: always zero
	// R20: ptr to memory to be zeroed - 8
	// On return, R20 points to the last zeroed dword.
	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R20)")
	}
	fmt.Fprintln(w, "\tRET")
}
220
// copyPPC64x writes the ppc64/ppc64le runtime·duffcopy routine to w.
func copyPPC64x(w io.Writer) {
	// duffcopy is not used on PPC64.
	//
	// Each of the 128 steps loads through 8(R20) into R5 and then stores R5
	// through 8(R21). No blank lines are emitted between steps.
	const steps = 128

	step := [...]string{
		"\tMOVDU\t8(R20), R5",
		"\tMOVDU\tR5, 8(R21)",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for n := steps; n > 0; n-- {
		for _, insn := range step {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
230
// tagsMIPS64x emits the build constraint that restricts the generated file to
// mips64 and mips64le, with one blank line before it and one after.
func tagsMIPS64x(w io.Writer) {
	lines := [...]string{
		"",
		"//go:build mips64 || mips64le",
		"",
	}
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
}
236
// zeroMIPS64x writes the mips64/mips64le runtime·duffzero routine to w.
func zeroMIPS64x(w io.Writer) {
	// Register contract of the emitted code:
	//   R0 is always zero.
	//   R1 (aka REGRT1) holds the address of the memory to be zeroed, minus 8.
	//   On return, R1 points to the last zeroed dword.
	const stores = 128

	// Every store writes through 8(R1) and is followed by an 8-byte advance.
	store := []string{
		"\tMOVV\tR0, 8(R1)",
		"\tADDV\t$8, R1",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for n := stores; n > 0; n-- {
		for _, insn := range store {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
248
// copyMIPS64x writes the mips64/mips64le runtime·duffcopy routine to w.
func copyMIPS64x(w io.Writer) {
	// Registers used by the emitted code:
	//   R1 is the address each load reads from; advanced by 8 per step.
	//   R2 is the address each store writes to; advanced by 8 per step.
	//   R23 holds the value between the load and the store.
	const steps = 128

	step := []string{
		"\tMOVV\t(R1), R23",
		"\tADDV\t$8, R1",
		"\tMOVV\tR23, (R2)",
		"\tADDV\t$8, R2",
		"", // blank line after every step
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for n := steps; n > 0; n-- {
		for _, insn := range step {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
260
// zeroRISCV64 writes the riscv64 runtime·duffzero routine to w.
func zeroRISCV64(w io.Writer) {
	// Register contract of the emitted code:
	//   ZERO is always zero.
	//   X25 points at the memory to be zeroed and is updated as a side effect.
	const stores = 128

	// Every store writes through (X25) and is followed by an 8-byte advance.
	store := [...]string{
		"\tMOV\tZERO, (X25)",
		"\tADD\t$8, X25",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for n := stores; n > 0; n-- {
		for _, insn := range store {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
272
// copyRISCV64 writes the riscv64 runtime·duffcopy routine to w.
func copyRISCV64(w io.Writer) {
	// Register contract of the emitted code:
	//   X24 points at the source memory.
	//   X25 points at the destination memory.
	//   X24 and X25 are both updated as a side effect.
	//   X31 holds the value between the load and the store.
	const steps = 128

	step := []string{
		"\tMOV\t(X24), X31",
		"\tADD\t$8, X24",
		"\tMOV\tX31, (X25)",
		"\tADD\t$8, X25",
		"", // blank line after every step
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for n := steps; n > 0; n-- {
		for _, insn := range step {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
287