1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ignore
6
7// mkpreempt generates the asyncPreempt functions for each
8// architecture.
9package main
10
11import (
12	"flag"
13	"fmt"
14	"io"
15	"log"
16	"os"
17	"strings"
18)
19
20// Copied from cmd/compile/internal/ssa/gen/*Ops.go
21
// regNames386 lists the 386 register names. gen386 walks this table and
// gives a stack slot to every entry except SP and the X-prefixed
// registers; it lays out X0-X7 separately.
var regNames386 = []string{
	"AX",
	"CX",
	"DX",
	"BX",
	"SP",
	"BP",
	"SI",
	"DI",
	"X0",
	"X1",
	"X2",
	"X3",
	"X4",
	"X5",
	"X6",
	"X7",
}
40
// regNamesAMD64 lists the amd64 register names. genAMD64 walks this table
// twice: entries without an X prefix, other than SP and BP which are
// skipped, get 8-byte MOVQ slots, and X-prefixed entries get 16-byte
// MOVUPS slots placed after them.
var regNamesAMD64 = []string{
	"AX",
	"CX",
	"DX",
	"BX",
	"SP",
	"BP",
	"SI",
	"DI",
	"R8",
	"R9",
	"R10",
	"R11",
	"R12",
	"R13",
	"R14",
	"R15",
	"X0",
	"X1",
	"X2",
	"X3",
	"X4",
	"X5",
	"X6",
	"X7",
	"X8",
	"X9",
	"X10",
	"X11",
	"X12",
	"X13",
	"X14",
	"X15",
}
75
// out is the destination for all generated text. main points it at
// os.Stdout, or at the file being generated, before calling header and
// the architecture's generator.
var out io.Writer
77
// arches maps an architecture name to the function that generates its
// asyncPreempt body. The keys are the names accepted on the command line
// and the names used in the preempt_<arch>.s output files.
var arches = map[string]func(){
	"386":     gen386,
	"amd64":   genAMD64,
	"arm":     genARM,
	"arm64":   genARM64,
	"loong64": genLoong64,
	"mips64x": func() { genMIPS(true) },
	"mipsx":   func() { genMIPS(false) },
	"ppc64x":  genPPC64,
	"riscv64": genRISCV64,
	"s390x":   genS390X,
	"wasm":    genWasm,
}
// beLe marks the architecture names for which header emits a
// "//go:build <base> || <base>le" line, where <base> is the name with its
// final character removed.
var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}
92
93func main() {
94	flag.Parse()
95	if flag.NArg() > 0 {
96		out = os.Stdout
97		for _, arch := range flag.Args() {
98			gen, ok := arches[arch]
99			if !ok {
100				log.Fatalf("unknown arch %s", arch)
101			}
102			header(arch)
103			gen()
104		}
105		return
106	}
107
108	for arch, gen := range arches {
109		f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
110		if err != nil {
111			log.Fatal(err)
112		}
113		out = f
114		header(arch)
115		gen()
116		if err := f.Close(); err != nil {
117			log.Fatal(err)
118		}
119	}
120}
121
122func header(arch string) {
123	fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
124	if beLe[arch] {
125		base := arch[:len(arch)-1]
126		fmt.Fprintf(out, "//go:build %s || %sle\n\n", base, base)
127	}
128	fmt.Fprintf(out, "#include \"go_asm.h\"\n")
129	if arch == "amd64" {
130		fmt.Fprintf(out, "#include \"asm_amd64.h\"\n")
131	}
132	fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
133	fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
134}
135
136func p(f string, args ...any) {
137	fmted := fmt.Sprintf(f, args...)
138	fmt.Fprintf(out, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t"))
139}
140
141func label(l string) {
142	fmt.Fprintf(out, "%s\n", l)
143}
144
// layout describes a register save area addressed relative to a stack
// pointer register. Registers are appended with add, add2 and addSpecial,
// and the save and restore methods emit the corresponding instructions.
type layout struct {
	// stack is the offset the next added register will receive. The add
	// methods advance it by the register's size, so after the last add
	// it is the end offset of the area.
	stack int
	// regs holds the registers in the order they were added.
	regs  []regPos
	sp    string // stack pointer register
}
150
// regPos is one entry of a layout: a register, or a special save/restore
// sequence, together with its stack offset.
type regPos struct {
	// pos is the offset of this entry from the layout's stack pointer.
	pos int

	// saveOp and restoreOp are the instructions used to store reg to the
	// stack and load it back. They are used only when save and restore
	// are empty.
	saveOp    string
	restoreOp string
	reg       string

	// If this register requires special save and restore, these
	// give those operations with a %d placeholder for the stack
	// offset.
	save, restore string
}
163
164func (l *layout) add(op, reg string, size int) {
165	l.regs = append(l.regs, regPos{saveOp: op, restoreOp: op, reg: reg, pos: l.stack})
166	l.stack += size
167}
168
169func (l *layout) add2(sop, rop, reg string, size int) {
170	l.regs = append(l.regs, regPos{saveOp: sop, restoreOp: rop, reg: reg, pos: l.stack})
171	l.stack += size
172}
173
174func (l *layout) addSpecial(save, restore string, size int) {
175	l.regs = append(l.regs, regPos{save: save, restore: restore, pos: l.stack})
176	l.stack += size
177}
178
179func (l *layout) save() {
180	for _, reg := range l.regs {
181		if reg.save != "" {
182			p(reg.save, reg.pos)
183		} else {
184			p("%s %s, %d(%s)", reg.saveOp, reg.reg, reg.pos, l.sp)
185		}
186	}
187}
188
189func (l *layout) restore() {
190	for i := len(l.regs) - 1; i >= 0; i-- {
191		reg := l.regs[i]
192		if reg.restore != "" {
193			p(reg.restore, reg.pos)
194		} else {
195			p("%s %d(%s), %s", reg.restoreOp, reg.pos, l.sp, reg.reg)
196		}
197	}
198}
199
200func gen386() {
201	p("PUSHFL")
202	// Save general purpose registers.
203	var l = layout{sp: "SP"}
204	for _, reg := range regNames386 {
205		if reg == "SP" || strings.HasPrefix(reg, "X") {
206			continue
207		}
208		l.add("MOVL", reg, 4)
209	}
210
211	softfloat := "GO386_softfloat"
212
213	// Save SSE state only if supported.
214	lSSE := layout{stack: l.stack, sp: "SP"}
215	for i := 0; i < 8; i++ {
216		lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
217	}
218
219	p("ADJSP $%d", lSSE.stack)
220	p("NOP SP")
221	l.save()
222	p("#ifndef %s", softfloat)
223	lSSE.save()
224	p("#endif")
225	p("CALL ·asyncPreempt2(SB)")
226	p("#ifndef %s", softfloat)
227	lSSE.restore()
228	p("#endif")
229	l.restore()
230	p("ADJSP $%d", -lSSE.stack)
231
232	p("POPFL")
233	p("RET")
234}
235
236func genAMD64() {
237	// Assign stack offsets.
238	var l = layout{sp: "SP"}
239	for _, reg := range regNamesAMD64 {
240		if reg == "SP" || reg == "BP" {
241			continue
242		}
243		if !strings.HasPrefix(reg, "X") {
244			l.add("MOVQ", reg, 8)
245		}
246	}
247	lSSE := layout{stack: l.stack, sp: "SP"}
248	for _, reg := range regNamesAMD64 {
249		if strings.HasPrefix(reg, "X") {
250			lSSE.add("MOVUPS", reg, 16)
251		}
252	}
253
254	// TODO: MXCSR register?
255
256	p("PUSHQ BP")
257	p("MOVQ SP, BP")
258	p("// Save flags before clobbering them")
259	p("PUSHFQ")
260	p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
261	p("ADJSP $%d", lSSE.stack)
262	p("// But vet doesn't know ADJSP, so suppress vet stack checking")
263	p("NOP SP")
264
265	l.save()
266
267	// Apparently, the signal handling code path in darwin kernel leaves
268	// the upper bits of Y registers in a dirty state, which causes
269	// many SSE operations (128-bit and narrower) become much slower.
270	// Clear the upper bits to get to a clean state. See issue #37174.
271	// It is safe here as Go code don't use the upper bits of Y registers.
272	p("#ifdef GOOS_darwin")
273	p("#ifndef hasAVX")
274	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
275	p("JE 2(PC)")
276	p("#endif")
277	p("VZEROUPPER")
278	p("#endif")
279
280	lSSE.save()
281	p("CALL ·asyncPreempt2(SB)")
282	lSSE.restore()
283	l.restore()
284	p("ADJSP $%d", -lSSE.stack)
285	p("POPFQ")
286	p("POPQ BP")
287	p("RET")
288}
289
290func genARM() {
291	// Add integer registers R0-R12.
292	// R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
293	var l = layout{sp: "R13", stack: 4} // add LR slot
294	for i := 0; i <= 12; i++ {
295		reg := fmt.Sprintf("R%d", i)
296		if i == 10 {
297			continue // R10 is g register, no need to save/restore
298		}
299		l.add("MOVW", reg, 4)
300	}
301	// Add flag register.
302	l.addSpecial(
303		"MOVW CPSR, R0\nMOVW R0, %d(R13)",
304		"MOVW %d(R13), R0\nMOVW R0, CPSR",
305		4)
306
307	// Add floating point registers F0-F15 and flag register.
308	var lfp = layout{stack: l.stack, sp: "R13"}
309	lfp.addSpecial(
310		"MOVW FPCR, R0\nMOVW R0, %d(R13)",
311		"MOVW %d(R13), R0\nMOVW R0, FPCR",
312		4)
313	for i := 0; i <= 15; i++ {
314		reg := fmt.Sprintf("F%d", i)
315		lfp.add("MOVD", reg, 8)
316	}
317
318	p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
319	l.save()
320	p("MOVB ·goarmsoftfp(SB), R0\nCMP $0, R0\nBNE nofp") // test goarmsoftfp, and skip FP registers if goarmsoftfp!=0.
321	lfp.save()
322	label("nofp:")
323	p("CALL ·asyncPreempt2(SB)")
324	p("MOVB ·goarmsoftfp(SB), R0\nCMP $0, R0\nBNE nofp2") // test goarmsoftfp, and skip FP registers if goarmsoftfp!=0.
325	lfp.restore()
326	label("nofp2:")
327	l.restore()
328
329	p("MOVW %d(R13), R14", lfp.stack)     // sigctxt.pushCall pushes LR on stack, restore it
330	p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
331	p("UNDEF")                            // shouldn't get here
332}
333
334func genARM64() {
335	// Add integer registers R0-R26
336	// R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
337	// and not saved here.
338	var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
339	for i := 0; i < 26; i += 2 {
340		if i == 18 {
341			i--
342			continue // R18 is not used, skip
343		}
344		reg := fmt.Sprintf("(R%d, R%d)", i, i+1)
345		l.add2("STP", "LDP", reg, 16)
346	}
347	// Add flag registers.
348	l.addSpecial(
349		"MOVD NZCV, R0\nMOVD R0, %d(RSP)",
350		"MOVD %d(RSP), R0\nMOVD R0, NZCV",
351		8)
352	l.addSpecial(
353		"MOVD FPSR, R0\nMOVD R0, %d(RSP)",
354		"MOVD %d(RSP), R0\nMOVD R0, FPSR",
355		8)
356	// TODO: FPCR? I don't think we'll change it, so no need to save.
357	// Add floating point registers F0-F31.
358	for i := 0; i < 31; i += 2 {
359		reg := fmt.Sprintf("(F%d, F%d)", i, i+1)
360		l.add2("FSTPD", "FLDPD", reg, 16)
361	}
362	if l.stack%16 != 0 {
363		l.stack += 8 // SP needs 16-byte alignment
364	}
365
366	// allocate frame, save PC of interrupted instruction (in LR)
367	p("MOVD R30, %d(RSP)", -l.stack)
368	p("SUB $%d, RSP", l.stack)
369	p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
370	p("SUB $8, RSP, R29")  // set up new frame pointer
371	// On iOS, save the LR again after decrementing SP. We run the
372	// signal handler on the G stack (as it doesn't support sigaltstack),
373	// so any writes below SP may be clobbered.
374	p("#ifdef GOOS_ios")
375	p("MOVD R30, (RSP)")
376	p("#endif")
377
378	l.save()
379	p("CALL ·asyncPreempt2(SB)")
380	l.restore()
381
382	p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
383	p("MOVD -8(RSP), R29")          // restore frame pointer
384	p("MOVD (RSP), R27")            // load PC to REGTMP
385	p("ADD $%d, RSP", l.stack+16)   // pop frame (including the space pushed by sigctxt.pushCall)
386	p("JMP (R27)")
387}
388
389func genMIPS(_64bit bool) {
390	mov := "MOVW"
391	movf := "MOVF"
392	add := "ADD"
393	sub := "SUB"
394	r28 := "R28"
395	regsize := 4
396	softfloat := "GOMIPS_softfloat"
397	if _64bit {
398		mov = "MOVV"
399		movf = "MOVD"
400		add = "ADDV"
401		sub = "SUBV"
402		r28 = "RSB"
403		regsize = 8
404		softfloat = "GOMIPS64_softfloat"
405	}
406
407	// Add integer registers R1-R22, R24-R25, R28
408	// R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
409	// and not saved here. R26 and R27 are reserved by kernel and not used.
410	var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
411	for i := 1; i <= 25; i++ {
412		if i == 23 {
413			continue // R23 is REGTMP
414		}
415		reg := fmt.Sprintf("R%d", i)
416		l.add(mov, reg, regsize)
417	}
418	l.add(mov, r28, regsize)
419	l.addSpecial(
420		mov+" HI, R1\n"+mov+" R1, %d(R29)",
421		mov+" %d(R29), R1\n"+mov+" R1, HI",
422		regsize)
423	l.addSpecial(
424		mov+" LO, R1\n"+mov+" R1, %d(R29)",
425		mov+" %d(R29), R1\n"+mov+" R1, LO",
426		regsize)
427
428	// Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
429	var lfp = layout{sp: "R29", stack: l.stack}
430	lfp.addSpecial(
431		mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
432		mov+" %d(R29), R1\n"+mov+" R1, FCR31",
433		regsize)
434	// Add floating point registers F0-F31.
435	for i := 0; i <= 31; i++ {
436		reg := fmt.Sprintf("F%d", i)
437		lfp.add(movf, reg, regsize)
438	}
439
440	// allocate frame, save PC of interrupted instruction (in LR)
441	p(mov+" R31, -%d(R29)", lfp.stack)
442	p(sub+" $%d, R29", lfp.stack)
443
444	l.save()
445	p("#ifndef %s", softfloat)
446	lfp.save()
447	p("#endif")
448	p("CALL ·asyncPreempt2(SB)")
449	p("#ifndef %s", softfloat)
450	lfp.restore()
451	p("#endif")
452	l.restore()
453
454	p(mov+" %d(R29), R31", lfp.stack)     // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
455	p(mov + " (R29), R23")                // load PC to REGTMP
456	p(add+" $%d, R29", lfp.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
457	p("JMP (R23)")
458}
459
460func genLoong64() {
461	mov := "MOVV"
462	movf := "MOVD"
463	add := "ADDV"
464	sub := "SUBV"
465	regsize := 8
466
467	// Add integer registers r4-r21 r23-r29 r31
468	// R0 (zero), R30 (REGTMP), R2 (tp), R3 (SP), R22 (g), R1 (LR) are special,
469	var l = layout{sp: "R3", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
470	for i := 4; i <= 31; i++ {
471		if i == 22 || i == 30 {
472			continue
473		}
474		reg := fmt.Sprintf("R%d", i)
475		l.add(mov, reg, regsize)
476	}
477
478	// Add floating point registers F0-F31.
479	for i := 0; i <= 31; i++ {
480		reg := fmt.Sprintf("F%d", i)
481		l.add(movf, reg, regsize)
482	}
483
484	// save/restore FCC0
485	l.addSpecial(
486		mov+" FCC0, R4\n"+mov+" R4, %d(R3)",
487		mov+" %d(R3), R4\n"+mov+" R4, FCC0",
488		regsize)
489
490	// allocate frame, save PC of interrupted instruction (in LR)
491	p(mov+" R1, -%d(R3)", l.stack)
492	p(sub+" $%d, R3", l.stack)
493
494	l.save()
495	p("CALL ·asyncPreempt2(SB)")
496	l.restore()
497
498	p(mov+" %d(R3), R1", l.stack)      // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
499	p(mov + " (R3), R30")              // load PC to REGTMP
500	p(add+" $%d, R3", l.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
501	p("JMP (R30)")
502}
503
504func genPPC64() {
505	// Add integer registers R3-R29
506	// R0 (zero), R1 (SP), R30 (g) are special and not saved here.
507	// R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
508	// R31 (REGTMP) will be saved manually.
509	var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
510	for i := 3; i <= 29; i++ {
511		if i == 12 || i == 13 {
512			// R12 has been saved in sigctxt.pushCall.
513			// R13 is TLS pointer, not used by Go code. we must NOT
514			// restore it, otherwise if we parked and resumed on a
515			// different thread we'll mess up TLS addresses.
516			continue
517		}
518		reg := fmt.Sprintf("R%d", i)
519		l.add("MOVD", reg, 8)
520	}
521	l.addSpecial(
522		"MOVW CR, R31\nMOVW R31, %d(R1)",
523		"MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
524		8)                                    // CR is 4-byte wide, but just keep the alignment
525	l.addSpecial(
526		"MOVD XER, R31\nMOVD R31, %d(R1)",
527		"MOVD %d(R1), R31\nMOVD R31, XER",
528		8)
529	// Add floating point registers F0-F31.
530	for i := 0; i <= 31; i++ {
531		reg := fmt.Sprintf("F%d", i)
532		l.add("FMOVD", reg, 8)
533	}
534	// Add floating point control/status register FPSCR.
535	l.addSpecial(
536		"MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
537		"FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
538		8)
539
540	p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
541	p("MOVD LR, R31")
542	p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)
543
544	l.save()
545	p("CALL ·asyncPreempt2(SB)")
546	l.restore()
547
548	p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
549	p("MOVD R31, LR")
550	p("MOVD %d(R1), R2", l.stack+8)
551	p("MOVD %d(R1), R12", l.stack+16)
552	p("MOVD (R1), R31") // load PC to CTR
553	p("MOVD R31, CTR")
554	p("MOVD 32(R1), R31")        // restore R31
555	p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
556	p("JMP (CTR)")
557}
558
559func genRISCV64() {
560	// X0 (zero), X1 (LR), X2 (SP), X3 (GP), X4 (TP), X27 (g), X31 (TMP) are special.
561	var l = layout{sp: "X2", stack: 8}
562
563	// Add integer registers (X5-X26, X28-30).
564	for i := 5; i < 31; i++ {
565		if i == 27 {
566			continue
567		}
568		reg := fmt.Sprintf("X%d", i)
569		l.add("MOV", reg, 8)
570	}
571
572	// Add floating point registers (F0-F31).
573	for i := 0; i <= 31; i++ {
574		reg := fmt.Sprintf("F%d", i)
575		l.add("MOVD", reg, 8)
576	}
577
578	p("MOV X1, -%d(X2)", l.stack)
579	p("SUB $%d, X2", l.stack)
580	l.save()
581	p("CALL ·asyncPreempt2(SB)")
582	l.restore()
583	p("MOV %d(X2), X1", l.stack)
584	p("MOV (X2), X31")
585	p("ADD $%d, X2", l.stack+8)
586	p("JMP (X31)")
587}
588
589func genS390X() {
590	// Add integer registers R0-R12
591	// R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
592	// Saving R10 (REGTMP) is not necessary, but it is saved anyway.
593	var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
594	l.addSpecial(
595		"STMG R0, R12, %d(R15)",
596		"LMG %d(R15), R0, R12",
597		13*8)
598	// Add floating point registers F0-F31.
599	for i := 0; i <= 15; i++ {
600		reg := fmt.Sprintf("F%d", i)
601		l.add("FMOVD", reg, 8)
602	}
603
604	// allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
605	p("IPM R10") // save flags upfront, as ADD will clobber flags
606	p("MOVD R14, -%d(R15)", l.stack)
607	p("ADD $-%d, R15", l.stack)
608	p("MOVW R10, 8(R15)") // save flags
609
610	l.save()
611	p("CALL ·asyncPreempt2(SB)")
612	l.restore()
613
614	p("MOVD %d(R15), R14", l.stack)    // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
615	p("ADD $%d, R15", l.stack+8)       // pop frame (including the space pushed by sigctxt.pushCall)
616	p("MOVWZ -%d(R15), R10", l.stack)  // load flags to REGTMP
617	p("TMLH R10, $(3<<12)")            // restore flags
618	p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
619	p("JMP (R10)")
620}
621
622func genWasm() {
623	p("// No async preemption on wasm")
624	p("UNDEF")
625}
626
627func notImplemented() {
628	p("// Not implemented yet")
629	p("JMP ·abort(SB)")
630}
631