// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The gen command generates Go code (in the parent directory) for all
// the architecture-specific opcodes, blocks, and rewrites.
package main

import (
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"log"
	"math/bits"
	"os"
	"path"
	"regexp"
	"runtime"
	"runtime/pprof"
	"runtime/trace"
	"sort"
	"strings"
	"sync"
)

// TODO: capitalize these types, so that we can more easily tell variable names
// apart from type names, and avoid awkward func parameters like "arch arch".

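// arch describes one target architecture: its opcodes, block kinds, register
// names and masks, and the other metadata needed to generate its tables in
// the ssa package.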
type arch struct {
	name               string
	pkg                string // obj package to import for this arch.
	genfile            string // source file containing opcode code generation.
	ops                []opData
	blocks             []blockData
	regnames           []string
	ParamIntRegNames   string
	ParamFloatRegNames string
	gpregmask          regMask
	fpregmask          regMask
	fp32regmask        regMask
	fp64regmask        regMask
	specialregmask     regMask
	framepointerreg    int8
	linkreg            int8
	generic            bool
	imports            []string
}

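// opData describes a single opcode: its name, register constraints, assembly
// mnemonic, and the flags that control how the rest of the compiler treats it.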
type opData struct {
	name              string
	reg               regInfo
	asm               string
	typ               string // default result type
	aux               string
	rematerializeable bool
	argLength         int32  // number of arguments; if -1, this operation has a variable number of arguments
	commutative       bool   // this operation is commutative on its first 2 arguments (e.g. addition)
	resultInArg0      bool   // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
	resultNotInArgs   bool   // outputs must not be allocated to the same registers as inputs
	clobberFlags      bool   // this op clobbers flags register
	needIntTemp       bool   // need a temporary free integer register
	call              bool   // is a function call
	tailCall          bool   // is a tail call
	nilCheck          bool   // this op is a nil check on arg0
	faultOnNilArg0    bool   // this op will fault if arg0 is nil (and aux encodes a small offset)
	faultOnNilArg1    bool   // this op will fault if arg1 is nil (and aux encodes a small offset)
	hasSideEffects    bool   // for "reasons", not to be eliminated.  E.g., atomic store, #19182.
	zeroWidth         bool   // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width.
	unsafePoint       bool   // this op is an unsafe point, i.e. not safe for async preemption
	symEffect         string // effect this op has on symbol in aux
	scale             uint8  // amd64/386 indexed load scale
}

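// blockData describes one block kind for an architecture.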
type blockData struct {
	name     string // the suffix for this block ("EQ", "LT", etc.)
	controls int    // the number of control values this type of block requires
	aux      string // the type of the Aux/AuxInt value, if any
}

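// regInfo encodes the register constraints of an op: which registers each
// input may use, which registers the op clobbers, and which registers each
// output may be allocated to.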
type regInfo struct {
	// inputs[i] encodes the set of registers allowed for the i'th input.
	// Inputs that don't use registers (flags, memory, etc.) should be 0.
	inputs []regMask
	// clobbers encodes the set of registers that are overwritten by
	// the instruction (other than the output registers).
	clobbers regMask
	// outputs[i] encodes the set of registers allowed for the i'th output.
	outputs []regMask
}

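// regMask is a bit set of registers: bit i corresponds to the register named
// by the arch's regnames[i]. For example (hypothetical names), if regnames[0]
// is "AX" and regnames[2] is "CX", the mask 0b101 allows either of them.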
type regMask uint64

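// regMaskComment returns a trailing Go comment listing the names of the
// registers in r, e.g. " // AX CX", or the empty string if r is zero.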
func (a arch) regMaskComment(r regMask) string {
	var buf strings.Builder
	for i := uint64(0); r != 0; i++ {
		if r&1 != 0 {
			if buf.Len() == 0 {
				buf.WriteString(" //")
			}
			buf.WriteString(" ")
			buf.WriteString(a.regnames[i])
		}
		r >>= 1
	}
	return buf.String()
}

var archs []arch

var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
var tracefile = flag.String("trace", "", "write trace to `file`")

func main() {
	flag.Parse()
	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal("could not create CPU profile: ", err)
		}
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal("could not start CPU profile: ", err)
		}
		defer pprof.StopCPUProfile()
	}
	if *tracefile != "" {
		f, err := os.Create(*tracefile)
		if err != nil {
			log.Fatalf("failed to create trace output file: %v", err)
		}
		defer func() {
			if err := f.Close(); err != nil {
				log.Fatalf("failed to close trace file: %v", err)
			}
		}()

		if err := trace.Start(f); err != nil {
			log.Fatalf("failed to start trace: %v", err)
		}
		defer trace.Stop()
	}

	sort.Sort(ArchsByName(archs))

	// The generate tasks are run concurrently, since they are CPU-intensive
	// and can easily make use of many cores on a machine.
	//
	// Note that there is no limit on the concurrency at the moment. On a
	// four-core laptop at the time of writing, peak RSS usually reaches
	// ~200MiB, which seems doable by practically any machine nowadays. If
	// that stops being the case, we can cap this func to a fixed number of
	// architectures being generated at once.

	tasks := []func(){
		genOp,
		genAllocators,
	}
	for _, a := range archs {
		a := a // the funcs are run concurrently at a later time
		tasks = append(tasks, func() {
			genRules(a)
			genSplitLoadRules(a)
			genLateLowerRules(a)
		})
	}
	var wg sync.WaitGroup
	for _, task := range tasks {
		task := task
		wg.Add(1)
		go func() {
			task()
			wg.Done()
		}()
	}
	wg.Wait()

	if *memprofile != "" {
		f, err := os.Create(*memprofile)
		if err != nil {
			log.Fatal("could not create memory profile: ", err)
		}
		defer f.Close()
		runtime.GC() // get up-to-date statistics
		if err := pprof.WriteHeapProfile(f); err != nil {
			log.Fatal("could not write memory profile: ", err)
		}
	}
}

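// genOp generates ../opGen.go: the Block* and Op* enumerations, the
// opcodeTable, and the per-architecture register tables. It then checks that
// each arch's genfile mentions every one of that arch's opcodes.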
func genOp() {
	w := new(bytes.Buffer)
	fmt.Fprintf(w, "// Code generated from _gen/*Ops.go using 'go generate'; DO NOT EDIT.\n")
	fmt.Fprintln(w)
	fmt.Fprintln(w, "package ssa")

	fmt.Fprintln(w, "import (")
	fmt.Fprintln(w, "\"cmd/internal/obj\"")
	for _, a := range archs {
		if a.pkg != "" {
			fmt.Fprintf(w, "%q\n", a.pkg)
		}
	}
	fmt.Fprintln(w, ")")

	// generate Block* declarations
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "BlockInvalid BlockKind = iota")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, d := range a.blocks {
			fmt.Fprintf(w, "Block%s%s\n", a.Name(), d.name)
		}
	}
	fmt.Fprintln(w, ")")

	// generate block kind string method
	fmt.Fprintln(w, "var blockString = [...]string{")
	fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, b := range a.blocks {
			fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.name)
		}
	}
	fmt.Fprintln(w, "}")
	fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}")

	// generate block kind auxint method
	fmt.Fprintln(w, "func (k BlockKind) AuxIntType() string {")
	fmt.Fprintln(w, "switch k {")
	for _, a := range archs {
		for _, b := range a.blocks {
			if b.auxIntType() == "invalid" {
				continue
			}
			fmt.Fprintf(w, "case Block%s%s: return \"%s\"\n", a.Name(), b.name, b.auxIntType())
		}
	}
	fmt.Fprintln(w, "}")
	fmt.Fprintln(w, "return \"\"")
	fmt.Fprintln(w, "}")

	// generate Op* declarations
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "OpInvalid Op = iota") // make sure OpInvalid is 0.
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, v := range a.ops {
			if v.name == "Invalid" {
				continue
			}
			fmt.Fprintf(w, "Op%s%s\n", a.Name(), v.name)
		}
	}
	fmt.Fprintln(w, ")")

	// generate OpInfo table
	fmt.Fprintln(w, "var opcodeTable = [...]opInfo{")
	fmt.Fprintln(w, " { name: \"OpInvalid\" },")
	for _, a := range archs {
		fmt.Fprintln(w)

		pkg := path.Base(a.pkg)
		for _, v := range a.ops {
			if v.name == "Invalid" {
				continue
			}
			fmt.Fprintln(w, "{")
			fmt.Fprintf(w, "name:\"%s\",\n", v.name)

			// flags
			if v.aux != "" {
				fmt.Fprintf(w, "auxType: aux%s,\n", v.aux)
			}
			fmt.Fprintf(w, "argLen: %d,\n", v.argLength)

			if v.rematerializeable {
				if v.reg.clobbers != 0 {
					log.Fatalf("%s is rematerializeable and clobbers registers", v.name)
				}
				if v.clobberFlags {
					log.Fatalf("%s is rematerializeable and clobbers flags", v.name)
				}
				fmt.Fprintln(w, "rematerializeable: true,")
			}
			if v.commutative {
				fmt.Fprintln(w, "commutative: true,")
			}
			if v.resultInArg0 {
				fmt.Fprintln(w, "resultInArg0: true,")
				// OpConvert's register mask is selected dynamically,
				// so don't try to check it in the static table.
				if v.name != "Convert" && v.reg.inputs[0] != v.reg.outputs[0] {
					log.Fatalf("%s: input[0] and output[0] must use the same registers for %s", a.name, v.name)
				}
				if v.name != "Convert" && v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
					log.Fatalf("%s: input[1] and output[0] must use the same registers for %s", a.name, v.name)
				}
			}
			if v.resultNotInArgs {
				fmt.Fprintln(w, "resultNotInArgs: true,")
			}
			if v.clobberFlags {
				fmt.Fprintln(w, "clobberFlags: true,")
			}
			if v.needIntTemp {
				fmt.Fprintln(w, "needIntTemp: true,")
			}
			if v.call {
				fmt.Fprintln(w, "call: true,")
			}
			if v.tailCall {
				fmt.Fprintln(w, "tailCall: true,")
			}
			if v.nilCheck {
				fmt.Fprintln(w, "nilCheck: true,")
			}
			if v.faultOnNilArg0 {
				fmt.Fprintln(w, "faultOnNilArg0: true,")
				if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
					log.Fatalf("faultOnNilArg0 with aux %s not allowed", v.aux)
				}
			}
			if v.faultOnNilArg1 {
				fmt.Fprintln(w, "faultOnNilArg1: true,")
				if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
					log.Fatalf("faultOnNilArg1 with aux %s not allowed", v.aux)
				}
			}
			if v.hasSideEffects {
				fmt.Fprintln(w, "hasSideEffects: true,")
			}
			if v.zeroWidth {
				fmt.Fprintln(w, "zeroWidth: true,")
			}
			if v.unsafePoint {
				fmt.Fprintln(w, "unsafePoint: true,")
			}
			needEffect := strings.HasPrefix(v.aux, "Sym")
			if v.symEffect != "" {
				if !needEffect {
					log.Fatalf("symEffect with aux %s not allowed", v.aux)
				}
				fmt.Fprintf(w, "symEffect: Sym%s,\n", strings.Replace(v.symEffect, ",", "|Sym", -1))
			} else if needEffect {
				log.Fatalf("symEffect needed for aux %s", v.aux)
			}
			if a.name == "generic" {
				fmt.Fprintln(w, "generic:true,")
				fmt.Fprintln(w, "},") // close op
				// generic ops have no reg info or asm
				continue
			}
			if v.asm != "" {
				fmt.Fprintf(w, "asm: %s.A%s,\n", pkg, v.asm)
			}
			if v.scale != 0 {
				fmt.Fprintf(w, "scale: %d,\n", v.scale)
			}
			fmt.Fprintln(w, "reg:regInfo{")

			// Compute input allocation order. We allocate from the
			// most to the least constrained input. This order guarantees
			// that we will always be able to find a register.
			var s []intPair
			for i, r := range v.reg.inputs {
				if r != 0 {
					s = append(s, intPair{countRegs(r), i})
				}
			}
			if len(s) > 0 {
				sort.Sort(byKey(s))
				fmt.Fprintln(w, "inputs: []inputInfo{")
				for _, p := range s {
					r := v.reg.inputs[p.val]
					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
				}
				fmt.Fprintln(w, "},")
			}

			if v.reg.clobbers > 0 {
				fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
			}

			// reg outputs
			s = s[:0]
			for i, r := range v.reg.outputs {
				s = append(s, intPair{countRegs(r), i})
			}
			if len(s) > 0 {
				sort.Sort(byKey(s))
				fmt.Fprintln(w, "outputs: []outputInfo{")
				for _, p := range s {
					r := v.reg.outputs[p.val]
					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
				}
				fmt.Fprintln(w, "},")
			}
			fmt.Fprintln(w, "},") // close reg info
			fmt.Fprintln(w, "},") // close op
		}
	}
	fmt.Fprintln(w, "}")

	fmt.Fprintln(w, "func (o Op) Asm() obj.As {return opcodeTable[o].asm}")
	fmt.Fprintln(w, "func (o Op) Scale() int16 {return int16(opcodeTable[o].scale)}")

	// generate op string method
	fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }")

	fmt.Fprintln(w, "func (o Op) SymEffect() SymEffect { return opcodeTable[o].symEffect }")
	fmt.Fprintln(w, "func (o Op) IsCall() bool { return opcodeTable[o].call }")
	fmt.Fprintln(w, "func (o Op) IsTailCall() bool { return opcodeTable[o].tailCall }")
	fmt.Fprintln(w, "func (o Op) HasSideEffects() bool { return opcodeTable[o].hasSideEffects }")
	fmt.Fprintln(w, "func (o Op) UnsafePoint() bool { return opcodeTable[o].unsafePoint }")
	fmt.Fprintln(w, "func (o Op) ResultInArg0() bool { return opcodeTable[o].resultInArg0 }")

	// generate registers
	for _, a := range archs {
		if a.generic {
			continue
		}
		fmt.Fprintf(w, "var registers%s = [...]Register {\n", a.name)
		var gcRegN int
		num := map[string]int8{}
		for i, r := range a.regnames {
			num[r] = int8(i)
			pkg := a.pkg[len("cmd/internal/obj/"):]
			var objname string // name in cmd/internal/obj/$ARCH
			switch r {
			case "SB":
				// SB isn't a real register.  cmd/internal/obj expects 0 in this case.
				objname = "0"
			case "SP":
				objname = pkg + ".REGSP"
			case "g":
				objname = pkg + ".REGG"
			default:
				objname = pkg + ".REG_" + r
			}
			// Assign a GC register map index to registers
			// that may contain pointers.
			gcRegIdx := -1
			if a.gpregmask&(1<<uint(i)) != 0 {
				gcRegIdx = gcRegN
				gcRegN++
			}
			fmt.Fprintf(w, "  {%d, %s, %d, \"%s\"},\n", i, objname, gcRegIdx, r)
		}
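		// parameterRegisterList converts a space-separated list of register
		// names (ParamIntRegNames/ParamFloatRegNames) into register numbers.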
		parameterRegisterList := func(paramNamesString string) []int8 {
			paramNamesString = strings.TrimSpace(paramNamesString)
			if paramNamesString == "" {
				return nil
			}
			paramNames := strings.Split(paramNamesString, " ")
			var paramRegs []int8
			for _, regName := range paramNames {
				if regName == "" {
					// forgive extra spaces
					continue
				}
				if regNum, ok := num[regName]; ok {
					paramRegs = append(paramRegs, regNum)
					delete(num, regName)
				} else {
					log.Fatalf("parameter register %s for architecture %s not a register name (or repeated in parameter list)", regName, a.name)
				}
			}
			return paramRegs
		}

		paramIntRegs := parameterRegisterList(a.ParamIntRegNames)
		paramFloatRegs := parameterRegisterList(a.ParamFloatRegNames)

		if gcRegN > 32 {
			// Won't fit in a uint32 mask.
			log.Fatalf("too many GC registers (%d > 32) on %s", gcRegN, a.name)
		}
		fmt.Fprintln(w, "}")
		fmt.Fprintf(w, "var paramIntReg%s = %#v\n", a.name, paramIntRegs)
		fmt.Fprintf(w, "var paramFloatReg%s = %#v\n", a.name, paramFloatRegs)
		fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask)
		fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
		if a.fp32regmask != 0 {
			fmt.Fprintf(w, "var fp32RegMask%s = regMask(%d)\n", a.name, a.fp32regmask)
		}
		if a.fp64regmask != 0 {
			fmt.Fprintf(w, "var fp64RegMask%s = regMask(%d)\n", a.name, a.fp64regmask)
		}
		fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask)
		fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
		fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg)
	}

	// gofmt result
	b := w.Bytes()
	var err error
	b, err = format.Source(b)
	if err != nil {
		fmt.Printf("%s\n", w.Bytes())
		panic(err)
	}

	if err := os.WriteFile("../opGen.go", b, 0666); err != nil {
		log.Fatalf("can't write output: %v\n", err)
	}

	// Check that the arch genfile handles all the arch-specific opcodes.
	// This is very much a hack, but it is better than nothing.
	//
	// Do a single regexp pass to record all ops being handled in a map, and
	// then compare that with the ops list. This is much faster than one
	// regexp pass per opcode.
	for _, a := range archs {
		if a.genfile == "" {
			continue
		}

		pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name)
		rxOp, err := regexp.Compile(pattern)
		if err != nil {
			log.Fatalf("bad opcode regexp %s: %v", pattern, err)
		}

		src, err := os.ReadFile(a.genfile)
		if err != nil {
			log.Fatalf("can't read %s: %v", a.genfile, err)
		}
		seen := make(map[string]bool, len(a.ops))
		for _, m := range rxOp.FindAllSubmatch(src, -1) {
			seen[string(m[1])] = true
		}
		for _, op := range a.ops {
			if !seen[op.name] {
				log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile)
			}
		}
	}
}

// Name returns the name of the architecture for use in Op* and Block* enumerations.
func (a arch) Name() string {
	s := a.name
	if s == "generic" {
		s = ""
	}
	return s
}

// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}

// for sorting a pair of integers by key
type intPair struct {
	key, val int
}
type byKey []intPair

func (a byKey) Len() int           { return len(a) }
func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }

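// ArchsByName sorts architectures by name so that the generated output is
// deterministic.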
type ArchsByName []arch

func (x ArchsByName) Len() int           { return len(x) }
func (x ArchsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name }