1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package s390x
6
7import (
8	"cmd/compile/internal/base"
9	"cmd/compile/internal/objw"
10	"cmd/internal/obj"
11	"cmd/internal/obj/s390x"
12)
13
// clearLoopCutoff is the (somewhat arbitrary) value above which it is better
// to have a loop of clear instructions (e.g. XCs) rather than just generating
// multiple instructions (i.e. loop unrolling). zerorange compares its cnt
// argument against this value to choose between the two strategies.
// Must be between 256 and 4096.
const clearLoopCutoff = 1024
19
20// zerorange clears the stack in the given range.
21func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
22	if cnt == 0 {
23		return p
24	}
25
26	// Adjust the frame to account for LR.
27	off += base.Ctxt.Arch.FixedFrameSize
28	reg := int16(s390x.REGSP)
29
30	// If the off cannot fit in a 12-bit unsigned displacement then we
31	// need to create a copy of the stack pointer that we can adjust.
32	// We also need to do this if we are going to loop.
33	if off < 0 || off > 4096-clearLoopCutoff || cnt > clearLoopCutoff {
34		p = pp.Append(p, s390x.AADD, obj.TYPE_CONST, 0, off, obj.TYPE_REG, s390x.REGRT1, 0)
35		p.Reg = int16(s390x.REGSP)
36		reg = s390x.REGRT1
37		off = 0
38	}
39
40	// Generate a loop of large clears.
41	if cnt > clearLoopCutoff {
42		ireg := int16(s390x.REGRT2) // register holds number of remaining loop iterations
43		p = pp.Append(p, s390x.AMOVD, obj.TYPE_CONST, 0, cnt/256, obj.TYPE_REG, ireg, 0)
44		p = pp.Append(p, s390x.ACLEAR, obj.TYPE_CONST, 0, 256, obj.TYPE_MEM, reg, off)
45		pl := p
46		p = pp.Append(p, s390x.AADD, obj.TYPE_CONST, 0, 256, obj.TYPE_REG, reg, 0)
47		p = pp.Append(p, s390x.ABRCTG, obj.TYPE_REG, ireg, 0, obj.TYPE_BRANCH, 0, 0)
48		p.To.SetTarget(pl)
49		cnt = cnt % 256
50	}
51
52	// Generate remaining clear instructions without a loop.
53	for cnt > 0 {
54		n := cnt
55
56		// Can clear at most 256 bytes per instruction.
57		if n > 256 {
58			n = 256
59		}
60
61		switch n {
62		// Handle very small clears with move instructions.
63		case 8, 4, 2, 1:
64			ins := s390x.AMOVB
65			switch n {
66			case 8:
67				ins = s390x.AMOVD
68			case 4:
69				ins = s390x.AMOVW
70			case 2:
71				ins = s390x.AMOVH
72			}
73			p = pp.Append(p, ins, obj.TYPE_CONST, 0, 0, obj.TYPE_MEM, reg, off)
74
75		// Handle clears that would require multiple move instructions with CLEAR (assembled as XC).
76		default:
77			p = pp.Append(p, s390x.ACLEAR, obj.TYPE_CONST, 0, n, obj.TYPE_MEM, reg, off)
78		}
79
80		cnt -= n
81		off += n
82	}
83
84	return p
85}
86
87func ginsnop(pp *objw.Progs) *obj.Prog {
88	return pp.Prog(s390x.ANOPH)
89}
90