//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass searches for instructions that are prevented from being compressed
// by one of the following:
//
//   1. The use of a single uncompressed register.
//   2. A base register + offset where the offset is too large to be compressed
//      and the base register may or may not be compressed.
//
//
// For case 1, if a compressed register is available, then the uncompressed
// register is copied to the compressed register and its uses are replaced.
//
// For example, storing zero uses the uncompressible zero register:
//   sw zero, 0(a0)   # if zero
//   sw zero, 8(a0)   # if zero
//   sw zero, 4(a0)   # if zero
//   sw zero, 24(a0)  # if zero
//
// If a compressed register (e.g. a1) is available, the above can be
// transformed to the following to improve code size:
//   li   a1, 0
//   c.sw a1, 0(a0)
//   c.sw a1, 8(a0)
//   c.sw a1, 4(a0)
//   c.sw a1, 24(a0)
//
//
// For case 2, if a compressed register is available, then the original base
// is copied and adjusted such that:
//
//   new_base_register = base_register + adjustment
//   base_register + large_offset = new_base_register + small_offset
//
// For example, the following offsets are too large for c.sw:
//   lui a2, 983065
//   sw  a1, -236(a2)
//   sw  a1, -240(a2)
//   sw  a1, -244(a2)
//   sw  a1, -248(a2)
//   sw  a1, -252(a2)
//   sw  a0, -256(a2)
//
// If a compressed register is available (e.g. a3), a new base could be created
// such that the addresses can be accessed with a compressible offset, thus
// improving code size:
//   lui  a2, 983065
//   addi a3, a2, -256
//   c.sw a1, 20(a3)
//   c.sw a1, 16(a3)
//   c.sw a1, 12(a3)
//   c.sw a1, 8(a3)
//   c.sw a1, 4(a3)
//   c.sw a0, 0(a3)
//
//
// This optimization is only applied if there are enough uses of the copied
// register for code size to be reduced.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-make-compressible"
#define RISCV_COMPRESS_INSTRS_NAME "RISCV Make Compressible"

namespace {

struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
  static char ID;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {
    initializeRISCVMakeCompressibleOptPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
};
} // namespace

char RISCVMakeCompressibleOpt::ID = 0;
INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
                RISCV_COMPRESS_INSTRS_NAME, false, false)
// Return log2(widthInBytes) of load/store done by Opcode.
static unsigned log2LdstWidth(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LW:
  case RISCV::SW:
  case RISCV::FLW:
  case RISCV::FSW:
    return 2;
  case RISCV::LD:
  case RISCV::SD:
  case RISCV::FLD:
  case RISCV::FSD:
    return 3;
  }
}

// Return a mask for the offset bits of a non-stack-pointer based compressed
// load/store.
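// The RVC load/store encodings hold a zero-extended 5-bit immediate scaled by
// the access width, so c.lw/c.sw can reach byte offsets 0, 4, ..., 124 and
// c.ld/c.sd can reach 0, 8, ..., 248; the returned mask selects exactly those
// offset bits.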
static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
  return 0x1f << log2LdstWidth(Opcode);
}

// Return true if Offset fits within a compressed stack-pointer based
// load/store.
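// Stack-pointer relative accesses (c.lwsp/c.swsp, c.ldsp/c.sdsp) instead
// encode a zero-extended 6-bit immediate scaled by the access width, giving
// ranges of 0-252 and 0-504 bytes respectively.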
static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
  return log2LdstWidth(Opcode) == 2 ? isShiftedUInt<6, 2>(Offset)
                                    : isShiftedUInt<6, 3>(Offset);
}

// Given an offset for a load/store, return the adjustment required to the base
// register such that the address can be accessed with a compressible offset.
// This will return 0 if the offset is already compressible.
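// For example, for sd (an 8-byte access) the compressible offset mask is
// 0x1f << 3 == 0xf8, so an offset of 808 splits into a base adjustment of
// 808 & ~0xf8 == 768 plus a compressible remainder of 40.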
static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
  // Return the excess bits that do not fit in a compressible offset.
  return Offset & ~compressedLDSTOffsetMask(Opcode);
}

// Return true if Reg is in a compressed register class.
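// The compressed register classes cover the eight registers addressable by
// most RVC encodings: x8-x15 (s0-s1, a0-a5) and f8-f15 (fs0-fs1, fa0-fa5).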
static bool isCompressedReg(Register Reg) {
  return RISCV::GPRCRegClass.contains(Reg) ||
         RISCV::FPR32CRegClass.contains(Reg) ||
         RISCV::FPR64CRegClass.contains(Reg);
}

// Return true if MI is a load for which there exists a compressed version.
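// Note that c.flw/c.fsw exist only on RV32; RV64 reuses those encodings for
// c.ld/c.sd, hence the is64Bit() checks here and in isCompressibleStore.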
static bool isCompressibleLoad(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
  const unsigned Opcode = MI.getOpcode();

  return Opcode == RISCV::LW || (!STI.is64Bit() && Opcode == RISCV::FLW) ||
         Opcode == RISCV::LD || Opcode == RISCV::FLD;
}

// Return true if MI is a store for which there exists a compressed version.
static bool isCompressibleStore(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
  const unsigned Opcode = MI.getOpcode();

  return Opcode == RISCV::SW || (!STI.is64Bit() && Opcode == RISCV::FSW) ||
         Opcode == RISCV::SD || Opcode == RISCV::FSD;
}

// Find a single register and/or large offset which, if compressible, would
// allow the given instruction to be compressed.
//
// Possible return values:
//
//   {Reg, 0}               - Uncompressed Reg needs replacing with a
//                            compressed register.
//   {Reg, N}               - Reg needs replacing with a compressed register
//                            and N needs adding to the new register. (Reg may
//                            be compressed or uncompressed).
//   {RISCV::NoRegister, 0} - No suitable optimization found for this
//                            instruction.
static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();

  if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
    const MachineOperand &MOImm = MI.getOperand(2);
    if (!MOImm.isImm())
      return RegImmPair(RISCV::NoRegister, 0);

    int64_t Offset = MOImm.getImm();
    int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
    Register Base = MI.getOperand(1).getReg();

    // Memory accesses via the stack pointer do not have a requirement for
    // either of the registers to be compressible and can take a larger offset.
    if (RISCV::SPRegClass.contains(Base)) {
      if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
        return RegImmPair(Base, NewBaseAdjust);
    } else {
      Register SrcDest = MI.getOperand(0).getReg();
      bool SrcDestCompressed = isCompressedReg(SrcDest);
      bool BaseCompressed = isCompressedReg(Base);

      // If only Base and/or offset prevent compression, then return Base and
      // any adjustment required to make the offset compressible.
      if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
        return RegImmPair(Base, NewBaseAdjust);

      // For loads, we can only change the base register since dest is defined
      // rather than used.
      //
      // For stores, we can change SrcDest (and Base if SrcDest == Base) but
      // cannot resolve an uncompressible offset in this case.
      if (isCompressibleStore(MI)) {
        if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
            !NewBaseAdjust)
          return RegImmPair(SrcDest, NewBaseAdjust);
      }
    }
  }
  return RegImmPair(RISCV::NoRegister, 0);
}

// Check all uses after FirstMI of the given register, keeping a vector of
// instructions that would be compressible if the given register (and offset if
// applicable) were compressible.
//
// If there are enough uses for this optimization to improve code size and a
// compressed register is available, return that compressed register.
static Register analyzeCompressibleUses(MachineInstr &FirstMI,
                                        RegImmPair RegImm,
                                        SmallVectorImpl<MachineInstr *> &MIs) {
  MachineBasicBlock &MBB = *FirstMI.getParent();
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();

  RegScavenger RS;
  RS.enterBasicBlock(MBB);

  for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
                                         E = MBB.instr_end();
       I != E; ++I) {
    MachineInstr &MI = *I;

    // Determine if this is an instruction which would benefit from using the
    // new register.
    RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
    if (CandidateRegImm.Reg == RegImm.Reg &&
        CandidateRegImm.Imm == RegImm.Imm) {
      // Advance tracking since the value in the new register must be live for
      // this instruction too.
      RS.forward(I);

      MIs.push_back(&MI);
    }

    // If RegImm.Reg is modified by this instruction, then we cannot optimize
    // past this instruction. If the register is already compressed, then it
    // may still be possible to optimize a large offset in the current
    // instruction - this will have been detected by the preceding call to
    // getRegImmPairPreventingCompression.
    if (MI.modifiesRegister(RegImm.Reg, TRI))
      break;
  }

  // Adjusting the base costs one new uncompressed addi and therefore three
  // uses are required for a code size reduction. If no base adjustment is
  // required, then copying the register costs one new c.mv (or c.li Rd, 0 for
  // "copying" the zero register) and therefore two uses are required for a
  // code size reduction.
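  // (All uncompressed instructions are 4 bytes and compressed ones 2 bytes,
  // so each compressed use saves 2 bytes: the 4-byte addi pays for itself
  // after 3 uses, the 2-byte c.mv/c.li after 2.)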
  if (MIs.size() < 2 || (RegImm.Imm != 0 && MIs.size() < 3))
    return RISCV::NoRegister;

  // Find a compressible register which will be available from the first
  // instruction we care about to the last.
  const TargetRegisterClass *RCToScavenge;

  // Work out the compressed register class from which to scavenge.
  if (RISCV::GPRRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRCRegClass;
  else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR32CRegClass;
  else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR64CRegClass;
  else
    return RISCV::NoRegister;

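  // With AllowSpill=false the scavenger will not insert spill code; if no
  // compressible register is free across the required range it is expected
  // to return NoRegister, which the caller treats as "nothing to do".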
  return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
                                      /*RestoreAfter=*/false, /*SPAdj=*/0,
                                      /*AllowSpill=*/false);
}

// Update uses of the old register in the given instruction to the new
// register.
static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
                           Register NewReg) {
  unsigned Opcode = MI.getOpcode();

  // If this pass is extended to support more instructions, the check for
  // definedness may need to be strengthened.
  assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
         "Unsupported instruction for this optimization.");

  int SkipN = 0;

  // Skip the first (value) operand of a store instruction (except if the
  // store offset is zero) in order to avoid an incorrect transformation:
  // e.g. rewriting "sd a0, 808(a0)" as "addi a2, a0, 768; sd a2, 40(a2)"
  // would wrongly replace the stored value as well as the base.
  if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
    SkipN = 1;

  // Update registers
  for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
    if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
      // Do not update operands that define the old register.
      //
      // The new register was scavenged for the range of instructions that are
      // being updated, therefore it should not be defined within this range
      // except possibly in the final instruction.
      if (MO.isDef()) {
        assert(isCompressibleLoad(MI));
        continue;
      }
      // Update reg
      MO.setReg(NewReg);
    }

  // Update offset
  MachineOperand &MOImm = MI.getOperand(2);
  int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
  MOImm.setImm(NewOffset);
}

bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
  // This is a size optimization.
  if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
    return false;

  const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo &TII = *STI.getInstrInfo();

  // This optimization only makes sense if compressed instructions are emitted.
  // FIXME: Support Zca, Zcf, Zcd granularity.
  if (!STI.hasStdExtC())
    return false;

  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &MI : MBB) {
      // Determine if this instruction would otherwise be compressed if not
      // for an uncompressible register or offset.
      RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
      if (!RegImm.Reg && RegImm.Imm == 0)
        continue;

      // Determine if there is a set of instructions for which replacing this
      // register with a compressed register (and compressible offset if
      // applicable) is possible and will allow compression.
      SmallVector<MachineInstr *, 8> MIs;
      Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
      if (!NewReg)
        continue;

      // Create the appropriate copy and/or offset.
      if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
        assert(isInt<12>(RegImm.Imm));
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
            .addReg(RegImm.Reg)
            .addImm(RegImm.Imm);
      } else {
        // If we are looking at replacing an FPR register we don't expect to
        // have any offset. The only compressible FP instructions with an
        // offset are loads and stores, for which the offset applies to the
        // GPR operand not the FPR operand.
        assert(RegImm.Imm == 0);
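        // There is no dedicated FP register-to-register move; the canonical
        // copy is fsgnj.s/fsgnj.d rd, rs, rs (the fmv.s/fmv.d alias).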
        unsigned Opcode = RISCV::FPR32RegClass.contains(RegImm.Reg)
                              ? RISCV::FSGNJ_S
                              : RISCV::FSGNJ_D;
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), NewReg)
            .addReg(RegImm.Reg)
            .addReg(RegImm.Reg);
      }

      // Update the set of instructions to use the compressed register and
      // compressible offset instead. These instructions should now be
      // compressible.
      // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
      // expected to become compressible.
      for (MachineInstr *UpdateMI : MIs)
        updateOperands(*UpdateMI, RegImm, NewReg);
    }
  }
  return true;
}

/// Returns an instance of the Make Compressible Optimization pass.
FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
  return new RISCVMakeCompressibleOpt();
}