1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "LoongArchTargetMachine.h"
20 #include "MCTargetDesc/LoongArchBaseInfo.h"
21 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/ISDOpcodes.h"
24 #include "llvm/CodeGen/RuntimeLibcalls.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/IntrinsicsLoongArch.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/KnownBits.h"
29 #include "llvm/Support/MathExtras.h"
30
31 using namespace llvm;
32
33 #define DEBUG_TYPE "loongarch-isel-lowering"
34
35 STATISTIC(NumTailCalls, "Number of tail calls");
36
37 static cl::opt<bool> ZeroDivCheck(
38 "loongarch-check-zero-division", cl::Hidden,
39 cl::desc("Trap on integer division by zero."),
40 cl::init(false));
41
LoongArchTargetLowering(const TargetMachine & TM,const LoongArchSubtarget & STI)42 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
43 const LoongArchSubtarget &STI)
44 : TargetLowering(TM), Subtarget(STI) {
45
46 MVT GRLenVT = Subtarget.getGRLenVT();
47 // Set up the register classes.
48 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
49 if (Subtarget.hasBasicF())
50 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
51 if (Subtarget.hasBasicD())
52 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
53
54 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
55 MVT::i1, Promote);
56
57 // TODO: add necessary setOperationAction calls later.
58 setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
59 setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
60 setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
61 setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
62 setOperationAction(ISD::ROTL, GRLenVT, Expand);
63 setOperationAction(ISD::CTPOP, GRLenVT, Expand);
64 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
65 setOperationAction(ISD::TRAP, MVT::Other, Legal);
66 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
67 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
68
69 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
70 ISD::JumpTable},
71 GRLenVT, Custom);
72
73 setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);
74
75 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
76
77 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
78 if (Subtarget.is64Bit())
79 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
80
81 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
82 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
83 setOperationAction(ISD::VASTART, MVT::Other, Custom);
84 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
85
86 if (Subtarget.is64Bit()) {
87 setOperationAction(ISD::SHL, MVT::i32, Custom);
88 setOperationAction(ISD::SRA, MVT::i32, Custom);
89 setOperationAction(ISD::SRL, MVT::i32, Custom);
90 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
91 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
92 setOperationAction(ISD::ROTR, MVT::i32, Custom);
93 setOperationAction(ISD::ROTL, MVT::i32, Custom);
94 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
95 setOperationAction(ISD::CTLZ, MVT::i32, Custom);
96 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
97 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
98 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
99 setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
100 setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
101 if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
102 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
103 if (Subtarget.hasBasicF())
104 setOperationAction(ISD::FRINT, MVT::f32, Legal);
105 if (Subtarget.hasBasicD())
106 setOperationAction(ISD::FRINT, MVT::f64, Legal);
107 }
108
109 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
110 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
111 // and i32 could still be byte-swapped relatively cheaply.
112 setOperationAction(ISD::BSWAP, MVT::i16, Custom);
113 if (Subtarget.is64Bit()) {
114 setOperationAction(ISD::BSWAP, MVT::i32, Custom);
115 }
116
117 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
118 // we get to know which of sll and revb.2h is faster.
119 setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
120 if (Subtarget.is64Bit()) {
121 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
122 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
123 } else {
124 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
125 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
126 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
127 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
128 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
129 setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
130 }
131
132 static const ISD::CondCode FPCCToExpand[] = {
133 ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
134 ISD::SETGE, ISD::SETNE, ISD::SETGT};
135
136 if (Subtarget.hasBasicF()) {
137 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
138 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
139 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
140 setOperationAction(ISD::FMA, MVT::f32, Legal);
141 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
142 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
143 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
144 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
145 setOperationAction(ISD::FSIN, MVT::f32, Expand);
146 setOperationAction(ISD::FCOS, MVT::f32, Expand);
147 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
148 setOperationAction(ISD::FPOW, MVT::f32, Expand);
149 setOperationAction(ISD::FREM, MVT::f32, Expand);
150 }
151 if (Subtarget.hasBasicD()) {
152 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
153 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
154 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
155 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
156 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
157 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
158 setOperationAction(ISD::FMA, MVT::f64, Legal);
159 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
160 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
161 setOperationAction(ISD::FSIN, MVT::f64, Expand);
162 setOperationAction(ISD::FCOS, MVT::f64, Expand);
163 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
164 setOperationAction(ISD::FPOW, MVT::f64, Expand);
165 setOperationAction(ISD::FREM, MVT::f64, Expand);
166 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
167 }
168
169 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
170
171 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
172 setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
173 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
174 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
175 if (!Subtarget.is64Bit())
176 setLibcallName(RTLIB::MUL_I128, nullptr);
177
178 setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
179 setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
180 if ((Subtarget.is64Bit() && Subtarget.hasBasicF() &&
181 !Subtarget.hasBasicD())) {
182 setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom);
183 setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
184 }
185
186 // Compute derived properties from the register classes.
187 computeRegisterProperties(STI.getRegisterInfo());
188
189 setStackPointerRegisterToSaveRestore(LoongArch::R3);
190
191 setBooleanContents(ZeroOrOneBooleanContent);
192
193 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
194
195 setMinCmpXchgSizeInBits(32);
196
197 // Function alignments.
198 const Align FunctionAlignment(4);
199 setMinFunctionAlignment(FunctionAlignment);
200
201 setTargetDAGCombine(ISD::AND);
202 setTargetDAGCombine(ISD::OR);
203 setTargetDAGCombine(ISD::SRL);
204 }
205
isOffsetFoldingLegal(const GlobalAddressSDNode * GA) const206 bool LoongArchTargetLowering::isOffsetFoldingLegal(
207 const GlobalAddressSDNode *GA) const {
208 // In order to maximise the opportunity for common subexpression elimination,
209 // keep a separate ADD node for the global address offset instead of folding
210 // it in the global address node. Later peephole optimisations may choose to
211 // fold it back in when profitable.
212 return false;
213 }
214
LowerOperation(SDValue Op,SelectionDAG & DAG) const215 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
216 SelectionDAG &DAG) const {
217 switch (Op.getOpcode()) {
218 case ISD::EH_DWARF_CFA:
219 return lowerEH_DWARF_CFA(Op, DAG);
220 case ISD::GlobalAddress:
221 return lowerGlobalAddress(Op, DAG);
222 case ISD::GlobalTLSAddress:
223 return lowerGlobalTLSAddress(Op, DAG);
224 case ISD::INTRINSIC_WO_CHAIN:
225 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
226 case ISD::INTRINSIC_W_CHAIN:
227 return lowerINTRINSIC_W_CHAIN(Op, DAG);
228 case ISD::INTRINSIC_VOID:
229 return lowerINTRINSIC_VOID(Op, DAG);
230 case ISD::BlockAddress:
231 return lowerBlockAddress(Op, DAG);
232 case ISD::JumpTable:
233 return lowerJumpTable(Op, DAG);
234 case ISD::SHL_PARTS:
235 return lowerShiftLeftParts(Op, DAG);
236 case ISD::SRA_PARTS:
237 return lowerShiftRightParts(Op, DAG, true);
238 case ISD::SRL_PARTS:
239 return lowerShiftRightParts(Op, DAG, false);
240 case ISD::ConstantPool:
241 return lowerConstantPool(Op, DAG);
242 case ISD::FP_TO_SINT:
243 return lowerFP_TO_SINT(Op, DAG);
244 case ISD::BITCAST:
245 return lowerBITCAST(Op, DAG);
246 case ISD::UINT_TO_FP:
247 return lowerUINT_TO_FP(Op, DAG);
248 case ISD::SINT_TO_FP:
249 return lowerSINT_TO_FP(Op, DAG);
250 case ISD::VASTART:
251 return lowerVASTART(Op, DAG);
252 case ISD::FRAMEADDR:
253 return lowerFRAMEADDR(Op, DAG);
254 case ISD::RETURNADDR:
255 return lowerRETURNADDR(Op, DAG);
256 case ISD::WRITE_REGISTER:
257 return lowerWRITE_REGISTER(Op, DAG);
258 }
259 return SDValue();
260 }
261
lowerWRITE_REGISTER(SDValue Op,SelectionDAG & DAG) const262 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
263 SelectionDAG &DAG) const {
264
265 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
266 DAG.getContext()->emitError(
267 "On LA64, only 64-bit registers can be written.");
268 return Op.getOperand(0);
269 }
270
271 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
272 DAG.getContext()->emitError(
273 "On LA32, only 32-bit registers can be written.");
274 return Op.getOperand(0);
275 }
276
277 return Op;
278 }
279
lowerFRAMEADDR(SDValue Op,SelectionDAG & DAG) const280 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
281 SelectionDAG &DAG) const {
282 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
283 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
284 "be a constant integer");
285 return SDValue();
286 }
287
288 MachineFunction &MF = DAG.getMachineFunction();
289 MF.getFrameInfo().setFrameAddressIsTaken(true);
290 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
291 EVT VT = Op.getValueType();
292 SDLoc DL(Op);
293 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
294 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
295 int GRLenInBytes = Subtarget.getGRLen() / 8;
296
297 while (Depth--) {
298 int Offset = -(GRLenInBytes * 2);
299 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
300 DAG.getIntPtrConstant(Offset, DL));
301 FrameAddr =
302 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
303 }
304 return FrameAddr;
305 }
306
lowerRETURNADDR(SDValue Op,SelectionDAG & DAG) const307 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
308 SelectionDAG &DAG) const {
309 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
310 return SDValue();
311
312 // Currently only support lowering return address for current frame.
313 if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
314 DAG.getContext()->emitError(
315 "return address can only be determined for the current frame");
316 return SDValue();
317 }
318
319 MachineFunction &MF = DAG.getMachineFunction();
320 MF.getFrameInfo().setReturnAddressIsTaken(true);
321 MVT GRLenVT = Subtarget.getGRLenVT();
322
323 // Return the value of the return address register, marking it an implicit
324 // live-in.
325 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
326 getRegClassFor(GRLenVT));
327 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
328 }
329
lowerEH_DWARF_CFA(SDValue Op,SelectionDAG & DAG) const330 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
331 SelectionDAG &DAG) const {
332 MachineFunction &MF = DAG.getMachineFunction();
333 auto Size = Subtarget.getGRLen() / 8;
334 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
335 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
336 }
337
lowerVASTART(SDValue Op,SelectionDAG & DAG) const338 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
339 SelectionDAG &DAG) const {
340 MachineFunction &MF = DAG.getMachineFunction();
341 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
342
343 SDLoc DL(Op);
344 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
345 getPointerTy(MF.getDataLayout()));
346
347 // vastart just stores the address of the VarArgsFrameIndex slot into the
348 // memory location argument.
349 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
350 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
351 MachinePointerInfo(SV));
352 }
353
lowerUINT_TO_FP(SDValue Op,SelectionDAG & DAG) const354 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
355 SelectionDAG &DAG) const {
356 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
357 !Subtarget.hasBasicD() && "unexpected target features");
358
359 SDLoc DL(Op);
360 SDValue Op0 = Op.getOperand(0);
361 if (Op0->getOpcode() == ISD::AND) {
362 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
363 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
364 return Op;
365 }
366
367 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
368 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
369 Op0.getConstantOperandVal(2) == UINT64_C(0))
370 return Op;
371
372 if (Op0.getOpcode() == ISD::AssertZext &&
373 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
374 return Op;
375
376 EVT OpVT = Op0.getValueType();
377 EVT RetVT = Op.getValueType();
378 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
379 MakeLibCallOptions CallOptions;
380 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
381 SDValue Chain = SDValue();
382 SDValue Result;
383 std::tie(Result, Chain) =
384 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
385 return Result;
386 }
387
lowerSINT_TO_FP(SDValue Op,SelectionDAG & DAG) const388 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
389 SelectionDAG &DAG) const {
390 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
391 !Subtarget.hasBasicD() && "unexpected target features");
392
393 SDLoc DL(Op);
394 SDValue Op0 = Op.getOperand(0);
395
396 if ((Op0.getOpcode() == ISD::AssertSext ||
397 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
398 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
399 return Op;
400
401 EVT OpVT = Op0.getValueType();
402 EVT RetVT = Op.getValueType();
403 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
404 MakeLibCallOptions CallOptions;
405 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
406 SDValue Chain = SDValue();
407 SDValue Result;
408 std::tie(Result, Chain) =
409 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
410 return Result;
411 }
412
lowerBITCAST(SDValue Op,SelectionDAG & DAG) const413 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
414 SelectionDAG &DAG) const {
415
416 SDLoc DL(Op);
417 SDValue Op0 = Op.getOperand(0);
418
419 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
420 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
421 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
422 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
423 }
424 return Op;
425 }
426
lowerFP_TO_SINT(SDValue Op,SelectionDAG & DAG) const427 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
428 SelectionDAG &DAG) const {
429
430 SDLoc DL(Op);
431
432 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
433 !Subtarget.hasBasicD()) {
434 SDValue Dst =
435 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
436 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
437 }
438
439 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
440 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
441 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
442 }
443
getTargetNode(GlobalAddressSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)444 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
445 SelectionDAG &DAG, unsigned Flags) {
446 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
447 }
448
getTargetNode(BlockAddressSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)449 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
450 SelectionDAG &DAG, unsigned Flags) {
451 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
452 Flags);
453 }
454
getTargetNode(ConstantPoolSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)455 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
456 SelectionDAG &DAG, unsigned Flags) {
457 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
458 N->getOffset(), Flags);
459 }
460
getTargetNode(JumpTableSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)461 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
462 SelectionDAG &DAG, unsigned Flags) {
463 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
464 }
465
466 template <class NodeTy>
getAddr(NodeTy * N,SelectionDAG & DAG,bool IsLocal) const467 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
468 bool IsLocal) const {
469 SDLoc DL(N);
470 EVT Ty = getPointerTy(DAG.getDataLayout());
471 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
472 // TODO: Check CodeModel.
473 if (IsLocal)
474 // This generates the pattern (PseudoLA_PCREL sym), which expands to
475 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
476 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr),
477 0);
478
479 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
480 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
481 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
482 }
483
lowerBlockAddress(SDValue Op,SelectionDAG & DAG) const484 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
485 SelectionDAG &DAG) const {
486 return getAddr(cast<BlockAddressSDNode>(Op), DAG);
487 }
488
lowerJumpTable(SDValue Op,SelectionDAG & DAG) const489 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
490 SelectionDAG &DAG) const {
491 return getAddr(cast<JumpTableSDNode>(Op), DAG);
492 }
493
lowerConstantPool(SDValue Op,SelectionDAG & DAG) const494 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
495 SelectionDAG &DAG) const {
496 return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
497 }
498
lowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const499 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
500 SelectionDAG &DAG) const {
501 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
502 assert(N->getOffset() == 0 && "unexpected offset in global node");
503 return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
504 }
505
getStaticTLSAddr(GlobalAddressSDNode * N,SelectionDAG & DAG,unsigned Opc) const506 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
507 SelectionDAG &DAG,
508 unsigned Opc) const {
509 SDLoc DL(N);
510 EVT Ty = getPointerTy(DAG.getDataLayout());
511 MVT GRLenVT = Subtarget.getGRLenVT();
512
513 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
514 SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
515
516 // Add the thread pointer.
517 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
518 DAG.getRegister(LoongArch::R2, GRLenVT));
519 }
520
getDynamicTLSAddr(GlobalAddressSDNode * N,SelectionDAG & DAG,unsigned Opc) const521 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
522 SelectionDAG &DAG,
523 unsigned Opc) const {
524 SDLoc DL(N);
525 EVT Ty = getPointerTy(DAG.getDataLayout());
526 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
527
528 // Use a PC-relative addressing mode to access the dynamic GOT address.
529 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
530 SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
531
532 // Prepare argument list to generate call.
533 ArgListTy Args;
534 ArgListEntry Entry;
535 Entry.Node = Load;
536 Entry.Ty = CallTy;
537 Args.push_back(Entry);
538
539 // Setup call to __tls_get_addr.
540 TargetLowering::CallLoweringInfo CLI(DAG);
541 CLI.setDebugLoc(DL)
542 .setChain(DAG.getEntryNode())
543 .setLibCallee(CallingConv::C, CallTy,
544 DAG.getExternalSymbol("__tls_get_addr", Ty),
545 std::move(Args));
546
547 return LowerCallTo(CLI).first;
548 }
549
550 SDValue
lowerGlobalTLSAddress(SDValue Op,SelectionDAG & DAG) const551 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
552 SelectionDAG &DAG) const {
553 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
554 CallingConv::GHC)
555 report_fatal_error("In GHC calling convention TLS is not supported");
556
557 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
558 assert(N->getOffset() == 0 && "unexpected offset in global node");
559
560 SDValue Addr;
561 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
562 case TLSModel::GeneralDynamic:
563 // In this model, application code calls the dynamic linker function
564 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
565 // runtime.
566 Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD);
567 break;
568 case TLSModel::LocalDynamic:
569 // Same as GeneralDynamic, except for assembly modifiers and relocation
570 // records.
571 Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD);
572 break;
573 case TLSModel::InitialExec:
574 // This model uses the GOT to resolve TLS offsets.
575 Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE);
576 break;
577 case TLSModel::LocalExec:
578 // This model is used when static linking as the TLS offsets are resolved
579 // during program linking.
580 Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
581 break;
582 }
583
584 return Addr;
585 }
586
587 SDValue
lowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const588 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
589 SelectionDAG &DAG) const {
590 switch (Op.getConstantOperandVal(0)) {
591 default:
592 return SDValue(); // Don't custom lower most intrinsics.
593 case Intrinsic::thread_pointer: {
594 EVT PtrVT = getPointerTy(DAG.getDataLayout());
595 return DAG.getRegister(LoongArch::R2, PtrVT);
596 }
597 }
598 }
599
600 // Helper function that emits error message for intrinsics with chain.
emitIntrinsicWithChainErrorMessage(SDValue Op,StringRef ErrorMsg,SelectionDAG & DAG)601 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
602 StringRef ErrorMsg,
603 SelectionDAG &DAG) {
604
605 DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
606 ErrorMsg);
607 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
608 SDLoc(Op));
609 }
610
611 SDValue
lowerINTRINSIC_W_CHAIN(SDValue Op,SelectionDAG & DAG) const612 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
613 SelectionDAG &DAG) const {
614 SDLoc DL(Op);
615 MVT GRLenVT = Subtarget.getGRLenVT();
616 SDValue Op0 = Op.getOperand(0);
617 std::string Name = Op->getOperationName(0);
618 const StringRef ErrorMsgOOR = "out of range";
619
620 switch (Op.getConstantOperandVal(1)) {
621 default:
622 return Op;
623 case Intrinsic::loongarch_crc_w_b_w:
624 case Intrinsic::loongarch_crc_w_h_w:
625 case Intrinsic::loongarch_crc_w_w_w:
626 case Intrinsic::loongarch_crc_w_d_w:
627 case Intrinsic::loongarch_crcc_w_b_w:
628 case Intrinsic::loongarch_crcc_w_h_w:
629 case Intrinsic::loongarch_crcc_w_w_w:
630 case Intrinsic::loongarch_crcc_w_d_w: {
631 std::string Name = Op->getOperationName(0);
632 DAG.getContext()->emitError(Name + " requires target: loongarch64");
633 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
634 }
635 case Intrinsic::loongarch_csrrd_w:
636 case Intrinsic::loongarch_csrrd_d: {
637 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
638 if (!isUInt<14>(Imm))
639 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
640 return DAG.getMergeValues(
641 {DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
642 DAG.getConstant(Imm, DL, GRLenVT)),
643 Op0},
644 DL);
645 }
646 case Intrinsic::loongarch_csrwr_w:
647 case Intrinsic::loongarch_csrwr_d: {
648 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
649 if (!isUInt<14>(Imm))
650 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
651 return DAG.getMergeValues(
652 {DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, Op.getOperand(2),
653 DAG.getConstant(Imm, DL, GRLenVT)),
654 Op0},
655 DL);
656 }
657 case Intrinsic::loongarch_csrxchg_w:
658 case Intrinsic::loongarch_csrxchg_d: {
659 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
660 if (!isUInt<14>(Imm))
661 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
662 return DAG.getMergeValues(
663 {DAG.getNode(LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, Op.getOperand(2),
664 Op.getOperand(3), DAG.getConstant(Imm, DL, GRLenVT)),
665 Op0},
666 DL);
667 }
668 case Intrinsic::loongarch_iocsrrd_d: {
669 if (Subtarget.is64Bit())
670 return DAG.getMergeValues(
671 {DAG.getNode(
672 LoongArchISD::IOCSRRD_D, DL, GRLenVT, Op0,
673 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))),
674 Op0},
675 DL);
676 else {
677 DAG.getContext()->emitError(
678 "llvm.loongarch.crc.w.d.w requires target: loongarch64");
679 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
680 }
681 }
682 #define IOCSRRD_CASE(NAME, NODE) \
683 case Intrinsic::loongarch_##NAME: { \
684 return DAG.getMergeValues( \
685 {DAG.getNode(LoongArchISD::NODE, DL, GRLenVT, Op0, Op.getOperand(2)), \
686 Op0}, \
687 DL); \
688 }
689 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
690 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
691 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
692 #undef IOCSRRD_CASE
693 case Intrinsic::loongarch_cpucfg: {
694 return DAG.getMergeValues(
695 {DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, Op.getOperand(2)),
696 Op0},
697 DL);
698 }
699 case Intrinsic::loongarch_lddir_d: {
700 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
701 if (!isUInt<8>(Imm)) {
702 DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
703 "' out of range");
704 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
705 }
706
707 return Op;
708 }
709 case Intrinsic::loongarch_movfcsr2gr: {
710 if (!Subtarget.hasBasicF()) {
711 DAG.getContext()->emitError(
712 "llvm.loongarch.movfcsr2gr expects basic f target feature");
713 return DAG.getMergeValues(
714 {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
715 }
716 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
717 if (!isUInt<2>(Imm)) {
718 DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
719 "' " + ErrorMsgOOR);
720 return DAG.getMergeValues(
721 {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
722 }
723 return DAG.getMergeValues(
724 {DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, Op.getValueType(),
725 DAG.getConstant(Imm, DL, GRLenVT)),
726 Op.getOperand(0)},
727 DL);
728 }
729 }
730 }
731
732 // Helper function that emits error message for intrinsics with void return
733 // value.
emitIntrinsicErrorMessage(SDValue Op,StringRef ErrorMsg,SelectionDAG & DAG)734 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
735 SelectionDAG &DAG) {
736
737 DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
738 ErrorMsg);
739 return Op.getOperand(0);
740 }
741
lowerINTRINSIC_VOID(SDValue Op,SelectionDAG & DAG) const742 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
743 SelectionDAG &DAG) const {
744 SDLoc DL(Op);
745 MVT GRLenVT = Subtarget.getGRLenVT();
746 SDValue Op0 = Op.getOperand(0);
747 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
748 SDValue Op2 = Op.getOperand(2);
749 const StringRef ErrorMsgOOR = "out of range";
750
751 switch (IntrinsicEnum) {
752 default:
753 // TODO: Add more Intrinsics.
754 return SDValue();
755 case Intrinsic::loongarch_cacop_d:
756 case Intrinsic::loongarch_cacop_w: {
757 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) {
758 DAG.getContext()->emitError(
759 "llvm.loongarch.cacop.d requires target: loongarch64");
760 return Op.getOperand(0);
761 }
762 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) {
763 DAG.getContext()->emitError(
764 "llvm.loongarch.cacop.w requires target: loongarch32");
765 return Op.getOperand(0);
766 }
767 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
768 unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
769 if (!isUInt<5>(Imm1))
770 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
771 SDValue Op4 = Op.getOperand(4);
772 int Imm2 = cast<ConstantSDNode>(Op4)->getSExtValue();
773 if (!isInt<12>(Imm2))
774 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
775
776 return Op;
777 }
778
779 case Intrinsic::loongarch_dbar: {
780 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
781 if (!isUInt<15>(Imm))
782 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
783
784 return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op0,
785 DAG.getConstant(Imm, DL, GRLenVT));
786 }
787 case Intrinsic::loongarch_ibar: {
788 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
789 if (!isUInt<15>(Imm))
790 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
791
792 return DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Op0,
793 DAG.getConstant(Imm, DL, GRLenVT));
794 }
795 case Intrinsic::loongarch_break: {
796 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
797 if (!isUInt<15>(Imm))
798 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
799
800 return DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Op0,
801 DAG.getConstant(Imm, DL, GRLenVT));
802 }
803 case Intrinsic::loongarch_movgr2fcsr: {
804 if (!Subtarget.hasBasicF()) {
805 DAG.getContext()->emitError(
806 "llvm.loongarch.movgr2fcsr expects basic f target feature");
807 return Op0;
808 }
809 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
810 if (!isUInt<2>(Imm))
811 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
812
813 return DAG.getNode(
814 LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Op0,
815 DAG.getConstant(Imm, DL, GRLenVT),
816 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Op.getOperand(3)));
817 }
818 case Intrinsic::loongarch_syscall: {
819 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
820 if (!isUInt<15>(Imm))
821 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
822
823 return DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Op0,
824 DAG.getConstant(Imm, DL, GRLenVT));
825 }
826 #define IOCSRWR_CASE(NAME, NODE) \
827 case Intrinsic::loongarch_##NAME: { \
828 SDValue Op3 = Op.getOperand(3); \
829 if (Subtarget.is64Bit()) \
830 return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, \
831 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
832 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)); \
833 else \
834 return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, Op2, Op3); \
835 }
836 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
837 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
838 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
839 #undef IOCSRWR_CASE
840 case Intrinsic::loongarch_iocsrwr_d: {
841 if (Subtarget.is64Bit())
842 return DAG.getNode(
843 LoongArchISD::IOCSRWR_D, DL, MVT::Other, Op0, Op2,
844 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(3)));
845 else {
846 DAG.getContext()->emitError(
847 "llvm.loongarch.iocsrwr.d requires target: loongarch64");
848 return Op.getOperand(0);
849 }
850 }
851 #define ASRT_LE_GT_CASE(NAME) \
852 case Intrinsic::loongarch_##NAME: { \
853 if (!Subtarget.is64Bit()) { \
854 DAG.getContext()->emitError(Op->getOperationName(0) + \
855 " requires target: loongarch64"); \
856 return Op.getOperand(0); \
857 } \
858 return Op; \
859 }
860 ASRT_LE_GT_CASE(asrtle_d)
861 ASRT_LE_GT_CASE(asrtgt_d)
862 #undef ASRT_LE_GT_CASE
863 case Intrinsic::loongarch_ldpte_d: {
864 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
865 if (!isUInt<8>(Imm))
866 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
867 if (!Subtarget.is64Bit()) {
868 DAG.getContext()->emitError(Op->getOperationName(0) +
869 " requires target: loongarch64");
870 return Op.getOperand(0);
871 }
872 return Op;
873 }
874 }
875 }
876
lowerShiftLeftParts(SDValue Op,SelectionDAG & DAG) const877 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
878 SelectionDAG &DAG) const {
879 SDLoc DL(Op);
880 SDValue Lo = Op.getOperand(0);
881 SDValue Hi = Op.getOperand(1);
882 SDValue Shamt = Op.getOperand(2);
883 EVT VT = Lo.getValueType();
884
885 // if Shamt-GRLen < 0: // Shamt < GRLen
886 // Lo = Lo << Shamt
887 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
888 // else:
889 // Lo = 0
890 // Hi = Lo << (Shamt-GRLen)
891
892 SDValue Zero = DAG.getConstant(0, DL, VT);
893 SDValue One = DAG.getConstant(1, DL, VT);
894 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
895 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
896 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
897 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
898
899 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
900 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
901 SDValue ShiftRightLo =
902 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
903 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
904 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
905 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
906
907 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
908
909 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
910 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
911
912 SDValue Parts[2] = {Lo, Hi};
913 return DAG.getMergeValues(Parts, DL);
914 }
915
lowerShiftRightParts(SDValue Op,SelectionDAG & DAG,bool IsSRA) const916 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
917 SelectionDAG &DAG,
918 bool IsSRA) const {
919 SDLoc DL(Op);
920 SDValue Lo = Op.getOperand(0);
921 SDValue Hi = Op.getOperand(1);
922 SDValue Shamt = Op.getOperand(2);
923 EVT VT = Lo.getValueType();
924
925 // SRA expansion:
926 // if Shamt-GRLen < 0: // Shamt < GRLen
927 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
928 // Hi = Hi >>s Shamt
929 // else:
930 // Lo = Hi >>s (Shamt-GRLen);
931 // Hi = Hi >>s (GRLen-1)
932 //
933 // SRL expansion:
934 // if Shamt-GRLen < 0: // Shamt < GRLen
935 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
936 // Hi = Hi >>u Shamt
937 // else:
938 // Lo = Hi >>u (Shamt-GRLen);
939 // Hi = 0;
940
941 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
942
943 SDValue Zero = DAG.getConstant(0, DL, VT);
944 SDValue One = DAG.getConstant(1, DL, VT);
945 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
946 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
947 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
948 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
949
950 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
951 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
952 SDValue ShiftLeftHi =
953 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
954 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
955 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
956 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
957 SDValue HiFalse =
958 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
959
960 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
961
962 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
963 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
964
965 SDValue Parts[2] = {Lo, Hi};
966 return DAG.getMergeValues(Parts, DL);
967 }
968
969 // Returns the opcode of the target-specific SDNode that implements the 32-bit
970 // form of the given Opcode.
getLoongArchWOpcode(unsigned Opcode)971 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
972 switch (Opcode) {
973 default:
974 llvm_unreachable("Unexpected opcode");
975 case ISD::SHL:
976 return LoongArchISD::SLL_W;
977 case ISD::SRA:
978 return LoongArchISD::SRA_W;
979 case ISD::SRL:
980 return LoongArchISD::SRL_W;
981 case ISD::ROTR:
982 return LoongArchISD::ROTR_W;
983 case ISD::ROTL:
984 return LoongArchISD::ROTL_W;
985 case ISD::CTTZ:
986 return LoongArchISD::CTZ_W;
987 case ISD::CTLZ:
988 return LoongArchISD::CLZ_W;
989 }
990 }
991
992 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
993 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
994 // otherwise be promoted to i64, making it difficult to select the
995 // SLL_W/.../*W later one because the fact the operation was originally of
996 // type i8/i16/i32 is lost.
customLegalizeToWOp(SDNode * N,SelectionDAG & DAG,int NumOp,unsigned ExtOpc=ISD::ANY_EXTEND)997 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
998 unsigned ExtOpc = ISD::ANY_EXTEND) {
999 SDLoc DL(N);
1000 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1001 SDValue NewOp0, NewRes;
1002
1003 switch (NumOp) {
1004 default:
1005 llvm_unreachable("Unexpected NumOp");
1006 case 1: {
1007 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1008 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1009 break;
1010 }
1011 case 2: {
1012 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1013 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1014 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1015 break;
1016 }
1017 // TODO:Handle more NumOp.
1018 }
1019
1020 // ReplaceNodeResults requires we maintain the same type for the return
1021 // value.
1022 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1023 }
1024
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const1025 void LoongArchTargetLowering::ReplaceNodeResults(
1026 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1027 SDLoc DL(N);
1028 EVT VT = N->getValueType(0);
1029 switch (N->getOpcode()) {
1030 default:
1031 llvm_unreachable("Don't know how to legalize this operation");
1032 case ISD::SHL:
1033 case ISD::SRA:
1034 case ISD::SRL:
1035 case ISD::ROTR:
1036 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1037 "Unexpected custom legalisation");
1038 if (N->getOperand(1).getOpcode() != ISD::Constant) {
1039 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1040 break;
1041 }
1042 break;
1043 case ISD::ROTL:
1044 ConstantSDNode *CN;
1045 if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1046 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1047 break;
1048 }
1049 break;
1050 case ISD::FP_TO_SINT: {
1051 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1052 "Unexpected custom legalisation");
1053 SDValue Src = N->getOperand(0);
1054 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1055 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1056 TargetLowering::TypeSoftenFloat) {
1057 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1058 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1059 return;
1060 }
1061 // If the FP type needs to be softened, emit a library call using the 'si'
1062 // version. If we left it to default legalization we'd end up with 'di'.
1063 RTLIB::Libcall LC;
1064 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1065 MakeLibCallOptions CallOptions;
1066 EVT OpVT = Src.getValueType();
1067 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1068 SDValue Chain = SDValue();
1069 SDValue Result;
1070 std::tie(Result, Chain) =
1071 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1072 Results.push_back(Result);
1073 break;
1074 }
1075 case ISD::BITCAST: {
1076 SDValue Src = N->getOperand(0);
1077 EVT SrcVT = Src.getValueType();
1078 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1079 Subtarget.hasBasicF()) {
1080 SDValue Dst =
1081 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1082 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1083 }
1084 break;
1085 }
1086 case ISD::FP_TO_UINT: {
1087 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1088 "Unexpected custom legalisation");
1089 auto &TLI = DAG.getTargetLoweringInfo();
1090 SDValue Tmp1, Tmp2;
1091 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1092 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1093 break;
1094 }
1095 case ISD::BSWAP: {
1096 SDValue Src = N->getOperand(0);
1097 assert((VT == MVT::i16 || VT == MVT::i32) &&
1098 "Unexpected custom legalization");
1099 MVT GRLenVT = Subtarget.getGRLenVT();
1100 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1101 SDValue Tmp;
1102 switch (VT.getSizeInBits()) {
1103 default:
1104 llvm_unreachable("Unexpected operand width");
1105 case 16:
1106 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1107 break;
1108 case 32:
1109 // Only LA64 will get to here due to the size mismatch between VT and
1110 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
1111 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1112 break;
1113 }
1114 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1115 break;
1116 }
1117 case ISD::BITREVERSE: {
1118 SDValue Src = N->getOperand(0);
1119 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1120 "Unexpected custom legalization");
1121 MVT GRLenVT = Subtarget.getGRLenVT();
1122 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1123 SDValue Tmp;
1124 switch (VT.getSizeInBits()) {
1125 default:
1126 llvm_unreachable("Unexpected operand width");
1127 case 8:
1128 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1129 break;
1130 case 32:
1131 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1132 break;
1133 }
1134 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1135 break;
1136 }
1137 case ISD::CTLZ:
1138 case ISD::CTTZ: {
1139 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1140 "Unexpected custom legalisation");
1141 Results.push_back(customLegalizeToWOp(N, DAG, 1));
1142 break;
1143 }
1144 case ISD::INTRINSIC_W_CHAIN: {
1145 SDValue Op0 = N->getOperand(0);
1146 EVT VT = N->getValueType(0);
1147 uint64_t Op1 = N->getConstantOperandVal(1);
1148 MVT GRLenVT = Subtarget.getGRLenVT();
1149 if (Op1 == Intrinsic::loongarch_movfcsr2gr) {
1150 if (!Subtarget.hasBasicF()) {
1151 DAG.getContext()->emitError(
1152 "llvm.loongarch.movfcsr2gr expects basic f target feature");
1153 Results.push_back(DAG.getMergeValues(
1154 {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
1155 Results.push_back(N->getOperand(0));
1156 return;
1157 }
1158 unsigned Imm = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
1159 if (!isUInt<2>(Imm)) {
1160 DAG.getContext()->emitError("argument to '" + N->getOperationName(0) +
1161 "' " + "out of range");
1162 Results.push_back(DAG.getMergeValues(
1163 {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
1164 Results.push_back(N->getOperand(0));
1165 return;
1166 }
1167 Results.push_back(
1168 DAG.getNode(ISD::TRUNCATE, DL, VT,
1169 DAG.getNode(LoongArchISD::MOVFCSR2GR, SDLoc(N), MVT::i64,
1170 DAG.getConstant(Imm, DL, GRLenVT))));
1171 Results.push_back(N->getOperand(0));
1172 return;
1173 }
1174 SDValue Op2 = N->getOperand(2);
1175 std::string Name = N->getOperationName(0);
1176
1177 switch (Op1) {
1178 default:
1179 llvm_unreachable("Unexpected Intrinsic.");
1180 #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
1181 case Intrinsic::loongarch_##NAME: { \
1182 Results.push_back(DAG.getNode( \
1183 ISD::TRUNCATE, DL, VT, \
1184 DAG.getNode( \
1185 LoongArchISD::NODE, DL, MVT::i64, \
1186 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1187 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))))); \
1188 Results.push_back(N->getOperand(0)); \
1189 break; \
1190 }
1191 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1192 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1193 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1194 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1195 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1196 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1197 #undef CRC_CASE_EXT_BINARYOP
1198
1199 #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
1200 case Intrinsic::loongarch_##NAME: { \
1201 Results.push_back( \
1202 DAG.getNode(ISD::TRUNCATE, DL, VT, \
1203 DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op2, \
1204 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, \
1205 N->getOperand(3))))); \
1206 Results.push_back(N->getOperand(0)); \
1207 break; \
1208 }
1209 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1210 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1211 #undef CRC_CASE_EXT_UNARYOP
1212 #define CSR_CASE(ID) \
1213 case Intrinsic::loongarch_##ID: { \
1214 if (!Subtarget.is64Bit()) { \
1215 DAG.getContext()->emitError(Name + " requires target: loongarch64"); \
1216 Results.push_back(DAG.getUNDEF(VT)); \
1217 Results.push_back(N->getOperand(0)); \
1218 } \
1219 break; \
1220 }
1221 CSR_CASE(csrrd_d);
1222 CSR_CASE(csrwr_d);
1223 CSR_CASE(csrxchg_d);
1224 CSR_CASE(iocsrrd_d);
1225 #undef CSR_CASE
1226 case Intrinsic::loongarch_csrrd_w: {
1227 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1228 if (!isUInt<14>(Imm)) {
1229 DAG.getContext()->emitError("argument to '" + Name + "' out of range");
1230 Results.push_back(DAG.getUNDEF(VT));
1231 Results.push_back(N->getOperand(0));
1232 break;
1233 }
1234
1235 Results.push_back(
1236 DAG.getNode(ISD::TRUNCATE, DL, VT,
1237 DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
1238 DAG.getConstant(Imm, DL, GRLenVT))));
1239 Results.push_back(N->getOperand(0));
1240 break;
1241 }
1242 case Intrinsic::loongarch_csrwr_w: {
1243 unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
1244 if (!isUInt<14>(Imm)) {
1245 DAG.getContext()->emitError("argument to '" + Name + "' out of range");
1246 Results.push_back(DAG.getUNDEF(VT));
1247 Results.push_back(N->getOperand(0));
1248 break;
1249 }
1250
1251 Results.push_back(DAG.getNode(
1252 ISD::TRUNCATE, DL, VT,
1253 DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0,
1254 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1255 DAG.getConstant(Imm, DL, GRLenVT))));
1256 Results.push_back(N->getOperand(0));
1257 break;
1258 }
1259 case Intrinsic::loongarch_csrxchg_w: {
1260 unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
1261 if (!isUInt<14>(Imm)) {
1262 DAG.getContext()->emitError("argument to '" + Name + "' out of range");
1263 Results.push_back(DAG.getUNDEF(VT));
1264 Results.push_back(N->getOperand(0));
1265 break;
1266 }
1267
1268 Results.push_back(DAG.getNode(
1269 ISD::TRUNCATE, DL, VT,
1270 DAG.getNode(
1271 LoongArchISD::CSRXCHG, DL, GRLenVT, Op0,
1272 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1273 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
1274 DAG.getConstant(Imm, DL, GRLenVT))));
1275 Results.push_back(N->getOperand(0));
1276 break;
1277 }
1278 #define IOCSRRD_CASE(NAME, NODE) \
1279 case Intrinsic::loongarch_##NAME: { \
1280 Results.push_back(DAG.getNode( \
1281 ISD::TRUNCATE, DL, N->getValueType(0), \
1282 DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op0, \
1283 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)))); \
1284 Results.push_back(N->getOperand(0)); \
1285 break; \
1286 }
1287 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1288 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1289 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1290 #undef IOCSRRD_CASE
1291 case Intrinsic::loongarch_cpucfg: {
1292 Results.push_back(DAG.getNode(
1293 ISD::TRUNCATE, DL, VT,
1294 DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0,
1295 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));
1296 Results.push_back(Op0);
1297 break;
1298 }
1299 case Intrinsic::loongarch_lddir_d: {
1300 if (!Subtarget.is64Bit()) {
1301 DAG.getContext()->emitError(N->getOperationName(0) +
1302 " requires target: loongarch64");
1303 Results.push_back(DAG.getUNDEF(VT));
1304 Results.push_back(Op0);
1305 break;
1306 }
1307 break;
1308 }
1309 }
1310 break;
1311 }
1312 case ISD::READ_REGISTER: {
1313 if (Subtarget.is64Bit())
1314 DAG.getContext()->emitError(
1315 "On LA64, only 64-bit registers can be read.");
1316 else
1317 DAG.getContext()->emitError(
1318 "On LA32, only 32-bit registers can be read.");
1319 Results.push_back(DAG.getUNDEF(VT));
1320 Results.push_back(N->getOperand(0));
1321 break;
1322 }
1323 }
1324 }
1325
performANDCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)1326 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
1327 TargetLowering::DAGCombinerInfo &DCI,
1328 const LoongArchSubtarget &Subtarget) {
1329 if (DCI.isBeforeLegalizeOps())
1330 return SDValue();
1331
1332 SDValue FirstOperand = N->getOperand(0);
1333 SDValue SecondOperand = N->getOperand(1);
1334 unsigned FirstOperandOpc = FirstOperand.getOpcode();
1335 EVT ValTy = N->getValueType(0);
1336 SDLoc DL(N);
1337 uint64_t lsb, msb;
1338 unsigned SMIdx, SMLen;
1339 ConstantSDNode *CN;
1340 SDValue NewOperand;
1341 MVT GRLenVT = Subtarget.getGRLenVT();
1342
1343 // Op's second operand must be a shifted mask.
1344 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
1345 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
1346 return SDValue();
1347
1348 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
1349 // Pattern match BSTRPICK.
1350 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
1351 // => BSTRPICK $dst, $src, msb, lsb
1352 // where msb = lsb + len - 1
1353
1354 // The second operand of the shift must be an immediate.
1355 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
1356 return SDValue();
1357
1358 lsb = CN->getZExtValue();
1359
1360 // Return if the shifted mask does not start at bit 0 or the sum of its
1361 // length and lsb exceeds the word's size.
1362 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
1363 return SDValue();
1364
1365 NewOperand = FirstOperand.getOperand(0);
1366 } else {
1367 // Pattern match BSTRPICK.
1368 // $dst = and $src, (2**len- 1) , if len > 12
1369 // => BSTRPICK $dst, $src, msb, lsb
1370 // where lsb = 0 and msb = len - 1
1371
1372 // If the mask is <= 0xfff, andi can be used instead.
1373 if (CN->getZExtValue() <= 0xfff)
1374 return SDValue();
1375
1376 // Return if the mask doesn't start at position 0.
1377 if (SMIdx)
1378 return SDValue();
1379
1380 lsb = 0;
1381 NewOperand = FirstOperand;
1382 }
1383 msb = lsb + SMLen - 1;
1384 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
1385 DAG.getConstant(msb, DL, GRLenVT),
1386 DAG.getConstant(lsb, DL, GRLenVT));
1387 }
1388
performSRLCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)1389 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
1390 TargetLowering::DAGCombinerInfo &DCI,
1391 const LoongArchSubtarget &Subtarget) {
1392 if (DCI.isBeforeLegalizeOps())
1393 return SDValue();
1394
1395 // $dst = srl (and $src, Mask), Shamt
1396 // =>
1397 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
1398 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
1399 //
1400
1401 SDValue FirstOperand = N->getOperand(0);
1402 ConstantSDNode *CN;
1403 EVT ValTy = N->getValueType(0);
1404 SDLoc DL(N);
1405 MVT GRLenVT = Subtarget.getGRLenVT();
1406 unsigned MaskIdx, MaskLen;
1407 uint64_t Shamt;
1408
1409 // The first operand must be an AND and the second operand of the AND must be
1410 // a shifted mask.
1411 if (FirstOperand.getOpcode() != ISD::AND ||
1412 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
1413 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
1414 return SDValue();
1415
1416 // The second operand (shift amount) must be an immediate.
1417 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
1418 return SDValue();
1419
1420 Shamt = CN->getZExtValue();
1421 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
1422 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
1423 FirstOperand->getOperand(0),
1424 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1425 DAG.getConstant(Shamt, DL, GRLenVT));
1426
1427 return SDValue();
1428 }
1429
performORCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)1430 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
1431 TargetLowering::DAGCombinerInfo &DCI,
1432 const LoongArchSubtarget &Subtarget) {
1433 MVT GRLenVT = Subtarget.getGRLenVT();
1434 EVT ValTy = N->getValueType(0);
1435 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
1436 ConstantSDNode *CN0, *CN1;
1437 SDLoc DL(N);
1438 unsigned ValBits = ValTy.getSizeInBits();
1439 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
1440 unsigned Shamt;
1441 bool SwapAndRetried = false;
1442
1443 if (DCI.isBeforeLegalizeOps())
1444 return SDValue();
1445
1446 if (ValBits != 32 && ValBits != 64)
1447 return SDValue();
1448
1449 Retry:
1450 // 1st pattern to match BSTRINS:
1451 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
1452 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
1453 // =>
1454 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
1455 if (N0.getOpcode() == ISD::AND &&
1456 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1457 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1458 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
1459 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1460 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
1461 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
1462 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1463 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
1464 (MaskIdx0 + MaskLen0 <= ValBits)) {
1465 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
1466 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1467 N1.getOperand(0).getOperand(0),
1468 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1469 DAG.getConstant(MaskIdx0, DL, GRLenVT));
1470 }
1471
1472 // 2nd pattern to match BSTRINS:
1473 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
1474 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
1475 // =>
1476 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
1477 if (N0.getOpcode() == ISD::AND &&
1478 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1479 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1480 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
1481 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1482 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
1483 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1484 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
1485 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
1486 (MaskIdx0 + MaskLen0 <= ValBits)) {
1487 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
1488 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1489 N1.getOperand(0).getOperand(0),
1490 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1491 DAG.getConstant(MaskIdx0, DL, GRLenVT));
1492 }
1493
1494 // 3rd pattern to match BSTRINS:
1495 // R = or (and X, mask0), (and Y, mask1)
1496 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
1497 // =>
1498 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
1499 // where msb = lsb + size - 1
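// For example, with mask0 = ~0xff00 (lsb = 8, size = 8) and mask1 = 0x3f00,
// (or (and X, ~0xff00), (and Y, 0x3f00)) becomes
// (BSTRINS X, (srl (and Y, 0x3f00), 8), 15, 8).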
1500 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
1501 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1502 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1503 (MaskIdx0 + MaskLen0 <= 64) &&
1504 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
1505 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
1506 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
1507 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1508 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
1509 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
1510 DAG.getConstant(ValBits == 32
1511 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
1512 : (MaskIdx0 + MaskLen0 - 1),
1513 DL, GRLenVT),
1514 DAG.getConstant(MaskIdx0, DL, GRLenVT));
1515 }
1516
1517 // 4th pattern to match BSTRINS:
1518 // R = or (and X, mask), (shl Y, shamt)
1519 // where mask = (2**shamt - 1)
1520 // =>
1521 // R = BSTRINS X, Y, ValBits - 1, shamt
1522 // where ValBits = 32 or 64
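// For example, with mask = 0xff and shamt = 8 on a 32-bit value,
// (or (and X, 0xff), (shl Y, 8)) becomes (BSTRINS X, Y, 31, 8).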
1523 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
1524 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1525 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
1526 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1527 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
1528 (MaskIdx0 + MaskLen0 <= ValBits)) {
1529 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
1530 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1531 N1.getOperand(0),
1532 DAG.getConstant((ValBits - 1), DL, GRLenVT),
1533 DAG.getConstant(Shamt, DL, GRLenVT));
1534 }
1535
1536 // 5th pattern to match BSTRINS:
1537 // R = or (and X, mask), const
1538 // where ~mask = (2**size - 1) << lsb, mask & const = 0
1539 // =>
1540 // R = BSTRINS X, (const >> lsb), msb, lsb
1541 // where msb = lsb + size - 1
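// For example, with ~mask = 0xff00 (lsb = 8, size = 8) and const = 0x1200,
// (or (and X, ~0xff00), 0x1200) becomes (BSTRINS X, 0x12, 15, 8).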
1542 if (N0.getOpcode() == ISD::AND &&
1543 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1544 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1545 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
1546 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
1547 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
1548 return DAG.getNode(
1549 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
1550 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
1551 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
1552 DAG.getConstant(MaskIdx0, DL, GRLenVT));
1553 }
1554
1555 // 6th pattern.
1556 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
1557 // by the incoming bits are known to be zero.
1558 // =>
1559 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
1560 //
1561 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
1562 // pattern is more common than the 1st. So we put the 1st before the 6th in
1563 // order to match as many nodes as possible.
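// For example, with mask = 0xff and shamt = 8, if bits 8-15 of b are known to
// be zero, (or b, (shl (and c, 0xff), 8)) becomes (BSTRINS b, c, 15, 8).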
1564 ConstantSDNode *CNMask, *CNShamt;
1565 unsigned MaskIdx, MaskLen;
1566 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
1567 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1568 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
1569 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1570 CNShamt->getZExtValue() + MaskLen <= ValBits) {
1571 Shamt = CNShamt->getZExtValue();
1572 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
1573 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1574 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
1575 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1576 N1.getOperand(0).getOperand(0),
1577 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
1578 DAG.getConstant(Shamt, DL, GRLenVT));
1579 }
1580 }
1581
1582 // 7th pattern.
1583 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
1584 // overwritten by the incoming bits are known to be zero.
1585 // =>
1586 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
1587 //
1588 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
1589 // before the 7th in order to match as many nodes as possible.
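// For example, with shifted_mask = 0xff00 and shamt = 8, if bits 8-15 of b are
// known to be zero, (or b, (and (shl c, 8), 0xff00)) becomes
// (BSTRINS b, c, 15, 8).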
1590 if (N1.getOpcode() == ISD::AND &&
1591 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1592 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
1593 N1.getOperand(0).getOpcode() == ISD::SHL &&
1594 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1595 CNShamt->getZExtValue() == MaskIdx) {
1596 APInt ShMask(ValBits, CNMask->getZExtValue());
1597 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1598 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
1599 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1600 N1.getOperand(0).getOperand(0),
1601 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1602 DAG.getConstant(MaskIdx, DL, GRLenVT));
1603 }
1604 }
1605
1606 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
1607 if (!SwapAndRetried) {
1608 std::swap(N0, N1);
1609 SwapAndRetried = true;
1610 goto Retry;
1611 }
1612
1613 SwapAndRetried = false;
1614 Retry2:
1615 // 8th pattern.
1616 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
1617 // the incoming bits are known to be zero.
1618 // =>
1619 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
1620 //
1621 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
1622 // we put it here in order to match as many nodes as possible or generate fewer
1623 // instructions.
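// For example, with shifted_mask = 0xff00, if bits 8-15 of b are known to be
// zero, (or b, (and c, 0xff00)) becomes (BSTRINS b, (srl c, 8), 15, 8).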
1624 if (N1.getOpcode() == ISD::AND &&
1625 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1626 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
1627 APInt ShMask(ValBits, CNMask->getZExtValue());
1628 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
1629 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
1630 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
1631 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
1632 N1->getOperand(0),
1633 DAG.getConstant(MaskIdx, DL, GRLenVT)),
1634 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1635 DAG.getConstant(MaskIdx, DL, GRLenVT));
1636 }
1637 }
1638 // Swap N0/N1 and retry.
1639 if (!SwapAndRetried) {
1640 std::swap(N0, N1);
1641 SwapAndRetried = true;
1642 goto Retry2;
1643 }
1644
1645 return SDValue();
1646 }
1647
1648 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
1649 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
1650 TargetLowering::DAGCombinerInfo &DCI,
1651 const LoongArchSubtarget &Subtarget) {
1652 if (DCI.isBeforeLegalizeOps())
1653 return SDValue();
1654
1655 SDValue Src = N->getOperand(0);
1656 if (Src.getOpcode() != LoongArchISD::REVB_2W)
1657 return SDValue();
1658
1659 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
1660 Src.getOperand(0));
1661 }
1662
1663 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
1664 DAGCombinerInfo &DCI) const {
1665 SelectionDAG &DAG = DCI.DAG;
1666 switch (N->getOpcode()) {
1667 default:
1668 break;
1669 case ISD::AND:
1670 return performANDCombine(N, DAG, DCI, Subtarget);
1671 case ISD::OR:
1672 return performORCombine(N, DAG, DCI, Subtarget);
1673 case ISD::SRL:
1674 return performSRLCombine(N, DAG, DCI, Subtarget);
1675 case LoongArchISD::BITREV_W:
1676 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
1677 }
1678 return SDValue();
1679 }
1680
1681 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
1682 MachineBasicBlock *MBB) {
1683 if (!ZeroDivCheck)
1684 return MBB;
1685
1686 // Build instructions:
1687 // MBB:
1688 // div(or mod) $dst, $dividend, $divisor
1689 // bnez $divisor, SinkMBB
1690 // BreakMBB:
1691 // break 7 // BRK_DIVZERO
1692 // SinkMBB:
1693 // fallthrough
1694 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1695 MachineFunction::iterator It = ++MBB->getIterator();
1696 MachineFunction *MF = MBB->getParent();
1697 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
1698 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
1699 MF->insert(It, BreakMBB);
1700 MF->insert(It, SinkMBB);
1701
1702 // Transfer the remainder of MBB and its successor edges to SinkMBB.
1703 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
1704 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
1705
1706 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
1707 DebugLoc DL = MI.getDebugLoc();
1708 MachineOperand &Divisor = MI.getOperand(2);
1709 Register DivisorReg = Divisor.getReg();
1710
1711 // MBB:
1712 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
1713 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
1714 .addMBB(SinkMBB);
1715 MBB->addSuccessor(BreakMBB);
1716 MBB->addSuccessor(SinkMBB);
1717
1718 // BreakMBB:
1719 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
1720 // definition of BRK_DIVZERO.
1721 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
1722 BreakMBB->addSuccessor(SinkMBB);
1723
1724 // Clear Divisor's kill flag.
1725 Divisor.setIsKill(false);
1726
1727 return SinkMBB;
1728 }
1729
1730 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
1731 MachineInstr &MI, MachineBasicBlock *BB) const {
1732 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1733 DebugLoc DL = MI.getDebugLoc();
1734
1735 switch (MI.getOpcode()) {
1736 default:
1737 llvm_unreachable("Unexpected instr type to insert");
1738 case LoongArch::DIV_W:
1739 case LoongArch::DIV_WU:
1740 case LoongArch::MOD_W:
1741 case LoongArch::MOD_WU:
1742 case LoongArch::DIV_D:
1743 case LoongArch::DIV_DU:
1744 case LoongArch::MOD_D:
1745 case LoongArch::MOD_DU:
1746 return insertDivByZeroTrap(MI, BB);
1747 break;
1748 case LoongArch::WRFCSR: {
1749 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
1750 LoongArch::FCSR0 + MI.getOperand(0).getImm())
1751 .addReg(MI.getOperand(1).getReg());
1752 MI.eraseFromParent();
1753 return BB;
1754 }
1755 case LoongArch::RDFCSR: {
1756 MachineInstr *ReadFCSR =
1757 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
1758 MI.getOperand(0).getReg())
1759 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
1760 ReadFCSR->getOperand(1).setIsUndef();
1761 MI.eraseFromParent();
1762 return BB;
1763 }
1764 }
1765 }
1766
1767 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
1768 switch ((LoongArchISD::NodeType)Opcode) {
1769 case LoongArchISD::FIRST_NUMBER:
1770 break;
1771
1772 #define NODE_NAME_CASE(node) \
1773 case LoongArchISD::node: \
1774 return "LoongArchISD::" #node;
1775
1776 // TODO: Add more target-dependent nodes later.
1777 NODE_NAME_CASE(CALL)
1778 NODE_NAME_CASE(RET)
1779 NODE_NAME_CASE(TAIL)
1780 NODE_NAME_CASE(SLL_W)
1781 NODE_NAME_CASE(SRA_W)
1782 NODE_NAME_CASE(SRL_W)
1783 NODE_NAME_CASE(BSTRINS)
1784 NODE_NAME_CASE(BSTRPICK)
1785 NODE_NAME_CASE(MOVGR2FR_W_LA64)
1786 NODE_NAME_CASE(MOVFR2GR_S_LA64)
1787 NODE_NAME_CASE(FTINT)
1788 NODE_NAME_CASE(REVB_2H)
1789 NODE_NAME_CASE(REVB_2W)
1790 NODE_NAME_CASE(BITREV_4B)
1791 NODE_NAME_CASE(BITREV_W)
1792 NODE_NAME_CASE(ROTR_W)
1793 NODE_NAME_CASE(ROTL_W)
1794 NODE_NAME_CASE(CLZ_W)
1795 NODE_NAME_CASE(CTZ_W)
1796 NODE_NAME_CASE(DBAR)
1797 NODE_NAME_CASE(IBAR)
1798 NODE_NAME_CASE(BREAK)
1799 NODE_NAME_CASE(SYSCALL)
1800 NODE_NAME_CASE(CRC_W_B_W)
1801 NODE_NAME_CASE(CRC_W_H_W)
1802 NODE_NAME_CASE(CRC_W_W_W)
1803 NODE_NAME_CASE(CRC_W_D_W)
1804 NODE_NAME_CASE(CRCC_W_B_W)
1805 NODE_NAME_CASE(CRCC_W_H_W)
1806 NODE_NAME_CASE(CRCC_W_W_W)
1807 NODE_NAME_CASE(CRCC_W_D_W)
1808 NODE_NAME_CASE(CSRRD)
1809 NODE_NAME_CASE(CSRWR)
1810 NODE_NAME_CASE(CSRXCHG)
1811 NODE_NAME_CASE(IOCSRRD_B)
1812 NODE_NAME_CASE(IOCSRRD_H)
1813 NODE_NAME_CASE(IOCSRRD_W)
1814 NODE_NAME_CASE(IOCSRRD_D)
1815 NODE_NAME_CASE(IOCSRWR_B)
1816 NODE_NAME_CASE(IOCSRWR_H)
1817 NODE_NAME_CASE(IOCSRWR_W)
1818 NODE_NAME_CASE(IOCSRWR_D)
1819 NODE_NAME_CASE(CPUCFG)
1820 NODE_NAME_CASE(MOVGR2FCSR)
1821 NODE_NAME_CASE(MOVFCSR2GR)
1822 NODE_NAME_CASE(CACOP_D)
1823 NODE_NAME_CASE(CACOP_W)
1824 }
1825 #undef NODE_NAME_CASE
1826 return nullptr;
1827 }
1828
1829 //===----------------------------------------------------------------------===//
1830 // Calling Convention Implementation
1831 //===----------------------------------------------------------------------===//
1832
1833 // Eight general-purpose registers a0-a7 are used for passing integer
1834 // arguments, with a0-a1 reused for return values. Generally, the GPRs are
1835 // used to pass fixed-point arguments, and also floating-point arguments when
1836 // no FPR is available or with the soft-float ABI.
1837 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
1838 LoongArch::R7, LoongArch::R8, LoongArch::R9,
1839 LoongArch::R10, LoongArch::R11};
1840 // Eight floating-point registers fa0-fa7 are used for passing floating-point
1841 // arguments, with fa0-fa1 also used for return values.
1842 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
1843 LoongArch::F3, LoongArch::F4, LoongArch::F5,
1844 LoongArch::F6, LoongArch::F7};
1845 // FPR32 and FPR64 alias each other.
1846 const MCPhysReg ArgFPR64s[] = {
1847 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
1848 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
1849
1850 // Pass a 2*GRLen argument that has been split into two GRLen values through
1851 // registers or the stack as necessary.
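// For example, an i64 argument on LA32 (or an i128 on LA64) is split into two
// GRLen halves: the first half may take the last remaining GPR while the
// second half goes on the stack, or both halves go on the stack when no GPR is
// left.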
1852 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
1853 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
1854 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
1855 ISD::ArgFlagsTy ArgFlags2) {
1856 unsigned GRLenInBytes = GRLen / 8;
1857 if (Register Reg = State.AllocateReg(ArgGPRs)) {
1858 // At least one half can be passed via register.
1859 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1860 VA1.getLocVT(), CCValAssign::Full));
1861 } else {
1862 // Both halves must be passed on the stack, with proper alignment.
1863 Align StackAlign =
1864 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
1865 State.addLoc(
1866 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1867 State.AllocateStack(GRLenInBytes, StackAlign),
1868 VA1.getLocVT(), CCValAssign::Full));
1869 State.addLoc(CCValAssign::getMem(
1870 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
1871 LocVT2, CCValAssign::Full));
1872 return false;
1873 }
1874 if (Register Reg = State.AllocateReg(ArgGPRs)) {
1875 // The second half can also be passed via register.
1876 State.addLoc(
1877 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1878 } else {
1879 // The second half is passed via the stack, without additional alignment.
1880 State.addLoc(CCValAssign::getMem(
1881 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
1882 LocVT2, CCValAssign::Full));
1883 }
1884 return false;
1885 }
1886
1887 // Implements the LoongArch calling convention. Returns true upon failure.
1888 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
1889 unsigned ValNo, MVT ValVT,
1890 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
1891 CCState &State, bool IsFixed, bool IsRet,
1892 Type *OrigTy) {
1893 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
1894 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
1895 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
1896 MVT LocVT = ValVT;
1897
1898 // Any return value split into more than two values can't be returned
1899 // directly.
1900 if (IsRet && ValNo > 1)
1901 return true;
1902
1903 // Floating-point arguments go in GPRs when variadic or when no FPR is free.
1904 bool UseGPRForFloat = true;
1905
1906 switch (ABI) {
1907 default:
1908 llvm_unreachable("Unexpected ABI");
1909 case LoongArchABI::ABI_ILP32S:
1910 case LoongArchABI::ABI_LP64S:
1911 case LoongArchABI::ABI_ILP32F:
1912 case LoongArchABI::ABI_LP64F:
1913 report_fatal_error("Unimplemented ABI");
1914 break;
1915 case LoongArchABI::ABI_ILP32D:
1916 case LoongArchABI::ABI_LP64D:
1917 UseGPRForFloat = !IsFixed;
1918 break;
1919 }
1920
1921 // FPR32 and FPR64 alias each other.
1922 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
1923 UseGPRForFloat = true;
1924
1925 if (UseGPRForFloat && ValVT == MVT::f32) {
1926 LocVT = GRLenVT;
1927 LocInfo = CCValAssign::BCvt;
1928 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
1929 LocVT = MVT::i64;
1930 LocInfo = CCValAssign::BCvt;
1931 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
1932 // TODO: Handle passing f64 on LA32 with D feature.
1933 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
1934 }
1935
1936 // If this is a variadic argument, the LoongArch calling convention requires
1937 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
1938 // byte alignment. An aligned register should be used regardless of whether
1939 // the original argument was split during legalisation or not. The argument
1940 // will not be passed by registers if the original type is larger than
1941 // 2*GRLen, so the register alignment rule does not apply.
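// For example, a variadic i128 on LA64 has 16-byte size and alignment, so it
// must start at an even GPR: if the next free GPR would be a5, a5 is skipped
// and the value is passed in a6/a7.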
1942 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
1943 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
1944 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
1945 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1946 // Skip 'odd' register if necessary.
1947 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
1948 State.AllocateReg(ArgGPRs);
1949 }
1950
1951 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
1952 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
1953 State.getPendingArgFlags();
1954
1955 assert(PendingLocs.size() == PendingArgFlags.size() &&
1956 "PendingLocs and PendingArgFlags out of sync");
1957
1958 // Split arguments might be passed indirectly, so keep track of the pending
1959 // values.
1960 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
1961 LocVT = GRLenVT;
1962 LocInfo = CCValAssign::Indirect;
1963 PendingLocs.push_back(
1964 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
1965 PendingArgFlags.push_back(ArgFlags);
1966 if (!ArgFlags.isSplitEnd()) {
1967 return false;
1968 }
1969 }
1970
1971 // If the split argument only had two elements, it should be passed directly
1972 // in registers or on the stack.
1973 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
1974 PendingLocs.size() <= 2) {
1975 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
1976 // Apply the normal calling convention rules to the first half of the
1977 // split argument.
1978 CCValAssign VA = PendingLocs[0];
1979 ISD::ArgFlagsTy AF = PendingArgFlags[0];
1980 PendingLocs.clear();
1981 PendingArgFlags.clear();
1982 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
1983 ArgFlags);
1984 }
1985
1986 // Allocate to a register if possible, or else a stack slot.
1987 Register Reg;
1988 unsigned StoreSizeBytes = GRLen / 8;
1989 Align StackAlign = Align(GRLen / 8);
1990
1991 if (ValVT == MVT::f32 && !UseGPRForFloat)
1992 Reg = State.AllocateReg(ArgFPR32s);
1993 else if (ValVT == MVT::f64 && !UseGPRForFloat)
1994 Reg = State.AllocateReg(ArgFPR64s);
1995 else
1996 Reg = State.AllocateReg(ArgGPRs);
1997
1998 unsigned StackOffset =
1999 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
2000
2001 // If we reach this point and PendingLocs is non-empty, we must be at the
2002 // end of a split argument that must be passed indirectly.
2003 if (!PendingLocs.empty()) {
2004 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
2005 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
2006 for (auto &It : PendingLocs) {
2007 if (Reg)
2008 It.convertToReg(Reg);
2009 else
2010 It.convertToMem(StackOffset);
2011 State.addLoc(It);
2012 }
2013 PendingLocs.clear();
2014 PendingArgFlags.clear();
2015 return false;
2016 }
2017 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
2018 "Expected a GRLenVT at this stage");
2019
2020 if (Reg) {
2021 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2022 return false;
2023 }
2024
2025 // When a floating-point value is passed on the stack, no bit-cast is needed.
2026 if (ValVT.isFloatingPoint()) {
2027 LocVT = ValVT;
2028 LocInfo = CCValAssign::Full;
2029 }
2030
2031 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
2032 return false;
2033 }
2034
2035 void LoongArchTargetLowering::analyzeInputArgs(
2036 MachineFunction &MF, CCState &CCInfo,
2037 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
2038 LoongArchCCAssignFn Fn) const {
2039 FunctionType *FType = MF.getFunction().getFunctionType();
2040 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
2041 MVT ArgVT = Ins[i].VT;
2042 Type *ArgTy = nullptr;
2043 if (IsRet)
2044 ArgTy = FType->getReturnType();
2045 else if (Ins[i].isOrigArg())
2046 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
2047 LoongArchABI::ABI ABI =
2048 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2049 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
2050 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
2051 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
2052 << EVT(ArgVT).getEVTString() << '\n');
2053 llvm_unreachable("");
2054 }
2055 }
2056 }
2057
2058 void LoongArchTargetLowering::analyzeOutputArgs(
2059 MachineFunction &MF, CCState &CCInfo,
2060 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
2061 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
2062 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2063 MVT ArgVT = Outs[i].VT;
2064 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
2065 LoongArchABI::ABI ABI =
2066 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2067 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
2068 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
2069 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
2070 << EVT(ArgVT).getEVTString() << "\n");
2071 llvm_unreachable("");
2072 }
2073 }
2074 }
2075
2076 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
2077 // values.
2078 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
2079 const CCValAssign &VA, const SDLoc &DL) {
2080 switch (VA.getLocInfo()) {
2081 default:
2082 llvm_unreachable("Unexpected CCValAssign::LocInfo");
2083 case CCValAssign::Full:
2084 case CCValAssign::Indirect:
2085 break;
2086 case CCValAssign::BCvt:
2087 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2088 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
2089 else
2090 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2091 break;
2092 }
2093 return Val;
2094 }
2095
2096 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
2097 const CCValAssign &VA, const SDLoc &DL,
2098 const LoongArchTargetLowering &TLI) {
2099 MachineFunction &MF = DAG.getMachineFunction();
2100 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2101 EVT LocVT = VA.getLocVT();
2102 SDValue Val;
2103 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
2104 Register VReg = RegInfo.createVirtualRegister(RC);
2105 RegInfo.addLiveIn(VA.getLocReg(), VReg);
2106 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2107
2108 return convertLocVTToValVT(DAG, Val, VA, DL);
2109 }
2110
2111 // The caller is responsible for loading the full value if the argument is
2112 // passed with CCValAssign::Indirect.
2113 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
2114 const CCValAssign &VA, const SDLoc &DL) {
2115 MachineFunction &MF = DAG.getMachineFunction();
2116 MachineFrameInfo &MFI = MF.getFrameInfo();
2117 EVT ValVT = VA.getValVT();
2118 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
2119 /*IsImmutable=*/true);
2120 SDValue FIN = DAG.getFrameIndex(
2121 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
2122
2123 ISD::LoadExtType ExtType;
2124 switch (VA.getLocInfo()) {
2125 default:
2126 llvm_unreachable("Unexpected CCValAssign::LocInfo");
2127 case CCValAssign::Full:
2128 case CCValAssign::Indirect:
2129 case CCValAssign::BCvt:
2130 ExtType = ISD::NON_EXTLOAD;
2131 break;
2132 }
2133 return DAG.getExtLoad(
2134 ExtType, DL, VA.getLocVT(), Chain, FIN,
2135 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
2136 }
2137
2138 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
2139 const CCValAssign &VA, const SDLoc &DL) {
2140 EVT LocVT = VA.getLocVT();
2141
2142 switch (VA.getLocInfo()) {
2143 default:
2144 llvm_unreachable("Unexpected CCValAssign::LocInfo");
2145 case CCValAssign::Full:
2146 break;
2147 case CCValAssign::BCvt:
2148 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2149 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
2150 else
2151 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
2152 break;
2153 }
2154 return Val;
2155 }
2156
2157 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
2158 CCValAssign::LocInfo LocInfo,
2159 ISD::ArgFlagsTy ArgFlags, CCState &State) {
2160 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2161 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
2162 // s0 s1 s2 s3 s4 s5 s6 s7 s8
2163 static const MCPhysReg GPRList[] = {
2164 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27,
2165 LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31};
2166 if (unsigned Reg = State.AllocateReg(GPRList)) {
2167 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2168 return false;
2169 }
2170 }
2171
2172 if (LocVT == MVT::f32) {
2173 // Pass in STG registers: F1, F2, F3, F4
2174 // fs0,fs1,fs2,fs3
2175 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
2176 LoongArch::F26, LoongArch::F27};
2177 if (unsigned Reg = State.AllocateReg(FPR32List)) {
2178 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2179 return false;
2180 }
2181 }
2182
2183 if (LocVT == MVT::f64) {
2184 // Pass in STG registers: D1, D2, D3, D4
2185 // fs4,fs5,fs6,fs7
2186 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
2187 LoongArch::F30_64, LoongArch::F31_64};
2188 if (unsigned Reg = State.AllocateReg(FPR64List)) {
2189 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2190 return false;
2191 }
2192 }
2193
2194 report_fatal_error("No registers left in GHC calling convention");
2195 return true;
2196 }
2197
2198 // Transform physical registers into virtual registers.
2199 SDValue LoongArchTargetLowering::LowerFormalArguments(
2200 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2201 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2202 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2203
2204 MachineFunction &MF = DAG.getMachineFunction();
2205
2206 switch (CallConv) {
2207 default:
2208 llvm_unreachable("Unsupported calling convention");
2209 case CallingConv::C:
2210 case CallingConv::Fast:
2211 break;
2212 case CallingConv::GHC:
2213 if (!MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicF] ||
2214 !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD])
2215 report_fatal_error(
2216 "GHC calling convention requires the F and D extensions");
2217 }
2218
2219 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2220 MVT GRLenVT = Subtarget.getGRLenVT();
2221 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
2222 // Used with varargs to accumulate store chains.
2223 std::vector<SDValue> OutChains;
2224
2225 // Assign locations to all of the incoming arguments.
2226 SmallVector<CCValAssign> ArgLocs;
2227 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2228
2229 if (CallConv == CallingConv::GHC)
2230 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
2231 else
2232 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
2233
2234 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2235 CCValAssign &VA = ArgLocs[i];
2236 SDValue ArgValue;
2237 if (VA.isRegLoc())
2238 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
2239 else
2240 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2241 if (VA.getLocInfo() == CCValAssign::Indirect) {
2242 // If the original argument was split and passed by reference, we need to
2243 // load all parts of it here (using the same address).
2244 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2245 MachinePointerInfo()));
2246 unsigned ArgIndex = Ins[i].OrigArgIndex;
2247 unsigned ArgPartOffset = Ins[i].PartOffset;
2248 assert(ArgPartOffset == 0);
2249 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2250 CCValAssign &PartVA = ArgLocs[i + 1];
2251 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
2252 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
2253 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
2254 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2255 MachinePointerInfo()));
2256 ++i;
2257 }
2258 continue;
2259 }
2260 InVals.push_back(ArgValue);
2261 }
2262
2263 if (IsVarArg) {
2264 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
2265 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2266 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
2267 MachineFrameInfo &MFI = MF.getFrameInfo();
2268 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2269 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
2270
2271 // Offset of the first variable argument from stack pointer, and size of
2272 // the vararg save area. For now, the varargs save area is either zero or
2273 // large enough to hold a0-a7.
2274 int VaArgOffset, VarArgsSaveSize;
2275
2276 // If all registers are allocated, then all varargs must be passed on the
2277 // stack and we don't need to save any argregs.
2278 if (ArgRegs.size() == Idx) {
2279 VaArgOffset = CCInfo.getNextStackOffset();
2280 VarArgsSaveSize = 0;
2281 } else {
2282 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
2283 VaArgOffset = -VarArgsSaveSize;
2284 }
2285
2286 // Record the frame index of the first variable argument
2287 // which is a value necessary for VASTART.
2288 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2289 LoongArchFI->setVarArgsFrameIndex(FI);
2290
2291 // If saving an odd number of registers then create an extra stack slot to
2292 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
2293 // offsets to even-numbered registers remain 2*GRLen-aligned.
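// For example, if a0-a2 hold fixed arguments (Idx == 3), a3-a7 are saved
// (5 * GRLenInBytes) plus one padding slot, keeping the save area
// 2*GRLen-aligned.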
2294 if (Idx % 2) {
2295 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
2296 true);
2297 VarArgsSaveSize += GRLenInBytes;
2298 }
2299
2300 // Copy the integer registers that may have been used for passing varargs
2301 // to the vararg save area.
2302 for (unsigned I = Idx; I < ArgRegs.size();
2303 ++I, VaArgOffset += GRLenInBytes) {
2304 const Register Reg = RegInfo.createVirtualRegister(RC);
2305 RegInfo.addLiveIn(ArgRegs[I], Reg);
2306 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
2307 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
2308 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2309 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2310 MachinePointerInfo::getFixedStack(MF, FI));
2311 cast<StoreSDNode>(Store.getNode())
2312 ->getMemOperand()
2313 ->setValue((Value *)nullptr);
2314 OutChains.push_back(Store);
2315 }
2316 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
2317 }
2318
2319 // All stores are grouped in one node to allow the matching between
2320 // the size of Ins and InVals. This only happens for vararg functions.
2321 if (!OutChains.empty()) {
2322 OutChains.push_back(Chain);
2323 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2324 }
2325
2326 return Chain;
2327 }
2328
2329 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2330 return CI->isTailCall();
2331 }
2332
2333 // Check whether the call is eligible for tail call optimization.
2334 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
2335 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2336 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
2337
2338 auto CalleeCC = CLI.CallConv;
2339 auto &Outs = CLI.Outs;
2340 auto &Caller = MF.getFunction();
2341 auto CallerCC = Caller.getCallingConv();
2342
2343 // Do not tail call opt if the stack is used to pass parameters.
2344 if (CCInfo.getNextStackOffset() != 0)
2345 return false;
2346
2347 // Do not tail call opt if any parameters need to be passed indirectly.
2348 for (auto &VA : ArgLocs)
2349 if (VA.getLocInfo() == CCValAssign::Indirect)
2350 return false;
2351
2352 // Do not tail call opt if either caller or callee uses struct return
2353 // semantics.
2354 auto IsCallerStructRet = Caller.hasStructRetAttr();
2355 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2356 if (IsCallerStructRet || IsCalleeStructRet)
2357 return false;
2358
2359 // Do not tail call opt if either the callee or caller has a byval argument.
2360 for (auto &Arg : Outs)
2361 if (Arg.Flags.isByVal())
2362 return false;
2363
2364 // The callee has to preserve all registers the caller needs to preserve.
2365 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
2366 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2367 if (CalleeCC != CallerCC) {
2368 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2369 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2370 return false;
2371 }
2372 return true;
2373 }
2374
2375 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
2376 return DAG.getDataLayout().getPrefTypeAlign(
2377 VT.getTypeForEVT(*DAG.getContext()));
2378 }
2379
2380 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
2381 // and output parameter nodes.
2382 SDValue
2383 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
2384 SmallVectorImpl<SDValue> &InVals) const {
2385 SelectionDAG &DAG = CLI.DAG;
2386 SDLoc &DL = CLI.DL;
2387 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2388 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2389 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2390 SDValue Chain = CLI.Chain;
2391 SDValue Callee = CLI.Callee;
2392 CallingConv::ID CallConv = CLI.CallConv;
2393 bool IsVarArg = CLI.IsVarArg;
2394 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2395 MVT GRLenVT = Subtarget.getGRLenVT();
2396 bool &IsTailCall = CLI.IsTailCall;
2397
2398 MachineFunction &MF = DAG.getMachineFunction();
2399
2400 // Analyze the operands of the call, assigning locations to each operand.
2401 SmallVector<CCValAssign> ArgLocs;
2402 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2403
2404 if (CallConv == CallingConv::GHC)
2405 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
2406 else
2407 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
2408
2409 // Check if it's really possible to do a tail call.
2410 if (IsTailCall)
2411 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
2412
2413 if (IsTailCall)
2414 ++NumTailCalls;
2415 else if (CLI.CB && CLI.CB->isMustTailCall())
2416 report_fatal_error("failed to perform tail call elimination on a call "
2417 "site marked musttail");
2418
2419 // Get a count of how many bytes are to be pushed on the stack.
2420 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
2421
2422 // Create local copies for byval args.
2423 SmallVector<SDValue> ByValArgs;
2424 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2425 ISD::ArgFlagsTy Flags = Outs[i].Flags;
2426 if (!Flags.isByVal())
2427 continue;
2428
2429 SDValue Arg = OutVals[i];
2430 unsigned Size = Flags.getByValSize();
2431 Align Alignment = Flags.getNonZeroByValAlign();
2432
2433 int FI =
2434 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
2435 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2436 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
2437
2438 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
2439 /*IsVolatile=*/false,
2440 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
2441 MachinePointerInfo(), MachinePointerInfo());
2442 ByValArgs.push_back(FIPtr);
2443 }
2444
2445 if (!IsTailCall)
2446 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
2447
2448 // Copy argument values to their designated locations.
2449 SmallVector<std::pair<Register, SDValue>> RegsToPass;
2450 SmallVector<SDValue> MemOpChains;
2451 SDValue StackPtr;
2452 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
2453 CCValAssign &VA = ArgLocs[i];
2454 SDValue ArgValue = OutVals[i];
2455 ISD::ArgFlagsTy Flags = Outs[i].Flags;
2456
2457 // Promote the value if needed.
2458 // For now, only handle fully promoted and indirect arguments.
2459 if (VA.getLocInfo() == CCValAssign::Indirect) {
2460 // Store the argument in a stack slot and pass its address.
2461 Align StackAlign =
2462 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
2463 getPrefTypeAlign(ArgValue.getValueType(), DAG));
2464 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
2465 // If the original argument was split and passed by reference, we need to
2466 // store the required parts of it here (and pass just one address).
2467 unsigned ArgIndex = Outs[i].OrigArgIndex;
2468 unsigned ArgPartOffset = Outs[i].PartOffset;
2469 assert(ArgPartOffset == 0);
2470 // Calculate the total size to store. We don't have access to what we're
2471 // actually storing other than performing the loop and collecting the
2472 // info.
2473 SmallVector<std::pair<SDValue, SDValue>> Parts;
2474 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
2475 SDValue PartValue = OutVals[i + 1];
2476 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
2477 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
2478 EVT PartVT = PartValue.getValueType();
2479
2480 StoredSize += PartVT.getStoreSize();
2481 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
2482 Parts.push_back(std::make_pair(PartValue, Offset));
2483 ++i;
2484 }
2485 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
2486 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2487 MemOpChains.push_back(
2488 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2489 MachinePointerInfo::getFixedStack(MF, FI)));
2490 for (const auto &Part : Parts) {
2491 SDValue PartValue = Part.first;
2492 SDValue PartOffset = Part.second;
2493 SDValue Address =
2494 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
2495 MemOpChains.push_back(
2496 DAG.getStore(Chain, DL, PartValue, Address,
2497 MachinePointerInfo::getFixedStack(MF, FI)));
2498 }
2499 ArgValue = SpillSlot;
2500 } else {
2501 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
2502 }
2503
2504 // Use local copy if it is a byval arg.
2505 if (Flags.isByVal())
2506 ArgValue = ByValArgs[j++];
2507
2508 if (VA.isRegLoc()) {
2509 // Queue up the argument copies and emit them at the end.
2510 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2511 } else {
2512 assert(VA.isMemLoc() && "Argument not register or memory");
2513 assert(!IsTailCall && "Tail call not allowed if stack is used "
2514 "for passing parameters");
2515
2516 // Work out the address of the stack slot.
2517 if (!StackPtr.getNode())
2518 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
2519 SDValue Address =
2520 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2521 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
2522
2523 // Emit the store.
2524 MemOpChains.push_back(
2525 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2526 }
2527 }
2528
2529 // Join the stores, which are independent of one another.
2530 if (!MemOpChains.empty())
2531 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2532
2533 SDValue Glue;
2534
2535 // Build a sequence of copy-to-reg nodes, chained and glued together.
2536 for (auto &Reg : RegsToPass) {
2537 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
2538 Glue = Chain.getValue(1);
2539 }
2540
2541 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
2542 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
2543 // split it and then direct call can be matched by PseudoCALL.
2544 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
2545 const GlobalValue *GV = S->getGlobal();
2546 unsigned OpFlags =
2547 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
2548 ? LoongArchII::MO_CALL
2549 : LoongArchII::MO_CALL_PLT;
2550 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
2551 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2552 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
2553 *MF.getFunction().getParent(), nullptr)
2554 ? LoongArchII::MO_CALL
2555 : LoongArchII::MO_CALL_PLT;
2556 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
2557 }
2558
2559 // The first call operand is the chain and the second is the target address.
2560 SmallVector<SDValue> Ops;
2561 Ops.push_back(Chain);
2562 Ops.push_back(Callee);
2563
2564 // Add argument registers to the end of the list so that they are
2565 // known live into the call.
2566 for (auto &Reg : RegsToPass)
2567 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2568
2569 if (!IsTailCall) {
2570 // Add a register mask operand representing the call-preserved registers.
2571 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2572 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2573 assert(Mask && "Missing call preserved mask for calling convention");
2574 Ops.push_back(DAG.getRegisterMask(Mask));
2575 }
2576
2577 // Glue the call to the argument copies, if any.
2578 if (Glue.getNode())
2579 Ops.push_back(Glue);
2580
2581 // Emit the call.
2582 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2583
2584 if (IsTailCall) {
2585 MF.getFrameInfo().setHasTailCall();
2586 return DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
2587 }
2588
2589 Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
2590 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2591 Glue = Chain.getValue(1);
2592
2593 // Mark the end of the call, which is glued to the call itself.
2594 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2595 Glue = Chain.getValue(1);
2596
2597 // Assign locations to each value returned by this call.
2598 SmallVector<CCValAssign> RVLocs;
2599 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
2600 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
2601
2602 // Copy all of the result registers out of their specified physreg.
2603 for (auto &VA : RVLocs) {
2604 // Copy the value out.
2605 SDValue RetValue =
2606 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
2607 // Glue the RetValue to the end of the call sequence.
2608 Chain = RetValue.getValue(1);
2609 Glue = RetValue.getValue(2);
2610
2611 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
2612
2613 InVals.push_back(RetValue);
2614 }
2615
2616 return Chain;
2617 }
2618
2619 bool LoongArchTargetLowering::CanLowerReturn(
2620 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2621 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2622 SmallVector<CCValAssign> RVLocs;
2623 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
2624
2625 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2626 LoongArchABI::ABI ABI =
2627 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
2628 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
2629 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
2630 nullptr))
2631 return false;
2632 }
2633 return true;
2634 }
2635
2636 SDValue LoongArchTargetLowering::LowerReturn(
2637 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2638 const SmallVectorImpl<ISD::OutputArg> &Outs,
2639 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
2640 SelectionDAG &DAG) const {
2641 // Stores the assignment of the return value to a location.
2642 SmallVector<CCValAssign> RVLocs;
2643
2644 // Info about the registers and stack slot.
2645 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2646 *DAG.getContext());
2647
2648 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2649 nullptr, CC_LoongArch);
2650 if (CallConv == CallingConv::GHC && !RVLocs.empty())
2651 report_fatal_error("GHC functions return void only");
2652 SDValue Glue;
2653 SmallVector<SDValue, 4> RetOps(1, Chain);
2654
2655 // Copy the result values into the output registers.
2656 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2657 CCValAssign &VA = RVLocs[i];
2658 assert(VA.isRegLoc() && "Can only return in registers!");
2659
2660 // Handle a 'normal' return.
2661 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
2662 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2663
2664 // Guarantee that all emitted copies are stuck together.
2665 Glue = Chain.getValue(1);
2666 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2667 }
2668
2669 RetOps[0] = Chain; // Update chain.
2670
2671 // Add the glue node if we have it.
2672 if (Glue.getNode())
2673 RetOps.push_back(Glue);
2674
2675 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
2676 }
2677
2678 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2679 bool ForCodeSize) const {
2680 // TODO: Maybe need more checks here after vector extension is supported.
2681 if (VT == MVT::f32 && !Subtarget.hasBasicF())
2682 return false;
2683 if (VT == MVT::f64 && !Subtarget.hasBasicD())
2684 return false;
2685 return (Imm.isZero() || Imm.isExactlyValue(+1.0));
2686 }
2687
2688 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
2689 return true;
2690 }
2691
2692 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
2693 return true;
2694 }
2695
2696 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
2697 const Instruction *I) const {
2698 if (!Subtarget.is64Bit())
2699 return isa<LoadInst>(I) || isa<StoreInst>(I);
2700
2701 if (isa<LoadInst>(I))
2702 return true;
2703
2704 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
2705 // require fences because we can use amswap_db.[w/d].
2706 if (isa<StoreInst>(I)) {
2707 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
2708 return (Size == 8 || Size == 16);
2709 }
2710
2711 return false;
2712 }
2713
2714 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
2715 LLVMContext &Context,
2716 EVT VT) const {
2717 if (!VT.isVector())
2718 return getPointerTy(DL);
2719 return VT.changeVectorElementTypeToInteger();
2720 }
2721
2722 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
2723 // TODO: Support vectors.
2724 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
2725 }
2726
2727 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2728 const CallInst &I,
2729 MachineFunction &MF,
2730 unsigned Intrinsic) const {
2731 switch (Intrinsic) {
2732 default:
2733 return false;
2734 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
2735 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
2736 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
2737 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
2738 Info.opc = ISD::INTRINSIC_W_CHAIN;
2739 Info.memVT = MVT::i32;
2740 Info.ptrVal = I.getArgOperand(0);
2741 Info.offset = 0;
2742 Info.align = Align(4);
2743 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
2744 MachineMemOperand::MOVolatile;
2745 return true;
2746 // TODO: Add more Intrinsics later.
2747 }
2748 }
2749
2750 TargetLowering::AtomicExpansionKind
2751 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2752 // TODO: Add more AtomicRMWInsts that need to be expanded.
2753
2754 // Since a floating-point operation requires a non-trivial set of data
2755 // operations, use CmpXChg to expand.
2756 if (AI->isFloatingPointOperation() ||
2757 AI->getOperation() == AtomicRMWInst::UIncWrap ||
2758 AI->getOperation() == AtomicRMWInst::UDecWrap)
2759 return AtomicExpansionKind::CmpXChg;
2760
2761 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
2762 if (Size == 8 || Size == 16)
2763 return AtomicExpansionKind::MaskedIntrinsic;
2764 return AtomicExpansionKind::None;
2765 }
2766
2767 static Intrinsic::ID
2768 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
2769 AtomicRMWInst::BinOp BinOp) {
2770 if (GRLen == 64) {
2771 switch (BinOp) {
2772 default:
2773 llvm_unreachable("Unexpected AtomicRMW BinOp");
2774 case AtomicRMWInst::Xchg:
2775 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
2776 case AtomicRMWInst::Add:
2777 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
2778 case AtomicRMWInst::Sub:
2779 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
2780 case AtomicRMWInst::Nand:
2781 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
2782 case AtomicRMWInst::UMax:
2783 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
2784 case AtomicRMWInst::UMin:
2785 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
2786 case AtomicRMWInst::Max:
2787 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
2788 case AtomicRMWInst::Min:
2789 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
2790 // TODO: support other AtomicRMWInst.
2791 }
2792 }
2793
2794 if (GRLen == 32) {
2795 switch (BinOp) {
2796 default:
2797 llvm_unreachable("Unexpected AtomicRMW BinOp");
2798 case AtomicRMWInst::Xchg:
2799 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
2800 case AtomicRMWInst::Add:
2801 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
2802 case AtomicRMWInst::Sub:
2803 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
2804 case AtomicRMWInst::Nand:
2805 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
2806 // TODO: support other AtomicRMWInst.
2807 }
2808 }
2809
2810 llvm_unreachable("Unexpected GRLen\n");
2811 }
2812
2813 TargetLowering::AtomicExpansionKind
2814 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
2815 AtomicCmpXchgInst *CI) const {
2816 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
2817 if (Size == 8 || Size == 16)
2818 return AtomicExpansionKind::MaskedIntrinsic;
2819 return AtomicExpansionKind::None;
2820 }
2821
2822 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
2823 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2824 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2825 Value *Ordering =
2826 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
2827
2828 // TODO: Support cmpxchg on LA32.
2829 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
2830 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
2831 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
2832 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2833 Type *Tys[] = {AlignedAddr->getType()};
2834 Function *MaskedCmpXchg =
2835 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
2836 Value *Result = Builder.CreateCall(
2837 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
2838 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2839 return Result;
2840 }
2841
2842 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
2843 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
2844 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
2845 unsigned GRLen = Subtarget.getGRLen();
2846 Value *Ordering =
2847 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
2848 Type *Tys[] = {AlignedAddr->getType()};
2849 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
2850 AI->getModule(),
2851 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
2852
2853 if (GRLen == 64) {
2854 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
2855 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2856 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
2857 }
2858
2859 Value *Result;
2860
2861 // Must pass the shift amount needed to sign extend the loaded value prior
2862 // to performing a signed comparison for min/max. ShiftAmt is the number of
2863 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
2864 // is the number of bits to left+right shift the value in order to
2865 // sign-extend.
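  // Worked example (illustrative): for an i8 min/max on LA64 whose byte sits
  // at bit offset ShiftAmt = 8, ValWidth = 8, so SextShamt = 64 - 8 - 8 = 48;
  // shifting the loaded word left and then arithmetically right by 48 moves
  // the field's sign bit to bit 63 and back, sign-extending it for the compare.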
2866 if (AI->getOperation() == AtomicRMWInst::Min ||
2867 AI->getOperation() == AtomicRMWInst::Max) {
2868 const DataLayout &DL = AI->getModule()->getDataLayout();
2869 unsigned ValWidth =
2870 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
2871 Value *SextShamt =
2872 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
2873 Result = Builder.CreateCall(LlwOpScwLoop,
2874 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
2875 } else {
2876 Result =
2877 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
2878 }
2879
2880 if (GRLen == 64)
2881 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2882 return Result;
2883 }
2884
2885 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
2886 const MachineFunction &MF, EVT VT) const {
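  // LoongArch's base FP ISA provides fused multiply-add (fmadd.s/fmadd.d), so
  // forming FMA for scalar f32/f64 is expected to be profitable.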
2887 VT = VT.getScalarType();
2888
2889 if (!VT.isSimple())
2890 return false;
2891
2892 switch (VT.getSimpleVT().SimpleTy) {
2893 case MVT::f32:
2894 case MVT::f64:
2895 return true;
2896 default:
2897 break;
2898 }
2899
2900 return false;
2901 }
2902
2903 Register LoongArchTargetLowering::getExceptionPointerRegister(
2904 const Constant *PersonalityFn) const {
2905 return LoongArch::R4;
2906 }
2907
2908 Register LoongArchTargetLowering::getExceptionSelectorRegister(
2909 const Constant *PersonalityFn) const {
2910 return LoongArch::R5;
2911 }
2912
2913 //===----------------------------------------------------------------------===//
2914 // LoongArch Inline Assembly Support
2915 //===----------------------------------------------------------------------===//
2916
2917 LoongArchTargetLowering::ConstraintType
2918 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
2919 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
2920 //
2921 // 'f': A floating-point register (if available).
2922 // 'k': A memory operand whose address is formed by a base register and
2923 // (optionally scaled) index register.
2924 // 'l': A signed 16-bit constant.
2925 // 'm': A memory operand whose address is formed by a base register and
2926 // offset that is suitable for use in instructions with the same
2927 // addressing mode as st.w and ld.w.
2928 // 'I': A signed 12-bit constant (for arithmetic instructions).
2929 // 'J': Integer zero.
2930 // 'K': An unsigned 12-bit constant (for logic instructions).
2931 // "ZB": An address that is held in a general-purpose register. The offset is
2932 // zero.
2933 // "ZC": A memory operand whose address is formed by a base register and
2934 // offset that is suitable for use in instructions with the same
2935 // addressing mode as ll.w and sc.w.
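  // Illustrative uses of some of these constraints in inline asm (examples
  // only, not taken from tests):
  //   asm ("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(12));
  //   asm ("fadd.s %0, %1, %2" : "=f"(sum) : "f"(x), "f"(y));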
2936 if (Constraint.size() == 1) {
2937 switch (Constraint[0]) {
2938 default:
2939 break;
2940 case 'f':
2941 return C_RegisterClass;
2942 case 'l':
2943 case 'I':
2944 case 'J':
2945 case 'K':
2946 return C_Immediate;
2947 case 'k':
2948 return C_Memory;
2949 }
2950 }
2951
2952 if (Constraint == "ZC" || Constraint == "ZB")
2953 return C_Memory;
2954
2955 // 'm' is handled by the generic implementation below.
2956 return TargetLowering::getConstraintType(Constraint);
2957 }
2958
2959 unsigned LoongArchTargetLowering::getInlineAsmMemConstraint(
2960 StringRef ConstraintCode) const {
2961 return StringSwitch<unsigned>(ConstraintCode)
2962 .Case("k", InlineAsm::Constraint_k)
2963 .Case("ZB", InlineAsm::Constraint_ZB)
2964 .Case("ZC", InlineAsm::Constraint_ZC)
2965 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
2966 }
2967
2968 std::pair<unsigned, const TargetRegisterClass *>
2969 LoongArchTargetLowering::getRegForInlineAsmConstraint(
2970 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
2971 // First, see if this is a constraint that directly corresponds to a LoongArch
2972 // register class.
2973 if (Constraint.size() == 1) {
2974 switch (Constraint[0]) {
2975 case 'r':
2976 // TODO: Support fixed vectors up to GRLen?
2977 if (VT.isVector())
2978 break;
2979 return std::make_pair(0U, &LoongArch::GPRRegClass);
2980 case 'f':
2981 if (Subtarget.hasBasicF() && VT == MVT::f32)
2982 return std::make_pair(0U, &LoongArch::FPR32RegClass);
2983 if (Subtarget.hasBasicD() && VT == MVT::f64)
2984 return std::make_pair(0U, &LoongArch::FPR64RegClass);
2985 break;
2986 default:
2987 break;
2988 }
2989 }
2990
2991 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
2992 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
2993 // constraints while the official register name is prefixed with a '$'. So we
2994 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
2995 // before it is parsed. Also, TargetLowering::getRegForInlineAsmConstraint is
2996 // case insensitive, so no need to convert the constraint to upper case here.
2997 //
2998 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
2999 // decode the usage of register name aliases into their official names. And
3000 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
3001 // official register names.
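  // For example (illustrative), a "{$r4}" constraint is rewritten to "{r4}"
  // below, and "{$f0}" used with an f64 operand resolves to F0_64 in the
  // FPR64 register class when the D extension is available.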
3002 if (Constraint.startswith("{$r") || Constraint.startswith("{$f")) {
3003 bool IsFP = Constraint[2] == 'f';
3004 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
3005 std::pair<unsigned, const TargetRegisterClass *> R;
3006 R = TargetLowering::getRegForInlineAsmConstraint(
3007 TRI, join_items("", Temp.first, Temp.second), VT);
3008 // Match those names to the widest floating point register type available.
3009 if (IsFP) {
3010 unsigned RegNo = R.first;
3011 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
3012 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
3013 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
3014 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
3015 }
3016 }
3017 }
3018 return R;
3019 }
3020
3021 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3022 }
3023
3024 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
3025 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
3026 SelectionDAG &DAG) const {
3027 // Currently only support length 1 constraints.
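  // Illustrative example of an operand validated here: the 12-bit unsigned
  // immediate for 'K' in
  //   asm ("ori %0, %1, %2" : "=r"(r) : "r"(a), "K"(0xfff));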
3028 if (Constraint.length() == 1) {
3029 switch (Constraint[0]) {
3030 case 'l':
3031 // Validate & create a 16-bit signed immediate operand.
3032 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3033 uint64_t CVal = C->getSExtValue();
3034 if (isInt<16>(CVal))
3035 Ops.push_back(
3036 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3037 }
3038 return;
3039 case 'I':
3040 // Validate & create a 12-bit signed immediate operand.
3041 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3042 uint64_t CVal = C->getSExtValue();
3043 if (isInt<12>(CVal))
3044 Ops.push_back(
3045 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3046 }
3047 return;
3048 case 'J':
3049 // Validate & create an integer zero operand.
3050 if (auto *C = dyn_cast<ConstantSDNode>(Op))
3051 if (C->getZExtValue() == 0)
3052 Ops.push_back(
3053 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
3054 return;
3055 case 'K':
3056 // Validate & create a 12-bit unsigned immediate operand.
3057 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3058 uint64_t CVal = C->getZExtValue();
3059 if (isUInt<12>(CVal))
3060 Ops.push_back(
3061 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
3062 }
3063 return;
3064 default:
3065 break;
3066 }
3067 }
3068 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3069 }
3070
3071 #define GET_REGISTER_MATCHER
3072 #include "LoongArchGenAsmMatcher.inc"
3073
3074 Register
3075 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
3076 const MachineFunction &MF) const {
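  // Used for named-register accesses, e.g. (illustrative) a global register
  // variable declared as:
  //   register uintptr_t tp __asm__("$tp");
  // Only registers reserved by the target may be obtained this way (checked
  // below).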
3077 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
3078 std::string NewRegName = Name.second.str();
3079 Register Reg = MatchRegisterAltName(NewRegName);
3080 if (Reg == LoongArch::NoRegister)
3081 Reg = MatchRegisterName(NewRegName);
3082 if (Reg == LoongArch::NoRegister)
3083 report_fatal_error(
3084 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
3085 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
3086 if (!ReservedRegs.test(Reg))
3087 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
3088 StringRef(RegName) + "\"."));
3089 return Reg;
3090 }
3091
3092 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
3093 EVT VT, SDValue C) const {
3094 // TODO: Support vectors.
3095 if (!VT.isScalarInteger())
3096 return false;
3097
3098 // Omit the optimization if the data size exceeds GRLen.
3099 if (VT.getSizeInBits() > Subtarget.getGRLen())
3100 return false;
3101
3102 // Break MUL into (SLLI + ADD/SUB) or ALSL.
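  // For example (illustrative): x * 9 = (x << 3) + x maps to a single
  // alsl.{w,d}, and x * 7 = (x << 3) - x maps to slli + sub, both typically
  // cheaper than a multiply.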
3103 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
3104 const APInt &Imm = ConstNode->getAPIntValue();
3105 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
3106 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
3107 return true;
3108 }
3109
3110 return false;
3111 }
3112