//===- subzero/src/IceTargetLoweringX8664.h - lowering for x86-64 -*- C++ -*-=//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringX8664 class, which implements the
/// TargetLowering interface for the X86 64-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGX8664_H
#define SUBZERO_SRC_ICETARGETLOWERINGX8664_H

#include "IceAssemblerX8664.h"
#include "IceCfg.h"
#include "IceDefs.h"
#include "IceGlobalContext.h"
#include "IceInst.h"
#include "IceInstX8664.h"
#include "IceSwitchLowering.h"
#include "IceTargetLoweringX86.h"
#include "IceTargetLoweringX86RegClass.h"
#include "IceUtils.h"

#include <array>
#include <type_traits>
#include <utility>

namespace Ice {
namespace X8664 {

using namespace ::Ice::X86;

constexpr Type WordType = IceType_i64;

class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};

class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc,
    PK_Arith // A flag-setting arithmetic instruction.
  };

  /// Currently the actual enum values are not used (other than CK_None), but
  /// we go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
                             BoolFoldingConsumerKind ConsumerKind);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  void invalidateProducersOnStore(const Inst *Instr);
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  CfgUnorderedMap<SizeT, BoolFoldingEntry> Producers;
};
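
// Illustrative example of the pattern BoolFolding targets: an i1 producer
// whose consumer lives in the same basic block, e.g. (Subzero IR)
//   %cond = icmp eq i32 %a, %b
//   br i1 %cond, label %T, label %F
// When folding applies, the flag-setting compare is emitted directly in front
// of the branch (cmp/br), so %cond never needs to be materialized in a
// register.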

class TargetX8664 : public TargetX86 {
  TargetX8664() = delete;
  TargetX8664(const TargetX8664 &) = delete;
  TargetX8664 &operator=(const TargetX8664 &) = delete;

  friend class BoolFolding;

public:
  using BrCond = CondX86::BrCond;
  using CmppsCond = CondX86::CmppsCond;

  using SegmentRegisters = X86OperandMem::SegmentRegisters;

  using InstX86Br = Insts::Br;
  using InstX86FakeRMW = Insts::FakeRMW;
  using InstX86Label = Insts::Label;

  ~TargetX8664() override = default;

  static void staticInit(GlobalContext *Ctx);
  static bool shouldBePooled(const Constant *C);
  static ::Ice::Type getPointerType();

  void translateOm1() override;
  void translateO2() override;
  void doLoadOpt();
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegisterSet::Reg_NUM; }

  Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) override {
    if (isVectorType(Dest->getType())) {
      return Insts::Movp::create(Func, Dest, SrcVar);
    }
    return Insts::Mov::create(Func, Dest, SrcVar);
  }

  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  static const char *getRegClassName(RegClass C) {
    auto ClassNum = static_cast<RegClassX86>(C);
    assert(ClassNum < RCX86_NUM);
    switch (ClassNum) {
    default:
      assert(C < RC_Target);
      return regClassString(C);
    case RCX86_Is64To8:
      return "i64to8"; // 64-bit GPR truncable to i8
    case RCX86_Is32To8:
      return "i32to8"; // 32-bit GPR truncable to i8
    case RCX86_Is16To8:
      return "i16to8"; // 16-bit GPR truncable to i8
    case RCX86_IsTrunc8Rcvr:
      return "i8from"; // 8-bit GPR truncable from wider GPRs
    case RCX86_IsAhRcvr:
      return "i8fromah"; // 8-bit GPR that ah can be assigned to
    }
  }
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
    return TypeToRegisterSet[RC];
  }

  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }

  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    Reg.assertIsValid();
    return RegisterAliases[Reg];
  }

  bool hasFramePointer() const override { return IsEbpBasedFrame; }
  void setHasFramePointer() override { IsEbpBasedFrame = true; }
  RegNumT getStackReg() const override { return RegX8664::Reg_rsp; }
  RegNumT getFrameReg() const override { return RegX8664::Reg_rbp; }
  RegNumT getFrameOrStackReg() const override {
    // If the stack pointer needs to be aligned, then the frame pointer is
    // unaligned, so always use the stack pointer.
    if (needsStackPointerAlignment())
      return getStackReg();
    return IsEbpBasedFrame ? getFrameReg() : getStackReg();
  }
  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of WordType bytes.
    const uint32_t WordSizeInBytes = typeWidthInBytes(WordType);
    return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
  }
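
  // For example, with WordType == i64 an i32 spill slot still occupies
  // typeWidthInBytesOnStack(IceType_i32) == 8 bytes, while a 16-byte vector
  // stays at 16 since it is already a multiple of the word size.
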
  uint32_t getStackAlignment() const override {
    return X86_STACK_ALIGNMENT_BYTES;
  }
  bool needsStackPointerAlignment() const override {
    // If the ABI's stack alignment is smaller than the vector size,
    // use the (realigned) stack pointer for addressing any stack variables.
    return X86_STACK_ALIGNMENT_BYTES < RequiredStackAlignment;
  }
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  /// Returns the (negative) offset from ebp/rbp where the fixed Allocas start.
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - maxOutArgsSizeBytes());
  }
  virtual uint32_t maxOutArgsSizeBytes() const override {
    return MaxOutArgsSizeBytes;
  }
  virtual void updateMaxOutArgsSizeBytes(uint32_t Size) {
    MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, Size);
  }

  bool shouldSplitToVariable64On32(Type Ty) const override { return false; }

  SizeT getMinJumpTableSize() const override { return 4; }

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void initNodeForLowering(CfgNode *Node) override;

  void addProlog(CfgNode *Node) override;
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t StackAdjBytes,
                              size_t &InArgsSizeBytes);
  void addEpilog(CfgNode *Node) override;

  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());

protected:
  void postLower() override;

  void lowerAlloca(const InstAlloca *Instr) override;
  void lowerArguments() override;
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;
  void lowerFcmp(const InstFcmp *Instr) override;
  void lowerIcmp(const InstIcmp *Instr) override;

  void lowerIntrinsic(const InstIntrinsic *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void lowerOther(const Inst *Instr) override;
  void lowerRMW(const InstX86FakeRMW *RMW);
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const CfgVector<Type> &ArgTypes,
                                          Type ReturnType);
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;

  /// OptAddr wraps all the possible operands that an x86 address might have.
  struct OptAddr {
    Variable *Base = nullptr;
    Variable *Index = nullptr;
    uint16_t Shift = 0;
    int32_t Offset = 0;
    ConstantRelocatable *Relocatable = nullptr;
  };

  // Builds information for a canonical address expression:
  //   <Relocatable + Offset>(Base, Index, Shift)
  X86OperandMem *computeAddressOpt(const Inst *Instr, Type MemType,
                                   Operand *Addr);
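
  // For example, an access to element i of a global i32 array arr corresponds
  // to Relocatable = arr, Offset = 0, Base = nullptr, Index = %i, and
  // Shift = 2 (the index is scaled by 1 << Shift).
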
  void doAddressOptOther() override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void doAddressOptLoadSubVector() override;
  void doAddressOptStoreSubVector() override;
  void doMockBoundsCheck(Operand *Opnd) override;

  /// Naive lowering of cmpxchg.
  void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
                          Operand *Desired);
  /// Attempt a more optimized lowering of cmpxchg. Returns true if optimized.
  bool tryOptimizedCmpxchgCmpBr(Variable *DestPrev, Operand *Ptr,
                                Operand *Expected, Operand *Desired);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal,
                       Operand *SecondVal);
  /// Load from memory for a given type.
  void typedLoad(Type Ty, Variable *Dest, Variable *Base, Constant *Offset);
  /// Store to memory for a given type.
  void typedStore(Type Ty, Variable *Value, Variable *Base, Constant *Offset);
  /// Copy memory of given type from Src to Dest using OffsetAmt on both.
  void copyMemory(Type Ty, Variable *Dest, Variable *Src, int32_t OffsetAmt);
  /// Replace some calls to memcpy with inline instructions.
  void lowerMemcpy(Operand *Dest, Operand *Src, Operand *Count);
  /// Replace some calls to memmove with inline instructions.
  void lowerMemmove(Operand *Dest, Operand *Src, Operand *Count);
  /// Replace some calls to memset with inline instructions.
  void lowerMemset(Operand *Dest, Operand *Val, Operand *Count);

  /// Lower an indirect jump.
  void lowerIndirectJump(Variable *JumpTarget);

  /// Check that the comparison value is in [Min, Max]. The flags register will
  /// be modified with:
  /// - below or equal, if in range
  /// - above, if not in range
  /// The index into the range is returned.
  Operand *lowerCmpRange(Operand *Comparison, uint64_t Min, uint64_t Max);
  /// Lowering of a cluster of switch cases. If the case is not matched,
  /// control will pass to the default label provided. If the default label is
  /// nullptr then control will fall through to the next instruction. DoneCmp
  /// should be true if the flags already contain the result of a comparison
  /// with the Comparison operand.
  void lowerCaseCluster(const CaseCluster &Case, Operand *Src0, bool DoneCmp,
                        CfgNode *DefaultLabel = nullptr);

  using LowerBinOp = void (TargetX8664::*)(Variable *, Operand *);
  void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
                                Variable *Dest, Operand *Ptr, Operand *Val);
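
  // Illustrative sketch only (not a description of the exact emitted code):
  // an atomic RMW that has no single lock-prefixed instruction is commonly
  // expanded into a compare-exchange retry loop along these lines:
  //   mov   rax, [Ptr]          ; load the current value
  // .retry:
  //   mov   tmp, rax
  //   <op>  tmp, Val            ; apply the binary operation
  //   lock cmpxchg [Ptr], tmp   ; store only if [Ptr] still equals rax
  //   jne   .retry              ; another thread intervened, try again
  // op_lo/op_hi supply the operation as member-function pointers (op_hi
  // presumably covering the upper half when a value is split across a
  // register pair).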

  void eliminateNextVectorSextInstruction(Variable *SignExtendedResult);

  void emitStackProbe(size_t StackSizeBytes);

  /// Emit just the call instruction (without argument or return variable
  /// processing).
  Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
                         size_t NumVariadicFpArgs = 0);
  /// Materialize the moves needed to return a value of the specified type.
  Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType);

  /// Emit a jump table to the constant pool.
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  /// Emit a fake use of esp to make sure esp stays alive for the entire
  /// function. Otherwise some esp adjustments get dead-code eliminated.
  void keepEspLiveAtExit() {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(getStackReg(), WordType);
    Context.insert<InstFakeUse>(esp);
  }

  /// Operand legalization helpers. To deal with address mode constraints, the
  /// helpers will create a new Operand and emit instructions that guarantee
  /// that the Operand kind is one of those indicated by the LegalMask (a
  /// bitmask of allowed kinds). If the input Operand is known to already meet
  /// the constraints, it may be simply returned as the result, without
  /// creating any new instructions or operands.
  enum OperandLegalization {
    Legal_None = 0,
    Legal_Reg = 1 << 0, // physical register, not stack location
    Legal_Imm = 1 << 1,
    Legal_Mem = 1 << 2, // includes [eax+4*ecx] as well as [esp+12]
    Legal_Rematerializable = 1 << 3,
    Legal_AddrAbs = 1 << 4, // ConstantRelocatable doesn't have to add RebasePtr
    Legal_Default = ~(Legal_Rematerializable | Legal_AddrAbs)
    // TODO(stichnot): Figure out whether this default works for x86-64.
  };
  using LegalMask = uint32_t;
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
  /// Legalize the first source operand for use in the cmp instruction.
  Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
  /// Turn a pointer operand into a memory operand that can be used by a real
  /// load/store operation. Legalizes the operand as well. This is a nop if the
  /// operand is already a legal memory operand.
  X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
                                   bool DoLegalize = true);
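
  // For illustration, a lowering routine might write
  //   Operand *Src0 = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
  //   Variable *T = legalizeToReg(Instr->getSrc(1));
  // to force the first source into a register-or-memory form and the second
  // into a physical register before handing them to an instruction emitter.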

  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();

  static constexpr uint32_t NoSizeLimit = 0;
  /// Returns the largest type which is equal to or smaller than Size bytes.
  /// The type is suitable for copying memory, i.e. a load and store will be a
  /// single instruction (for example x86 will get f64 not i64).
  static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit);
  /// Returns the smallest type which is equal to or larger than Size bytes. If
  /// one doesn't exist then the largest type smaller than Size bytes is
  /// returned. The type is suitable for memory copies as described at
  /// largestTypeInSize.
  static Type firstTypeThatFitsSize(uint32_t Size,
                                    uint32_t MaxSize = NoSizeLimit);
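
  // Worked example, assuming the usual x86 copy types (i8/i16/i32/f64/xmm):
  // largestTypeInSize(11) picks the 8-byte type (f64, per the comment above),
  // and firstTypeThatFitsSize(3) rounds up to the 4-byte type. An 11-byte
  // inline memcpy can therefore be assembled from one 8-byte move plus a
  // smaller (possibly overlapping) tail move.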

  Variable *copyToReg8(Operand *Src, RegNumT RegNum = RegNumT());
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());

  /// Returns a register containing all zeros, without affecting the FLAGS
  /// register, using the best instruction for the type.
  Variable *makeZeroedRegister(Type Ty, RegNumT RegNum = RegNumT());

  /// \name Returns a vector in a register with the given constant entries.
  /// @{
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfOnes(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfMinusOnes(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfHighOrderBits(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfFabsMask(Type Ty, RegNumT RegNum = RegNumT());
  /// @}

  /// Return a memory operand corresponding to a stack allocated Variable.
  X86OperandMem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                              uint32_t Offset = 0);

  /// The following are helpers that insert lowered x86 instructions with
  /// minimal syntactic overhead, so that the lowering code can look as close
  /// to assembly as practical.
  void _adc(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Adc>(Dest, Src0);
  }
  void _adc_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AdcRMW>(DestSrc0, Src1);
  }
  void _add(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Add>(Dest, Src0);
  }
  void _add_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AddRMW>(DestSrc0, Src1);
  }
  void _addps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Addps>(Dest, Src0);
  }
  void _addss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Addss>(Dest, Src0);
  }
  void _add_sp(Operand *Adjustment);
  void _and(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::And>(Dest, Src0);
  }
  void _andnps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Andnps>(Dest, Src0);
  }
  void _andps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Andps>(Dest, Src0);
  }
  void _and_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AndRMW>(DestSrc0, Src1);
  }
  void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Blendvps>(Dest, Src0, Src1);
  }
  void _br(BrCond Condition, CfgNode *TargetTrue, CfgNode *TargetFalse) {
    Context.insert<InstX86Br>(TargetTrue, TargetFalse, Condition,
                              InstX86Br::Far);
  }
  void _br(CfgNode *Target) {
    Context.insert<InstX86Br>(Target, InstX86Br::Far);
  }
  void _br(BrCond Condition, CfgNode *Target) {
    Context.insert<InstX86Br>(Target, Condition, InstX86Br::Far);
  }
  void _br(BrCond Condition, InstX86Label *Label,
           InstX86Br::Mode Kind = InstX86Br::Near) {
    Context.insert<InstX86Br>(Label, Condition, Kind);
  }
  void _bsf(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Bsf>(Dest, Src0);
  }
  void _bsr(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Bsr>(Dest, Src0);
  }
  void _bswap(Variable *SrcDest) { Context.insert<Insts::Bswap>(SrcDest); }
  void _cbwdq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Cbwdq>(Dest, Src0);
  }
  void _cmov(Variable *Dest, Operand *Src0, BrCond Condition) {
    Context.insert<Insts::Cmov>(Dest, Src0, Condition);
  }
  void _cmp(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Icmp>(Src0, Src1);
  }
  void _cmpps(Variable *Dest, Operand *Src0, CmppsCond Condition) {
    Context.insert<Insts::Cmpps>(Dest, Src0, Condition);
  }
  void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
                bool Locked) {
    Context.insert<Insts::Cmpxchg>(DestOrAddr, Eax, Desired, Locked);
    // Mark eax as possibly modified by cmpxchg.
    Context.insert<InstFakeDef>(Eax, llvm::dyn_cast<Variable>(DestOrAddr));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Eax);
  }
  void _cmpxchg8b(X86OperandMem *Addr, Variable *Edx, Variable *Eax,
                  Variable *Ecx, Variable *Ebx, bool Locked) {
    Context.insert<Insts::Cmpxchg8b>(Addr, Edx, Eax, Ecx, Ebx, Locked);
    // Mark edx and eax as possibly modified by cmpxchg8b.
    Context.insert<InstFakeDef>(Edx);
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Edx);
    Context.insert<InstFakeDef>(Eax);
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Eax);
  }
  void _cvt(Variable *Dest, Operand *Src0, Insts::Cvt::CvtVariant Variant) {
    Context.insert<Insts::Cvt>(Dest, Src0, Variant);
  }
  void _round(Variable *Dest, Operand *Src0, Operand *Imm) {
    Context.insert<Insts::Round>(Dest, Src0, Imm);
  }
  void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Div>(Dest, Src0, Src1);
  }
  void _divps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Divps>(Dest, Src0);
  }
  void _divss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Divss>(Dest, Src0);
  }
  void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Idiv>(Dest, Src0, Src1);
  }
  void _imul(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Imul>(Dest, Src0);
  }
  void _imul_imm(Variable *Dest, Operand *Src0, Constant *Imm) {
    Context.insert<Insts::ImulImm>(Dest, Src0, Imm);
  }
  void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Insertps>(Dest, Src0, Src1);
  }
  void _int3() { Context.insert<Insts::Int3>(); }
  void _jmp(Operand *Target) { Context.insert<Insts::Jmp>(Target); }
  void _lea(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Lea>(Dest, Src0);
  }
  void _link_bp();
  void _push_reg(RegNumT RegNum);
  void _pop_reg(RegNumT RegNum);
  void _mfence() { Context.insert<Insts::Mfence>(); }
  /// Moves can be used to redefine registers, creating "partial kills" for
  /// liveness. Mark where moves are used in this way.
  void _redefined(Inst *MovInst, bool IsRedefinition = true) {
    if (IsRedefinition)
      MovInst->setDestRedefined();
  }
  /// If Dest=nullptr is passed in, then a new variable is created, marked as
  /// infinite register allocation weight, and returned through the in/out
  /// Dest argument.
  Insts::Mov *_mov(Variable *&Dest, Operand *Src0, RegNumT RegNum = RegNumT()) {
    if (Dest == nullptr)
      Dest = makeReg(Src0->getType(), RegNum);
    return Context.insert<Insts::Mov>(Dest, Src0);
  }
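
  // For example, lowering code commonly writes
  //   Variable *T = nullptr;
  //   _mov(T, Src0);
  // which allocates a fresh infinite-weight temporary for T and emits the
  // move in a single step.
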
  void _mov_sp(Operand *NewValue);
  Insts::Movp *_movp(Variable *Dest, Operand *Src0) {
    return Context.insert<Insts::Movp>(Dest, Src0);
  }
  void _movd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movd>(Dest, Src0);
  }
  void _movq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movq>(Dest, Src0);
  }
  void _movss(Variable *Dest, Variable *Src0) {
    Context.insert<Insts::MovssRegs>(Dest, Src0);
  }
  void _movsx(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movsx>(Dest, Src0);
  }
  Insts::Movzx *_movzx(Variable *Dest, Operand *Src0) {
    return Context.insert<Insts::Movzx>(Dest, Src0);
  }
  void _maxss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Maxss>(Dest, Src0);
  }
  void _minss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Minss>(Dest, Src0);
  }
  void _maxps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Maxps>(Dest, Src0);
  }
  void _minps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Minps>(Dest, Src0);
  }
  void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Mul>(Dest, Src0, Src1);
  }
  void _mulps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Mulps>(Dest, Src0);
  }
  void _mulss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Mulss>(Dest, Src0);
  }
  void _neg(Variable *SrcDest) { Context.insert<Insts::Neg>(SrcDest); }
  void _nop(SizeT Variant) { Context.insert<Insts::Nop>(Variant); }
  void _or(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Or>(Dest, Src0);
  }
  void _orps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Orps>(Dest, Src0);
  }
  void _or_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::OrRMW>(DestSrc0, Src1);
  }
  void _padd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Padd>(Dest, Src0);
  }
  void _padds(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Padds>(Dest, Src0);
  }
  void _paddus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Paddus>(Dest, Src0);
  }
  void _pand(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pand>(Dest, Src0);
  }
  void _pandn(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pandn>(Dest, Src0);
  }
  void _pblendvb(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pblendvb>(Dest, Src0, Src1);
  }
  void _pcmpeq(Variable *Dest, Operand *Src0,
               Type ArithmeticTypeOverride = IceType_void) {
    Context.insert<Insts::Pcmpeq>(Dest, Src0, ArithmeticTypeOverride);
  }
  void _pcmpgt(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pcmpgt>(Dest, Src0);
  }
  void _pextr(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pextr>(Dest, Src0, Src1);
  }
  void _pinsr(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pinsr>(Dest, Src0, Src1);
  }
  void _pmull(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmull>(Dest, Src0);
  }
  void _pmulhw(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmulhw>(Dest, Src0);
  }
  void _pmulhuw(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmulhuw>(Dest, Src0);
  }
  void _pmaddwd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmaddwd>(Dest, Src0);
  }
  void _pmuludq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmuludq>(Dest, Src0);
  }
  void _pop(Variable *Dest) { Context.insert<Insts::Pop>(Dest); }
  void _por(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Por>(Dest, Src0);
  }
  void _punpckl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Punpckl>(Dest, Src0);
  }
  void _punpckh(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Punpckh>(Dest, Src0);
  }
  void _packss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Packss>(Dest, Src0);
  }
  void _packus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Packus>(Dest, Src0);
  }
  void _pshufb(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pshufb>(Dest, Src0);
  }
  void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pshufd>(Dest, Src0, Src1);
  }
  void _psll(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psll>(Dest, Src0);
  }
  void _psra(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psra>(Dest, Src0);
  }
  void _psrl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psrl>(Dest, Src0);
  }
  void _psub(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psub>(Dest, Src0);
  }
  void _psubs(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psubs>(Dest, Src0);
  }
  void _psubus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psubus>(Dest, Src0);
  }
  void _push(Operand *Src0) { Context.insert<Insts::Push>(Src0); }
  void _pxor(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pxor>(Dest, Src0);
  }
  void _ret(Variable *Src0 = nullptr) { Context.insert<Insts::Ret>(Src0); }
  void _rol(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Rol>(Dest, Src0);
  }
  void _round(Variable *Dest, Operand *Src, Constant *Imm) {
    Context.insert<Insts::Round>(Dest, Src, Imm);
  }
  void _sar(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sar>(Dest, Src0);
  }
  void _sbb(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sbb>(Dest, Src0);
  }
  void _sbb_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::SbbRMW>(DestSrc0, Src1);
  }
  void _setcc(Variable *Dest, BrCond Condition) {
    Context.insert<Insts::Setcc>(Dest, Condition);
  }
  void _shl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Shl>(Dest, Src0);
  }
  void _shld(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Shld>(Dest, Src0, Src1);
  }
  void _shr(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Shr>(Dest, Src0);
  }
  void _shrd(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Shrd>(Dest, Src0, Src1);
  }
  void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Shufps>(Dest, Src0, Src1);
  }
  void _movmsk(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movmsk>(Dest, Src0);
  }
  void _sqrt(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sqrt>(Dest, Src0);
  }
  void _store(Operand *Value, X86Operand *Mem) {
    Context.insert<Insts::Store>(Value, Mem);
  }
  void _storep(Variable *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreP>(Value, Mem);
  }
  void _storeq(Operand *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreQ>(Value, Mem);
  }
  void _stored(Operand *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreD>(Value, Mem);
  }
  void _sub(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sub>(Dest, Src0);
  }
  void _sub_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::SubRMW>(DestSrc0, Src1);
  }
  void _sub_sp(Operand *Adjustment);
  void _subps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Subps>(Dest, Src0);
  }
  void _subss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Subss>(Dest, Src0);
  }
  void _test(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Test>(Src0, Src1);
  }
  void _ucomiss(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Ucomiss>(Src0, Src1);
  }
  void _ud2() { Context.insert<Insts::UD2>(); }
  void _unlink_bp();
  void _xadd(Operand *Dest, Variable *Src, bool Locked) {
    Context.insert<Insts::Xadd>(Dest, Src, Locked);
    // The xadd exchanges Dest and Src (modifying Src). Model that update with
    // a FakeDef followed by a FakeUse.
    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Src);
  }
  void _xchg(Operand *Dest, Variable *Src) {
    Context.insert<Insts::Xchg>(Dest, Src);
    // The xchg modifies Dest and Src -- model that update with a
    // FakeDef/FakeUse.
    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Src);
  }
  void _xor(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Xor>(Dest, Src0);
  }
  void _xorps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Xorps>(Dest, Src0);
  }
  void _xor_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::XorRMW>(DestSrc0, Src1);
  }

  void _iaca_start() {
    if (!BuildDefs::minimal())
      Context.insert<Insts::IacaStart>();
  }
  void _iaca_end() {
    if (!BuildDefs::minimal())
      Context.insert<Insts::IacaEnd>();
  }

  /// This class helps wrap IACA markers around the code generated by the
  /// current scope. It means you don't need to put an end before each return.
  class ScopedIacaMark {
    ScopedIacaMark(const ScopedIacaMark &) = delete;
    ScopedIacaMark &operator=(const ScopedIacaMark &) = delete;

  public:
    ScopedIacaMark(TargetX8664 *Lowering) : Lowering(Lowering) {
      Lowering->_iaca_start();
    }
    ~ScopedIacaMark() { end(); }
    void end() {
      if (!Lowering)
        return;
      Lowering->_iaca_end();
      Lowering = nullptr;
    }

  private:
    TargetX8664 *Lowering;
  };
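
  // Illustrative usage (lowerFoo is a hypothetical routine): a stack guard
  // brackets everything emitted in its scope with IACA markers on every
  // return path:
  //   void TargetX8664::lowerFoo(...) {
  //     ScopedIacaMark Mark(this);
  //     // ... emit instructions ...
  //   } // Mark's destructor emits the end marker here.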

  bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
  void findRMW();

  static uint32_t applyStackAlignment(uint32_t Value);

  bool IsEbpBasedFrame = false;

  /// Stack alignment guaranteed by the ABI.
  static constexpr uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
  /// Stack alignment required by the currently lowered function.
  const uint32_t RequiredStackAlignment = X86_STACK_ALIGNMENT_BYTES;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSet;
  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSetUnfiltered;
  static std::array<SmallBitVector, RegisterSet::Reg_NUM> RegisterAliases;
  SmallBitVector RegsUsed;
  std::array<VarList, IceType_NUM> PhysicalRegisters;

private:
  void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo,
                    Operand *Src0Hi, Operand *Src1Lo, Variable *DestLo,
                    Variable *DestHi);

  /// Emit the code for a combined operation and consumer instruction, or set
  /// the destination variable of the operation if Consumer == nullptr.
  void lowerIcmpAndConsumer(const InstIcmp *Icmp, const Inst *Consumer);
  void lowerFcmpAndConsumer(const InstFcmp *Fcmp, const Inst *Consumer);
  void lowerArithAndConsumer(const InstArithmetic *Arith, const Inst *Consumer);

  /// Emit a setcc instruction if Consumer == nullptr; otherwise emit a
  /// specialized version of Consumer.
  void setccOrConsumer(BrCond Condition, Variable *Dest, const Inst *Consumer);

  /// Emit a mov [1|0] instruction if Consumer == nullptr; otherwise emit a
  /// specialized version of Consumer.
  void movOrConsumer(bool IcmpResult, Variable *Dest, const Inst *Consumer);

  /// Emit the code for instructions with a vector type.
  void lowerIcmpVector(const InstIcmp *Icmp);
  void lowerFcmpVector(const InstFcmp *Fcmp);
  void lowerSelectVector(const InstSelect *Instr);

  /// Helpers for select lowering.
  void lowerSelectMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                       Operand *SrcF);
  void lowerSelectIntMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                          Operand *SrcF);
  /// Generic helper to move an arbitrary type from Src to Dest.
  void lowerMove(Variable *Dest, Operand *Src, bool IsRedefinition);

  /// Optimizations for idiom recognition.
  bool lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, const InstSelect *Select);

  BoolFolding FoldingInfo;

  /// Helpers for lowering ShuffleVector.
  /// @{
  Variable *lowerShuffleVector_AllFromSameSrc(Operand *Src, SizeT Index0,
                                              SizeT Index1, SizeT Index2,
                                              SizeT Index3);
  static constexpr SizeT IGNORE_INDEX = 0x80000000u;
  Variable *lowerShuffleVector_TwoFromSameSrc(Operand *Src0, SizeT Index0,
                                              SizeT Index1, Operand *Src1,
                                              SizeT Index2, SizeT Index3);
  static constexpr SizeT UNIFIED_INDEX_0 = 0;
  static constexpr SizeT UNIFIED_INDEX_1 = 2;
  Variable *lowerShuffleVector_UnifyFromDifferentSrcs(Operand *Src0,
                                                      SizeT Index0,
                                                      Operand *Src1,
                                                      SizeT Index1);
  static constexpr SizeT CLEAR_ALL_BITS = 0x80;
  SizeT PshufbMaskCount = 0;
  GlobalString lowerShuffleVector_NewMaskName();
  ConstantRelocatable *lowerShuffleVector_CreatePshufbMask(
      int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
      int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
      int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
      int8_t Idx15);
  void lowerShuffleVector_UsingPshufb(Variable *Dest, Operand *Src0,
                                      Operand *Src1, int8_t Idx0, int8_t Idx1,
                                      int8_t Idx2, int8_t Idx3, int8_t Idx4,
                                      int8_t Idx5, int8_t Idx6, int8_t Idx7,
                                      int8_t Idx8, int8_t Idx9, int8_t Idx10,
                                      int8_t Idx11, int8_t Idx12, int8_t Idx13,
                                      int8_t Idx14, int8_t Idx15);
  /// @}

  /// The following table summarizes the logic for lowering the fcmp
  /// instruction. There is one table entry for each of the 16 conditions.
  ///
  /// The first four columns describe the case when the operands are floating
  /// point scalar values. A comment in lowerFcmp() describes the lowering
  /// template. In the most general case, there is a compare followed by two
  /// conditional branches, because some fcmp conditions don't map to a single
  /// x86 conditional branch. However, in many cases it is possible to swap the
  /// operands in the comparison and have a single conditional branch. Since
  /// it's quite tedious to validate the table by hand, good execution tests are
  /// helpful.
  ///
  /// The last two columns describe the case when the operands are vectors of
  /// floating point values. For most fcmp conditions, there is a clear mapping
  /// to a single x86 cmpps instruction variant. Some fcmp conditions require
  /// special code to handle and these are marked in the table with a
  /// Cmpps_Invalid predicate.
  /// @{
  static const struct TableFcmpType {
    uint32_t Default;
    bool SwapScalarOperands;
    CondX86::BrCond C1, C2;
    bool SwapVectorOperands;
    CondX86::CmppsCond Predicate;
  } TableFcmp[];
  static const size_t TableFcmpSize;
  /// @}

  /// The following table summarizes the logic for lowering the icmp
  /// instruction for i32 and narrower types. Each icmp condition has a clear
  /// mapping to an x86 conditional branch instruction.
  /// @{
  static const struct TableIcmp32Type {
    CondX86::BrCond Mapping;
  } TableIcmp32[];
  static const size_t TableIcmp32Size;
  /// @}

  /// The following table summarizes the logic for lowering the icmp
  /// instruction for the i64 type. For Eq and Ne, two separate 32-bit
  /// comparisons and conditional branches are needed. For the other
  /// conditions, three separate conditional branches are needed.
  /// @{
  static const struct TableIcmp64Type {
    CondX86::BrCond C1, C2, C3;
  } TableIcmp64[];
  static const size_t TableIcmp64Size;
  /// @}

  static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
    assert(Cond < TableIcmp32Size);
    return TableIcmp32[Cond].Mapping;
  }

public:
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetX8664>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    return makeUnique<X8664::AssemblerX8664>();
  }

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetX8664(Cfg *Func);
};

class TargetDataX8664 final : public TargetDataLowering {
  TargetDataX8664() = delete;
  TargetDataX8664(const TargetDataX8664 &) = delete;
  TargetDataX8664 &operator=(const TargetDataX8664 &) = delete;

public:
  ~TargetDataX8664() override = default;

  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return makeUnique<TargetDataX8664>(Ctx);
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetDataX8664(GlobalContext *Ctx) : TargetDataLowering(Ctx) {}
  template <typename T> static void emitConstantPool(GlobalContext *Ctx);
};

class TargetHeaderX86 : public TargetHeaderLowering {
  TargetHeaderX86() = delete;
  TargetHeaderX86(const TargetHeaderX86 &) = delete;
  TargetHeaderX86 &operator=(const TargetHeaderX86 &) = delete;

public:
  ~TargetHeaderX86() = default;

  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return makeUnique<TargetHeaderX86>(Ctx);
  }

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetHeaderX86(GlobalContext *Ctx) : TargetHeaderLowering(Ctx) {}
};

} // end of namespace X8664
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGX8664_H