//===- subzero/src/IceTargetLoweringX8632.h - x86-32 lowering ---*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringX8632 class, which implements the
/// TargetLowering interface for the x86-32 architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGX8632_H
#define SUBZERO_SRC_ICETARGETLOWERINGX8632_H

#include "IceAssemblerX8632.h"
#include "IceDefs.h"
#include "IceInst.h"
#include "IceInstX8632.h"
#include "IceRegistersX8632.h"
#include "IceSwitchLowering.h"
#include "IceTargetLoweringX86.h"
#include "IceTargetLoweringX86RegClass.h"
#include "IceUtils.h"

#include <array>
#include <type_traits>
#include <utility>

namespace Ice {
namespace X8632 {

using namespace ::Ice::X86;

constexpr Type WordType = IceType_i32;

class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  /// NumUses counts the number of times Var is used as a source operand in the
  /// basic block. If IsComplex is true and there is more than one use of Var,
  /// then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};

class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc,
    PK_Arith // A flag-setting arithmetic instruction.
  };

  /// Currently the actual enum values are not used (other than CK_None), but
  /// we go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
                             BoolFoldingConsumerKind ConsumerKind);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  void invalidateProducersOnStore(const Inst *Instr);
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  CfgUnorderedMap<SizeT, BoolFoldingEntry> Producers;
};
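
// A minimal sketch (not from the sources; names are hypothetical) of the
// producer/consumer pattern that BoolFolding recognizes. Given bitcode such
// as:
//
//   %cond = icmp slt i32 %a, %b          ; producer (PK_Icmp32)
//   br i1 %cond, label %yes, label %no   ; consumer (CK_Br)
//
// the folding lets the lowering emit a single cmp/jcc pair instead of first
// materializing %cond with setcc and then testing it again for the branch.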

class TargetX8632 : public TargetX86 {
  TargetX8632() = delete;
  TargetX8632(const TargetX8632 &) = delete;
  TargetX8632 &operator=(const TargetX8632 &) = delete;

  friend class BoolFolding;

public:
  using BrCond = CondX86::BrCond;
  using CmppsCond = CondX86::CmppsCond;

  using SegmentRegisters = X86OperandMem::SegmentRegisters;

  using InstX86Br = Insts::Br;
  using InstX86FakeRMW = Insts::FakeRMW;
  using InstX86Label = Insts::Label;

  ~TargetX8632() override = default;

  static void staticInit(GlobalContext *Ctx);
  static bool shouldBePooled(const Constant *C);
  static ::Ice::Type getPointerType();

  void translateOm1() override;
  void translateO2() override;
  void doLoadOpt();
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegisterSet::Reg_NUM; }

  Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) override {
    if (isVectorType(Dest->getType())) {
      return Insts::Movp::create(Func, Dest, SrcVar);
    }
    return Insts::Mov::create(Func, Dest, SrcVar);
  }

  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  static const char *getRegClassName(RegClass C) {
    auto ClassNum = static_cast<RegClassX86>(C);
    assert(ClassNum < RCX86_NUM);
    switch (ClassNum) {
    default:
      assert(C < RC_Target);
      return regClassString(C);
    case RCX86_Is64To8:
      return "i64to8"; // 64-bit GPR truncatable to i8
    case RCX86_Is32To8:
      return "i32to8"; // 32-bit GPR truncatable to i8
    case RCX86_Is16To8:
      return "i16to8"; // 16-bit GPR truncatable to i8
    case RCX86_IsTrunc8Rcvr:
      return "i8from"; // 8-bit GPR truncatable from wider GPRs
    case RCX86_IsAhRcvr:
      return "i8fromah"; // 8-bit GPR that ah can be assigned to
    }
  }
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
    return TypeToRegisterSet[RC];
  }

  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }

  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    Reg.assertIsValid();
    return RegisterAliases[Reg];
  }

  bool hasFramePointer() const override { return IsEbpBasedFrame; }
  void setHasFramePointer() override { IsEbpBasedFrame = true; }
  RegNumT getStackReg() const override { return RegX8632::Reg_esp; }
  RegNumT getFrameReg() const override { return RegX8632::Reg_ebp; }
  RegNumT getFrameOrStackReg() const override {
    // If the stack pointer needs to be aligned, then the frame pointer is
    // unaligned, so always use the stack pointer.
    if (needsStackPointerAlignment())
      return getStackReg();
    return IsEbpBasedFrame ? getFrameReg() : getStackReg();
  }
  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of WordType bytes.
    const uint32_t WordSizeInBytes = typeWidthInBytes(WordType);
    return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
  }
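  // Illustrative values (assuming the standard Subzero type sizes): every
  // scalar stack slot is padded up to a 4-byte multiple on x86-32, e.g.
  //   typeWidthInBytesOnStack(IceType_i8)  == 4
  //   typeWidthInBytesOnStack(IceType_f32) == 4
  //   typeWidthInBytesOnStack(IceType_i64) == 8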
  uint32_t getStackAlignment() const override {
    return X86_STACK_ALIGNMENT_BYTES;
  }
  bool needsStackPointerAlignment() const override {
    // If the ABI's stack alignment is smaller than the vector size (16 bytes),
    // use the (realigned) stack pointer for addressing any stack variables.
    return X86_STACK_ALIGNMENT_BYTES < 16;
  }
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  /// Returns the (negative) offset from ebp/rbp where the fixed Allocas start.
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - maxOutArgsSizeBytes());
  }
  virtual uint32_t maxOutArgsSizeBytes() const override {
    return MaxOutArgsSizeBytes;
  }
  virtual void updateMaxOutArgsSizeBytes(uint32_t Size) {
    MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, Size);
  }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

  SizeT getMinJumpTableSize() const override { return 4; }

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void initNodeForLowering(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);

  void addProlog(CfgNode *Node) override;
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t StackAdjBytes,
                              size_t &InArgsSizeBytes);
  void addEpilog(CfgNode *Node) override;

  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());

protected:
  void postLower() override;

  void lowerAlloca(const InstAlloca *Instr) override;
  void lowerArguments() override;
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;
  void lowerFcmp(const InstFcmp *Instr) override;
  void lowerIcmp(const InstIcmp *Instr) override;

  void lowerIntrinsic(const InstIntrinsic *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void lowerOther(const Inst *Instr) override;
  void lowerRMW(const InstX86FakeRMW *RMW);
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const CfgVector<Type> &ArgTypes,
                                          Type ReturnType);
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;

  /// OptAddr wraps all the possible operands that an x86 address might have.
  struct OptAddr {
    Variable *Base = nullptr;
    Variable *Index = nullptr;
    uint16_t Shift = 0;
    int32_t Offset = 0;
    ConstantRelocatable *Relocatable = nullptr;
  };

  // Builds information for a canonical address expression:
  //   <Relocatable + Offset>(Base, Index, Shift)
  X86OperandMem *computeAddressOpt(const Inst *Instr, Type MemType,
                                   Operand *Addr);
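  // For illustration (hypothetical values, not taken from the sources): a
  // load of a[i] from a global i32 array with symbol "a" canonicalizes
  // roughly to
  //   Base        = nullptr
  //   Index       = i
  //   Shift       = 2          // scale by 4 == sizeof(i32)
  //   Offset      = 0
  //   Relocatable = "a"
  // which the lowering can fold into a single addressing mode, a(,%index,4).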
  void doAddressOptOther() override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void doAddressOptLoadSubVector() override;
  void doAddressOptStoreSubVector() override;
  void doMockBoundsCheck(Operand *Opnd) override;

  /// Naive lowering of cmpxchg.
  void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
                          Operand *Desired);
  /// Attempt a more optimized lowering of cmpxchg. Returns true if optimized.
  bool tryOptimizedCmpxchgCmpBr(Variable *DestPrev, Operand *Ptr,
                                Operand *Expected, Operand *Desired);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal,
                       Operand *SecondVal);
  /// Load from memory for a given type.
  void typedLoad(Type Ty, Variable *Dest, Variable *Base, Constant *Offset);
  /// Store to memory for a given type.
  void typedStore(Type Ty, Variable *Value, Variable *Base, Constant *Offset);
  /// Copy memory of given type from Src to Dest using OffsetAmt on both.
  void copyMemory(Type Ty, Variable *Dest, Variable *Src, int32_t OffsetAmt);
  /// Replace some calls to memcpy with inline instructions.
  void lowerMemcpy(Operand *Dest, Operand *Src, Operand *Count);
  /// Replace some calls to memmove with inline instructions.
  void lowerMemmove(Operand *Dest, Operand *Src, Operand *Count);
  /// Replace some calls to memset with inline instructions.
  void lowerMemset(Operand *Dest, Operand *Val, Operand *Count);

  /// Lower an indirect jump, adding sandboxing when needed.
  void lowerIndirectJump(Variable *JumpTarget);

  /// Check whether the comparison value is in [Min,Max]. The flags register
  /// will be set as follows:
  ///   - below or equal, if in range
  ///   - above, if not in range
  /// The index into the range is returned.
  Operand *lowerCmpRange(Operand *Comparison, uint64_t Min, uint64_t Max);
  /// Lowering of a cluster of switch cases. If the case is not matched,
  /// control passes to the default label provided. If the default label is
  /// nullptr then control falls through to the next instruction. DoneCmp
  /// should be true if the flags already contain the result of a comparison
  /// with the Comparison operand.
  void lowerCaseCluster(const CaseCluster &Case, Operand *Src0, bool DoneCmp,
                        CfgNode *DefaultLabel = nullptr);
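  // A rough sketch (illustrative assembly, not quoted from the lowering
  // code) of how a dense case cluster {10..13} can be handled with the range
  // check from lowerCmpRange followed by a jump table:
  //   sub  eax, 10          ; index into the range
  //   cmp  eax, 3           ; Max - Min
  //   ja   .Ldefault        ; above => not in range
  //   jmp  [.Ltable + eax*4]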

  using LowerBinOp = void (TargetX8632::*)(Variable *, Operand *);
  void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
                                Variable *Dest, Operand *Ptr, Operand *Val);

  void eliminateNextVectorSextInstruction(Variable *SignExtendedResult);

  void emitStackProbe(size_t StackSizeBytes);

  /// Emit just the call instruction (without argument or return variable
  /// processing), sandboxing if needed.
  Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
                         size_t NumVariadicFpArgs = 0);
  /// Materialize the moves needed to return a value of the specified type.
  Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType);

  /// Emit a jump table to the constant pool.
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  /// Emit a fake use of esp to make sure esp stays alive for the entire
  /// function. Otherwise some esp adjustments get dead-code eliminated.
  void keepEspLiveAtExit() {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(getStackReg(), WordType);
    Context.insert<InstFakeUse>(esp);
  }

  /// Operand legalization helpers. To deal with address mode constraints, the
  /// helpers will create a new Operand and emit instructions that guarantee
  /// that the Operand kind is one of those indicated by the LegalMask (a
  /// bitmask of allowed kinds). If the input Operand is known to already meet
  /// the constraints, it may be simply returned as the result, without
  /// creating any new instructions or operands.
  enum OperandLegalization {
    Legal_None = 0,
    Legal_Reg = 1 << 0, // physical register, not stack location
    Legal_Imm = 1 << 1,
    Legal_Mem = 1 << 2, // includes [eax+4*ecx] as well as [esp+12]
    Legal_Rematerializable = 1 << 3,
    Legal_AddrAbs = 1 << 4, // ConstantRelocatable doesn't have to add RebasePtr
    Legal_Default = ~(Legal_Rematerializable | Legal_AddrAbs)
    // TODO(stichnot): Figure out whether this default works for x86-64.
  };
  using LegalMask = uint32_t;
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
  /// Legalize the first source operand for use in the cmp instruction.
  Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
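  // A minimal usage sketch (hypothetical call site, not quoted from the
  // lowering code): cmp cannot take two memory operands, so a lowering
  // routine might constrain its sources before emitting the compare:
  //   Operand *Src0R = legalize(Src0, Legal_Reg | Legal_Mem);
  //   Operand *Src1RI = legalize(Src1, Legal_Reg | Legal_Imm);
  //   _cmp(Src0R, Src1RI);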
  /// Turn a pointer operand into a memory operand that can be used by a real
  /// load/store operation. Legalizes the operand as well. This is a nop if the
  /// operand is already a legal memory operand.
  X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
                                   bool DoLegalize = true);

  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();

  static constexpr uint32_t NoSizeLimit = 0;
  /// Returns the largest type which is equal to or larger than Size bytes. The
  /// type is suitable for copying memory i.e. a load and store will be a
  /// single instruction (for example x86 will get f64 not i64).
  static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit);
  /// Returns the smallest type which is equal to or larger than Size bytes. If
  /// one doesn't exist then the largest type smaller than Size bytes is
  /// returned. The type is suitable for memory copies as described at
  /// largestTypeInSize.
  static Type firstTypeThatFitsSize(uint32_t Size,
                                    uint32_t MaxSize = NoSizeLimit);

  Variable *copyToReg8(Operand *Src, RegNumT RegNum = RegNumT());
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());

  /// Returns a register containing all zeros, without affecting the FLAGS
  /// register, using the best instruction for the type.
  Variable *makeZeroedRegister(Type Ty, RegNumT RegNum = RegNumT());

  /// \name Returns a vector in a register with the given constant entries.
  /// @{
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfOnes(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfMinusOnes(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfHighOrderBits(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfFabsMask(Type Ty, RegNumT RegNum = RegNumT());
  /// @}

  /// Return a memory operand corresponding to a stack allocated Variable.
  X86OperandMem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                              uint32_t Offset = 0);

  /// The following are helpers that insert lowered x86 instructions with
  /// minimal syntactic overhead, so that the lowering code can look as close
  /// to assembly as practical.
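  // An illustrative (hypothetical) use of these emitters from a lowering
  // routine; the resulting code reads almost like the assembly it produces:
  //   Variable *T = makeReg(IceType_i32);
  //   _mov(T, Src0);   // mov  T, Src0
  //   _add(T, Src1);   // add  T, Src1
  //   _mov(Dest, T);   // mov  Dest, T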
  void _adc(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Adc>(Dest, Src0);
  }
  void _adc_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AdcRMW>(DestSrc0, Src1);
  }
  void _add(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Add>(Dest, Src0);
  }
  void _add_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AddRMW>(DestSrc0, Src1);
  }
  void _addps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Addps>(Dest, Src0);
  }
  void _addss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Addss>(Dest, Src0);
  }
  void _add_sp(Operand *Adjustment);
  void _and(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::And>(Dest, Src0);
  }
  void _andnps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Andnps>(Dest, Src0);
  }
  void _andps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Andps>(Dest, Src0);
  }
  void _and_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AndRMW>(DestSrc0, Src1);
  }
  void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Blendvps>(Dest, Src0, Src1);
  }
  void _br(BrCond Condition, CfgNode *TargetTrue, CfgNode *TargetFalse) {
    Context.insert<InstX86Br>(TargetTrue, TargetFalse, Condition,
                              InstX86Br::Far);
  }
  void _br(CfgNode *Target) {
    Context.insert<InstX86Br>(Target, InstX86Br::Far);
  }
  void _br(BrCond Condition, CfgNode *Target) {
    Context.insert<InstX86Br>(Target, Condition, InstX86Br::Far);
  }
  void _br(BrCond Condition, InstX86Label *Label,
           InstX86Br::Mode Kind = InstX86Br::Near) {
    Context.insert<InstX86Br>(Label, Condition, Kind);
  }
  void _bsf(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Bsf>(Dest, Src0);
  }
  void _bsr(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Bsr>(Dest, Src0);
  }
  void _bswap(Variable *SrcDest) { Context.insert<Insts::Bswap>(SrcDest); }
  void _cbwdq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Cbwdq>(Dest, Src0);
  }
  void _cmov(Variable *Dest, Operand *Src0, BrCond Condition) {
    Context.insert<Insts::Cmov>(Dest, Src0, Condition);
  }
  void _cmp(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Icmp>(Src0, Src1);
  }
  void _cmpps(Variable *Dest, Operand *Src0, CmppsCond Condition) {
    Context.insert<Insts::Cmpps>(Dest, Src0, Condition);
  }
  void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
                bool Locked) {
    Context.insert<Insts::Cmpxchg>(DestOrAddr, Eax, Desired, Locked);
    // Mark eax as possibly modified by cmpxchg.
    Context.insert<InstFakeDef>(Eax, llvm::dyn_cast<Variable>(DestOrAddr));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Eax);
  }
  void _cmpxchg8b(X86OperandMem *Addr, Variable *Edx, Variable *Eax,
                  Variable *Ecx, Variable *Ebx, bool Locked) {
    Context.insert<Insts::Cmpxchg8b>(Addr, Edx, Eax, Ecx, Ebx, Locked);
    // Mark edx and eax as possibly modified by cmpxchg8b.
    Context.insert<InstFakeDef>(Edx);
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Edx);
    Context.insert<InstFakeDef>(Eax);
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Eax);
  }
  void _cvt(Variable *Dest, Operand *Src0, Insts::Cvt::CvtVariant Variant) {
    Context.insert<Insts::Cvt>(Dest, Src0, Variant);
  }
  void _round(Variable *Dest, Operand *Src0, Operand *Imm) {
    Context.insert<Insts::Round>(Dest, Src0, Imm);
  }
  void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Div>(Dest, Src0, Src1);
  }
  void _divps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Divps>(Dest, Src0);
  }
  void _divss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Divss>(Dest, Src0);
  }
  void _fld(Operand *Src0) { Context.insert<Insts::Fld>(Src0); }
  void _fstp(Variable *Dest) { Context.insert<Insts::Fstp>(Dest); }
  void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Idiv>(Dest, Src0, Src1);
  }
  void _imul(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Imul>(Dest, Src0);
  }
  void _imul_imm(Variable *Dest, Operand *Src0, Constant *Imm) {
    Context.insert<Insts::ImulImm>(Dest, Src0, Imm);
  }
  void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Insertps>(Dest, Src0, Src1);
  }
  void _int3() { Context.insert<Insts::Int3>(); }
  void _jmp(Operand *Target) { Context.insert<Insts::Jmp>(Target); }
  void _lea(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Lea>(Dest, Src0);
  }
  void _link_bp();
  void _push_reg(RegNumT RegNum);
  void _pop_reg(RegNumT RegNum);
  void _mfence() { Context.insert<Insts::Mfence>(); }
  /// Moves can be used to redefine registers, creating "partial kills" for
  /// liveness. Mark where moves are used in this way.
  void _redefined(Inst *MovInst, bool IsRedefinition = true) {
    if (IsRedefinition)
      MovInst->setDestRedefined();
  }
  /// If Dest=nullptr is passed in, then a new variable is created, marked as
  /// infinite register allocation weight, and returned through the in/out
  /// Dest argument.
  Insts::Mov *_mov(Variable *&Dest, Operand *Src0,
                   RegNumT RegNum = RegNumT()) {
    if (Dest == nullptr)
      Dest = makeReg(Src0->getType(), RegNum);
    return Context.insert<Insts::Mov>(Dest, Src0);
  }
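  // Sketch of the Dest == nullptr convention (hypothetical call site): a null
  // Variable* asks _mov to create an infinite-weight temporary of Src0's type
  // and hand it back through the reference parameter.
  //   Variable *T = nullptr;
  //   _mov(T, Src0); // T now names a fresh register-allocated temporary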
  void _mov_sp(Operand *NewValue);
  Insts::Movp *_movp(Variable *Dest, Operand *Src0) {
    return Context.insert<Insts::Movp>(Dest, Src0);
  }
  void _movd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movd>(Dest, Src0);
  }
  void _movq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movq>(Dest, Src0);
  }
  void _movss(Variable *Dest, Variable *Src0) {
    Context.insert<Insts::MovssRegs>(Dest, Src0);
  }
  void _movsx(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movsx>(Dest, Src0);
  }
  Insts::Movzx *_movzx(Variable *Dest, Operand *Src0) {
    return Context.insert<Insts::Movzx>(Dest, Src0);
  }
  void _maxss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Maxss>(Dest, Src0);
  }
  void _minss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Minss>(Dest, Src0);
  }
  void _maxps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Maxps>(Dest, Src0);
  }
  void _minps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Minps>(Dest, Src0);
  }
  void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Mul>(Dest, Src0, Src1);
  }
  void _mulps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Mulps>(Dest, Src0);
  }
  void _mulss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Mulss>(Dest, Src0);
  }
  void _neg(Variable *SrcDest) { Context.insert<Insts::Neg>(SrcDest); }
  void _nop(SizeT Variant) { Context.insert<Insts::Nop>(Variant); }
  void _or(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Or>(Dest, Src0);
  }
  void _orps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Orps>(Dest, Src0);
  }
  void _or_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::OrRMW>(DestSrc0, Src1);
  }
  void _padd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Padd>(Dest, Src0);
  }
  void _padds(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Padds>(Dest, Src0);
  }
  void _paddus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Paddus>(Dest, Src0);
  }
  void _pand(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pand>(Dest, Src0);
  }
  void _pandn(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pandn>(Dest, Src0);
  }
  void _pblendvb(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pblendvb>(Dest, Src0, Src1);
  }
  void _pcmpeq(Variable *Dest, Operand *Src0,
               Type ArithmeticTypeOverride = IceType_void) {
    Context.insert<Insts::Pcmpeq>(Dest, Src0, ArithmeticTypeOverride);
  }
  void _pcmpgt(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pcmpgt>(Dest, Src0);
  }
  void _pextr(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pextr>(Dest, Src0, Src1);
  }
  void _pinsr(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pinsr>(Dest, Src0, Src1);
  }
  void _pmull(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmull>(Dest, Src0);
  }
  void _pmulhw(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmulhw>(Dest, Src0);
  }
  void _pmulhuw(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmulhuw>(Dest, Src0);
  }
  void _pmaddwd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmaddwd>(Dest, Src0);
  }
  void _pmuludq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmuludq>(Dest, Src0);
  }
  void _pop(Variable *Dest) { Context.insert<Insts::Pop>(Dest); }
  void _por(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Por>(Dest, Src0);
  }
  void _punpckl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Punpckl>(Dest, Src0);
  }
  void _punpckh(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Punpckh>(Dest, Src0);
  }
  void _packss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Packss>(Dest, Src0);
  }
  void _packus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Packus>(Dest, Src0);
  }
  void _pshufb(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pshufb>(Dest, Src0);
  }
  void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pshufd>(Dest, Src0, Src1);
  }
  void _psll(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psll>(Dest, Src0);
  }
  void _psra(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psra>(Dest, Src0);
  }
  void _psrl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psrl>(Dest, Src0);
  }
  void _psub(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psub>(Dest, Src0);
  }
  void _psubs(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psubs>(Dest, Src0);
  }
  void _psubus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psubus>(Dest, Src0);
  }
  void _push(Operand *Src0) { Context.insert<Insts::Push>(Src0); }
  void _pxor(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pxor>(Dest, Src0);
  }
  void _ret(Variable *Src0 = nullptr) { Context.insert<Insts::Ret>(Src0); }
  void _rol(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Rol>(Dest, Src0);
  }
  void _round(Variable *Dest, Operand *Src, Constant *Imm) {
    Context.insert<Insts::Round>(Dest, Src, Imm);
  }
  void _sar(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sar>(Dest, Src0);
  }
  void _sbb(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sbb>(Dest, Src0);
  }
  void _sbb_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::SbbRMW>(DestSrc0, Src1);
  }
  void _setcc(Variable *Dest, BrCond Condition) {
    Context.insert<Insts::Setcc>(Dest, Condition);
  }
  void _shl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Shl>(Dest, Src0);
  }
  void _shld(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Shld>(Dest, Src0, Src1);
  }
  void _shr(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Shr>(Dest, Src0);
  }
  void _shrd(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Shrd>(Dest, Src0, Src1);
  }
  void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Shufps>(Dest, Src0, Src1);
  }
  void _movmsk(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movmsk>(Dest, Src0);
  }
  void _sqrt(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sqrt>(Dest, Src0);
  }
  void _store(Operand *Value, X86Operand *Mem) {
    Context.insert<Insts::Store>(Value, Mem);
  }
  void _storep(Variable *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreP>(Value, Mem);
  }
  void _storeq(Operand *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreQ>(Value, Mem);
  }
  void _stored(Operand *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreD>(Value, Mem);
  }
  void _sub(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sub>(Dest, Src0);
  }
  void _sub_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::SubRMW>(DestSrc0, Src1);
  }
  void _sub_sp(Operand *Adjustment);
  void _subps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Subps>(Dest, Src0);
  }
  void _subss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Subss>(Dest, Src0);
  }
  void _test(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Test>(Src0, Src1);
  }
  void _ucomiss(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Ucomiss>(Src0, Src1);
  }
  void _ud2() { Context.insert<Insts::UD2>(); }
  void _unlink_bp();
  void _xadd(Operand *Dest, Variable *Src, bool Locked) {
    Context.insert<Insts::Xadd>(Dest, Src, Locked);
    // The xadd exchanges Dest and Src (modifying Src). Model that update with
    // a FakeDef followed by a FakeUse.
    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Src);
  }
  void _xchg(Operand *Dest, Variable *Src) {
    Context.insert<Insts::Xchg>(Dest, Src);
    // The xchg modifies Dest and Src -- model that update with a
    // FakeDef/FakeUse.
    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Src);
  }
  void _xor(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Xor>(Dest, Src0);
  }
  void _xorps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Xorps>(Dest, Src0);
  }
  void _xor_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::XorRMW>(DestSrc0, Src1);
  }

  void _iaca_start() {
    if (!BuildDefs::minimal())
      Context.insert<Insts::IacaStart>();
  }
  void _iaca_end() {
    if (!BuildDefs::minimal())
      Context.insert<Insts::IacaEnd>();
  }

  /// This class helps wrap IACA markers around the code generated by the
  /// current scope. It means you don't need to put an end before each return.
  class ScopedIacaMark {
    ScopedIacaMark(const ScopedIacaMark &) = delete;
    ScopedIacaMark &operator=(const ScopedIacaMark &) = delete;

  public:
    ScopedIacaMark(TargetX8632 *Lowering) : Lowering(Lowering) {
      Lowering->_iaca_start();
    }
    ~ScopedIacaMark() { end(); }
    void end() {
      if (!Lowering)
        return;
      Lowering->_iaca_end();
      Lowering = nullptr;
    }

  private:
    TargetX8632 *Lowering;
  };
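  // Illustrative use (hypothetical call site): the RAII mark brackets all
  // instructions emitted in the enclosing scope with IACA start/end markers,
  // including on early returns.
  //   void TargetX8632::lowerSomething(const Inst *Instr) {
  //     ScopedIacaMark Mark(this);
  //     // ... emit instructions ...
  //   } // IacaEnd is emitted automatically here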

  bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
  void findRMW();

  static uint32_t applyStackAlignment(uint32_t Value);

  bool IsEbpBasedFrame = false;

#if defined(_WIN32)
  // Windows 32-bit only guarantees 4-byte stack alignment.
  static constexpr uint32_t X86_STACK_ALIGNMENT_BYTES = 4;
#else
  /// Stack alignment guaranteed by the System V ABI.
  static constexpr uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
#endif
  /// Stack alignment required by the currently lowered function.
  size_t RequiredStackAlignment = X86_STACK_ALIGNMENT_BYTES;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSet;
  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSetUnfiltered;
  static std::array<SmallBitVector, RegisterSet::Reg_NUM> RegisterAliases;
  SmallBitVector RegsUsed;
  std::array<VarList, IceType_NUM> PhysicalRegisters;
  /// RebasePtr is a Variable that holds the Rebasing pointer (if any) for the
  /// current sandboxing type.
  Variable *RebasePtr = nullptr;

private:
  void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo,
                    Operand *Src0Hi, Operand *Src1Lo, Variable *DestLo,
                    Variable *DestHi);

  /// Emit the code for a combined operation and consumer instruction, or set
  /// the destination variable of the operation if Consumer == nullptr.
  void lowerIcmpAndConsumer(const InstIcmp *Icmp, const Inst *Consumer);
  void lowerFcmpAndConsumer(const InstFcmp *Fcmp, const Inst *Consumer);
  void lowerArithAndConsumer(const InstArithmetic *Arith, const Inst *Consumer);

  /// Emit a setcc instruction if Consumer == nullptr; otherwise emit a
  /// specialized version of Consumer.
  void setccOrConsumer(BrCond Condition, Variable *Dest, const Inst *Consumer);

  /// Emit a mov [1|0] instruction if Consumer == nullptr; otherwise emit a
  /// specialized version of Consumer.
  void movOrConsumer(bool IcmpResult, Variable *Dest, const Inst *Consumer);

  /// Emit the code for instructions with a vector type.
  void lowerIcmpVector(const InstIcmp *Icmp);
  void lowerFcmpVector(const InstFcmp *Fcmp);
  void lowerSelectVector(const InstSelect *Instr);

  /// Helpers for select lowering.
  void lowerSelectMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                       Operand *SrcF);
  void lowerSelectIntMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                          Operand *SrcF);
  /// Generic helper to move an arbitrary type from Src to Dest.
  void lowerMove(Variable *Dest, Operand *Src, bool IsRedefinition);

  /// Optimizations for idiom recognition.
  bool lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, const InstSelect *Select);

  /// lowerIcmp64 handles 64-bit icmp lowering.
  void lowerIcmp64(const InstIcmp *Icmp, const Inst *Consumer);

  BoolFolding FoldingInfo;

  /// Helpers for lowering ShuffleVector
  /// @{
  Variable *lowerShuffleVector_AllFromSameSrc(Operand *Src, SizeT Index0,
                                              SizeT Index1, SizeT Index2,
                                              SizeT Index3);
  static constexpr SizeT IGNORE_INDEX = 0x80000000u;
  Variable *lowerShuffleVector_TwoFromSameSrc(Operand *Src0, SizeT Index0,
                                              SizeT Index1, Operand *Src1,
                                              SizeT Index2, SizeT Index3);
  static constexpr SizeT UNIFIED_INDEX_0 = 0;
  static constexpr SizeT UNIFIED_INDEX_1 = 2;
  Variable *lowerShuffleVector_UnifyFromDifferentSrcs(Operand *Src0,
                                                      SizeT Index0,
                                                      Operand *Src1,
                                                      SizeT Index1);
  static constexpr SizeT CLEAR_ALL_BITS = 0x80;
  SizeT PshufbMaskCount = 0;
  GlobalString lowerShuffleVector_NewMaskName();
  ConstantRelocatable *lowerShuffleVector_CreatePshufbMask(
      int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
      int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
      int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
      int8_t Idx15);
  void lowerShuffleVector_UsingPshufb(Variable *Dest, Operand *Src0,
                                      Operand *Src1, int8_t Idx0, int8_t Idx1,
                                      int8_t Idx2, int8_t Idx3, int8_t Idx4,
                                      int8_t Idx5, int8_t Idx6, int8_t Idx7,
                                      int8_t Idx8, int8_t Idx9, int8_t Idx10,
                                      int8_t Idx11, int8_t Idx12, int8_t Idx13,
                                      int8_t Idx14, int8_t Idx15);
  /// @}

  /// The following table summarizes the logic for lowering the fcmp
  /// instruction. There is one table entry for each of the 16 conditions.
  ///
  /// The first four columns describe the case when the operands are floating
  /// point scalar values. A comment in lowerFcmp() describes the lowering
  /// template. In the most general case, there is a compare followed by two
  /// conditional branches, because some fcmp conditions don't map to a single
  /// x86 conditional branch. However, in many cases it is possible to swap the
  /// operands in the comparison and have a single conditional branch. Since
  /// it's quite tedious to validate the table by hand, good execution tests
  /// are helpful.
  ///
  /// The last two columns describe the case when the operands are vectors of
  /// floating point values. For most fcmp conditions, there is a clear mapping
  /// to a single x86 cmpps instruction variant. Some fcmp conditions require
  /// special code to handle and these are marked in the table with a
  /// Cmpps_Invalid predicate.
  /// @{
  static const struct TableFcmpType {
    uint32_t Default;
    bool SwapScalarOperands;
    CondX86::BrCond C1, C2;
    bool SwapVectorOperands;
    CondX86::CmppsCond Predicate;
  } TableFcmp[];
  static const size_t TableFcmpSize;
  /// @}
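  // For intuition (illustrative only; the authoritative sequences live in the
  // .cpp table and lowerFcmp()): an unordered-aware condition such as
  // "fcmp une" cannot be expressed with a single jcc after ucomiss, because
  // the parity flag encodes "unordered". A typical two-branch shape is:
  //   ucomiss xmm0, xmm1
  //   jne  .Ltrue      ; not equal
  //   jp   .Ltrue      ; unordered (NaN) also counts as "not equal"
  //   jmp  .Lfalse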

  /// The following table summarizes the logic for lowering the icmp
  /// instruction for i32 and narrower types. Each icmp condition has a clear
  /// mapping to an x86 conditional branch instruction.
  /// @{
  static const struct TableIcmp32Type {
    CondX86::BrCond Mapping;
  } TableIcmp32[];
  static const size_t TableIcmp32Size;
  /// @}

  /// The following table summarizes the logic for lowering the icmp
  /// instruction for the i64 type. For Eq and Ne, two separate 32-bit
  /// comparisons and conditional branches are needed. For the other
  /// conditions, three separate conditional branches are needed.
  /// @{
  static const struct TableIcmp64Type {
    CondX86::BrCond C1, C2, C3;
  } TableIcmp64[];
  static const size_t TableIcmp64Size;
  /// @}

  static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
    assert(static_cast<size_t>(Cond) < TableIcmp32Size);
    return TableIcmp32[Cond].Mapping;
  }

public:
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetX8632>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    return makeUnique<X8632::AssemblerX8632>();
  }

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetX8632(Cfg *Func);
};

class TargetDataX8632 final : public TargetDataLowering {
  TargetDataX8632() = delete;
  TargetDataX8632(const TargetDataX8632 &) = delete;
  TargetDataX8632 &operator=(const TargetDataX8632 &) = delete;

public:
  ~TargetDataX8632() override = default;

  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return makeUnique<TargetDataX8632>(Ctx);
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetDataX8632(GlobalContext *Ctx) : TargetDataLowering(Ctx) {}
  template <typename T> static void emitConstantPool(GlobalContext *Ctx);
};

class TargetHeaderX86 : public TargetHeaderLowering {
  TargetHeaderX86() = delete;
  TargetHeaderX86(const TargetHeaderX86 &) = delete;
  TargetHeaderX86 &operator=(const TargetHeaderX86 &) = delete;

public:
  ~TargetHeaderX86() = default;

  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return makeUnique<TargetHeaderX86>(Ctx);
  }

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetHeaderX86(GlobalContext *Ctx) : TargetHeaderLowering(Ctx) {}
};

} // end of namespace X8632
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGX8632_H