//===- subzero/src/IceTargetLoweringX8664.h - lowering for x86-64 -*- C++ -*-=//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringX8664 class, which implements the
/// TargetLowering interface for the X86 64-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGX8664_H
#define SUBZERO_SRC_ICETARGETLOWERINGX8664_H

#include "IceAssemblerX8664.h"
#include "IceCfg.h"
#include "IceDefs.h"
#include "IceGlobalContext.h"
#include "IceInst.h"
#include "IceInstX8664.h"
#include "IceSwitchLowering.h"
#include "IceTargetLoweringX86.h"
#include "IceTargetLoweringX86RegClass.h"
#include "IceUtils.h"

#include <array>
#include <type_traits>
#include <utility>

namespace Ice {
namespace X8664 {

using namespace ::Ice::X86;

constexpr Type WordType = IceType_i64;

class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};

class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc,
    PK_Arith // A flag-setting arithmetic instruction.
  };

  /// Currently the actual enum values are not used (other than CK_None), but we
  /// go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
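  // Illustrative note (not in the original header): the common fold pairs an
  // icmp producer (PK_Icmp32/PK_Icmp64) with a br consumer (CK_Br), so that
  //   %cond = icmp slt i32 %a, %b ; br i1 %cond, label %t, label %f
  // can be lowered as a single cmp followed by a conditional jump instead of
  // materializing %cond in a register first.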

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
                             BoolFoldingConsumerKind ConsumerKind);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  void invalidateProducersOnStore(const Inst *Instr);
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  CfgUnorderedMap<SizeT, BoolFoldingEntry> Producers;
};

class TargetX8664 : public TargetX86 {
  TargetX8664() = delete;
  TargetX8664(const TargetX8664 &) = delete;
  TargetX8664 &operator=(const TargetX8664 &) = delete;

  friend class BoolFolding;

public:
  using BrCond = CondX86::BrCond;
  using CmppsCond = CondX86::CmppsCond;

  using SegmentRegisters = X86OperandMem::SegmentRegisters;

  using InstX86Br = Insts::Br;
  using InstX86FakeRMW = Insts::FakeRMW;
  using InstX86Label = Insts::Label;

  ~TargetX8664() override = default;

  static void staticInit(GlobalContext *Ctx);
  static bool shouldBePooled(const Constant *C);
  static ::Ice::Type getPointerType();

  void translateOm1() override;
  void translateO2() override;
  void doLoadOpt();
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegisterSet::Reg_NUM; }

  Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) override {
    if (isVectorType(Dest->getType())) {
      return Insts::Movp::create(Func, Dest, SrcVar);
    }
    return Insts::Mov::create(Func, Dest, SrcVar);
  }

  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  static const char *getRegClassName(RegClass C) {
    auto ClassNum = static_cast<RegClassX86>(C);
    assert(ClassNum < RCX86_NUM);
    switch (ClassNum) {
    default:
      assert(C < RC_Target);
      return regClassString(C);
    case RCX86_Is64To8:
      return "i64to8"; // 64-bit GPR truncable to i8
    case RCX86_Is32To8:
      return "i32to8"; // 32-bit GPR truncable to i8
    case RCX86_Is16To8:
      return "i16to8"; // 16-bit GPR truncable to i8
    case RCX86_IsTrunc8Rcvr:
      return "i8from"; // 8-bit GPR truncable from wider GPRs
    case RCX86_IsAhRcvr:
      return "i8fromah"; // 8-bit GPR that ah can be assigned to
    }
  }
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
    return TypeToRegisterSet[RC];
  }

  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }

  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    Reg.assertIsValid();
    return RegisterAliases[Reg];
  }

  bool hasFramePointer() const override { return IsEbpBasedFrame; }
  void setHasFramePointer() override { IsEbpBasedFrame = true; }
  RegNumT getStackReg() const override { return RegX8664::Reg_rsp; }
  RegNumT getFrameReg() const override { return RegX8664::Reg_rbp; }
  RegNumT getFrameOrStackReg() const override {
    // If the stack pointer needs to be aligned, then the frame pointer is
    // unaligned, so always use the stack pointer.
    if (needsStackPointerAlignment())
      return getStackReg();
    return IsEbpBasedFrame ? getFrameReg() : getStackReg();
  }
  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of WordType bytes.
    const uint32_t WordSizeInBytes = typeWidthInBytes(WordType);
    return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
  }
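  // Illustrative note (not in the original header): since WordType is
  // IceType_i64, every scalar stack slot is rounded up to an 8-byte multiple,
  // e.g. typeWidthInBytesOnStack(IceType_i8) == 8 and
  // typeWidthInBytesOnStack(IceType_i32) == 8.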
  uint32_t getStackAlignment() const override {
    return X86_STACK_ALIGNMENT_BYTES;
  }
  bool needsStackPointerAlignment() const override {
    // If the ABI's stack alignment is smaller than the vector size,
    // use the (realigned) stack pointer for addressing any stack variables.
    return X86_STACK_ALIGNMENT_BYTES < RequiredStackAlignment;
  }
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  /// Returns the (negative) offset from ebp/rbp where the fixed Allocas start.
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - maxOutArgsSizeBytes());
  }
  virtual uint32_t maxOutArgsSizeBytes() const override {
    return MaxOutArgsSizeBytes;
  }
  virtual void updateMaxOutArgsSizeBytes(uint32_t Size) {
    MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, Size);
  }

  bool shouldSplitToVariable64On32(Type Ty) const override { return false; }

  SizeT getMinJumpTableSize() const override { return 4; }

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void initNodeForLowering(CfgNode *Node) override;

  void addProlog(CfgNode *Node) override;
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t StackAdjBytes,
                              size_t &InArgsSizeBytes);
  void addEpilog(CfgNode *Node) override;

  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());

protected:
  void postLower() override;

  void lowerAlloca(const InstAlloca *Instr) override;
  void lowerArguments() override;
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;
  void lowerFcmp(const InstFcmp *Instr) override;
  void lowerIcmp(const InstIcmp *Instr) override;

  void lowerIntrinsic(const InstIntrinsic *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void lowerOther(const Inst *Instr) override;
  void lowerRMW(const InstX86FakeRMW *RMW);
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const CfgVector<Type> &ArgTypes,
                                          Type ReturnType);
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;

  /// OptAddr wraps all the possible operands that an x86 address might have.
  struct OptAddr {
    Variable *Base = nullptr;
    Variable *Index = nullptr;
    uint16_t Shift = 0;
    int32_t Offset = 0;
    ConstantRelocatable *Relocatable = nullptr;
  };

  // Builds information for a canonical address expression:
  //   <Relocatable + Offset>(Base, Index, Shift)
  X86OperandMem *computeAddressOpt(const Inst *Instr, Type MemType,
                                   Operand *Addr);
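  // Illustrative example (not in the original header): an access such as A[I]
  // over 4-byte elements with a constant displacement of 16 can be folded into
  // a single operand with Base = A, Index = I, Shift = 2 (scale 1 << 2 == 4),
  // Offset = 16, i.e. roughly 16(%A,%I,4) in AT&T syntax.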
  void doAddressOptOther() override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void doAddressOptLoadSubVector() override;
  void doAddressOptStoreSubVector() override;
  void doMockBoundsCheck(Operand *Opnd) override;

  /// Naive lowering of cmpxchg.
  void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
                          Operand *Desired);
  /// Attempt a more optimized lowering of cmpxchg. Returns true if optimized.
  bool tryOptimizedCmpxchgCmpBr(Variable *DestPrev, Operand *Ptr,
                                Operand *Expected, Operand *Desired);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal,
                       Operand *SecondVal);
  /// Load from memory for a given type.
  void typedLoad(Type Ty, Variable *Dest, Variable *Base, Constant *Offset);
  /// Store to memory for a given type.
  void typedStore(Type Ty, Variable *Value, Variable *Base, Constant *Offset);
  /// Copy memory of given type from Src to Dest using OffsetAmt on both.
  void copyMemory(Type Ty, Variable *Dest, Variable *Src, int32_t OffsetAmt);
  /// Replace some calls to memcpy with inline instructions.
  void lowerMemcpy(Operand *Dest, Operand *Src, Operand *Count);
  /// Replace some calls to memmove with inline instructions.
  void lowerMemmove(Operand *Dest, Operand *Src, Operand *Count);
  /// Replace some calls to memset with inline instructions.
  void lowerMemset(Operand *Dest, Operand *Val, Operand *Count);

  /// Lower an indirect jump.
  void lowerIndirectJump(Variable *JumpTarget);

  /// Check that the comparison is in [Min,Max]. The flags register will be
  /// modified with:
  ///   - below or equal, if in range
  ///   - above, if not in range
  /// The index into the range is returned.
  Operand *lowerCmpRange(Operand *Comparison, uint64_t Min, uint64_t Max);
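  // Illustrative sketch (not in the original header): for a case range such as
  // [10, 13], lowerCmpRange(Src, 10, 13) emits code roughly equivalent to
  //   mov T, Src ; sub T, 10 ; cmp T, 3
  // so that "below or equal" holds exactly when Src is in range, and T is
  // returned as the index into the range.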
  /// Lowering of a cluster of switch cases. If the case is not matched control
  /// will pass to the default label provided. If the default label is nullptr
  /// then control will fall through to the next instruction. DoneCmp should be
  /// true if the flags contain the result of a comparison with the Comparison.
  void lowerCaseCluster(const CaseCluster &Case, Operand *Src0, bool DoneCmp,
                        CfgNode *DefaultLabel = nullptr);

  using LowerBinOp = void (TargetX8664::*)(Variable *, Operand *);
  void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
                                Variable *Dest, Operand *Ptr, Operand *Val);

  void eliminateNextVectorSextInstruction(Variable *SignExtendedResult);

  void emitStackProbe(size_t StackSizeBytes);

  /// Emit just the call instruction (without argument or return variable
  /// processing).
  Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
                         size_t NumVariadicFpArgs = 0);
  /// Materialize the moves needed to return a value of the specified type.
  Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType);

  /// Emit a jump table to the constant pool.
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  /// Emit a fake use of esp to make sure esp stays alive for the entire
  /// function. Otherwise some esp adjustments get dead-code eliminated.
  void keepEspLiveAtExit() {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(getStackReg(), WordType);
    Context.insert<InstFakeUse>(esp);
  }

  /// Operand legalization helpers. To deal with address mode constraints, the
  /// helpers will create a new Operand and emit instructions that guarantee
  /// that the Operand kind is one of those indicated by the LegalMask (a
  /// bitmask of allowed kinds). If the input Operand is known to already meet
  /// the constraints, it may be simply returned as the result, without creating
  /// any new instructions or operands.
  enum OperandLegalization {
    Legal_None = 0,
    Legal_Reg = 1 << 0, // physical register, not stack location
    Legal_Imm = 1 << 1,
    Legal_Mem = 1 << 2, // includes [eax+4*ecx] as well as [esp+12]
    Legal_Rematerializable = 1 << 3,
    Legal_AddrAbs = 1 << 4, // ConstantRelocatable doesn't have to add RebasePtr
    Legal_Default = ~(Legal_Rematerializable | Legal_AddrAbs)
    // TODO(stichnot): Figure out whether this default works for x86-64.
  };
  using LegalMask = uint32_t;
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
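  // Typical (illustrative) use in the lowering code, not part of the original
  // header: constrain a source before emitting an instruction, e.g.
  //   Operand *Src1 = legalize(Instr->getSrc(1), Legal_Reg | Legal_Imm);
  //   Variable *T = legalizeToReg(Instr->getSrc(0));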
  /// Legalize the first source operand for use in the cmp instruction.
  Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
  /// Turn a pointer operand into a memory operand that can be used by a real
  /// load/store operation. Legalizes the operand as well. This is a nop if the
  /// operand is already a legal memory operand.
  X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
                                   bool DoLegalize = true);

  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();

  static constexpr uint32_t NoSizeLimit = 0;
  /// Returns the largest type which is equal to or larger than Size bytes. The
  /// type is suitable for copying memory, i.e. a load and store will be a
  /// single instruction (for example x86 will get f64 not i64).
  static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit);
  /// Returns the smallest type which is equal to or larger than Size bytes. If
  /// one doesn't exist then the largest type smaller than Size bytes is
  /// returned. The type is suitable for memory copies as described at
  /// largestTypeInSize.
  static Type firstTypeThatFitsSize(uint32_t Size,
                                    uint32_t MaxSize = NoSizeLimit);

  Variable *copyToReg8(Operand *Src, RegNumT RegNum = RegNumT());
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());

  /// Returns a register containing all zeros, without affecting the FLAGS
  /// register, using the best instruction for the type.
  Variable *makeZeroedRegister(Type Ty, RegNumT RegNum = RegNumT());

  /// \name Returns a vector in a register with the given constant entries.
  /// @{
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfOnes(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfMinusOnes(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfHighOrderBits(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfFabsMask(Type Ty, RegNumT RegNum = RegNumT());
  /// @}

  /// Return a memory operand corresponding to a stack allocated Variable.
  X86OperandMem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                              uint32_t Offset = 0);

  /// The following are helpers that insert lowered x86 instructions with
  /// minimal syntactic overhead, so that the lowering code can look as close to
  /// assembly as practical.
  void _adc(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Adc>(Dest, Src0);
  }
  void _adc_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AdcRMW>(DestSrc0, Src1);
  }
  void _add(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Add>(Dest, Src0);
  }
  void _add_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AddRMW>(DestSrc0, Src1);
  }
  void _addps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Addps>(Dest, Src0);
  }
  void _addss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Addss>(Dest, Src0);
  }
  void _add_sp(Operand *Adjustment);
  void _and(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::And>(Dest, Src0);
  }
  void _andnps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Andnps>(Dest, Src0);
  }
  void _andps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Andps>(Dest, Src0);
  }
  void _and_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AndRMW>(DestSrc0, Src1);
  }
  void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Blendvps>(Dest, Src0, Src1);
  }
  void _br(BrCond Condition, CfgNode *TargetTrue, CfgNode *TargetFalse) {
    Context.insert<InstX86Br>(TargetTrue, TargetFalse, Condition,
                              InstX86Br::Far);
  }
  void _br(CfgNode *Target) {
    Context.insert<InstX86Br>(Target, InstX86Br::Far);
  }
  void _br(BrCond Condition, CfgNode *Target) {
    Context.insert<InstX86Br>(Target, Condition, InstX86Br::Far);
  }
  void _br(BrCond Condition, InstX86Label *Label,
           InstX86Br::Mode Kind = InstX86Br::Near) {
    Context.insert<InstX86Br>(Label, Condition, Kind);
  }
  void _bsf(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Bsf>(Dest, Src0);
  }
  void _bsr(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Bsr>(Dest, Src0);
  }
  void _bswap(Variable *SrcDest) { Context.insert<Insts::Bswap>(SrcDest); }
  void _cbwdq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Cbwdq>(Dest, Src0);
  }
  void _cmov(Variable *Dest, Operand *Src0, BrCond Condition) {
    Context.insert<Insts::Cmov>(Dest, Src0, Condition);
  }
  void _cmp(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Icmp>(Src0, Src1);
  }
  void _cmpps(Variable *Dest, Operand *Src0, CmppsCond Condition) {
    Context.insert<Insts::Cmpps>(Dest, Src0, Condition);
  }
  void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
                bool Locked) {
    Context.insert<Insts::Cmpxchg>(DestOrAddr, Eax, Desired, Locked);
    // Mark eax as possibly modified by cmpxchg.
    Context.insert<InstFakeDef>(Eax, llvm::dyn_cast<Variable>(DestOrAddr));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Eax);
  }
  void _cmpxchg8b(X86OperandMem *Addr, Variable *Edx, Variable *Eax,
                  Variable *Ecx, Variable *Ebx, bool Locked) {
    Context.insert<Insts::Cmpxchg8b>(Addr, Edx, Eax, Ecx, Ebx, Locked);
    // Mark edx and eax as possibly modified by cmpxchg8b.
    Context.insert<InstFakeDef>(Edx);
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Edx);
    Context.insert<InstFakeDef>(Eax);
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Eax);
  }
  void _cvt(Variable *Dest, Operand *Src0, Insts::Cvt::CvtVariant Variant) {
    Context.insert<Insts::Cvt>(Dest, Src0, Variant);
  }
  void _round(Variable *Dest, Operand *Src0, Operand *Imm) {
    Context.insert<Insts::Round>(Dest, Src0, Imm);
  }
  void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Div>(Dest, Src0, Src1);
  }
  void _divps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Divps>(Dest, Src0);
  }
  void _divss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Divss>(Dest, Src0);
  }
  void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Idiv>(Dest, Src0, Src1);
  }
  void _imul(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Imul>(Dest, Src0);
  }
  void _imul_imm(Variable *Dest, Operand *Src0, Constant *Imm) {
    Context.insert<Insts::ImulImm>(Dest, Src0, Imm);
  }
  void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Insertps>(Dest, Src0, Src1);
  }
  void _int3() { Context.insert<Insts::Int3>(); }
  void _jmp(Operand *Target) { Context.insert<Insts::Jmp>(Target); }
  void _lea(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Lea>(Dest, Src0);
  }
  void _link_bp();
  void _push_reg(RegNumT RegNum);
  void _pop_reg(RegNumT RegNum);
  void _mfence() { Context.insert<Insts::Mfence>(); }
  /// Moves can be used to redefine registers, creating "partial kills" for
  /// liveness.  Mark where moves are used in this way.
  void _redefined(Inst *MovInst, bool IsRedefinition = true) {
    if (IsRedefinition)
      MovInst->setDestRedefined();
  }
  /// If Dest=nullptr is passed in, then a new variable is created, marked as
  /// infinite register allocation weight, and returned through the in/out Dest
  /// argument.
  Insts::Mov *_mov(Variable *&Dest, Operand *Src0, RegNumT RegNum = RegNumT()) {
    if (Dest == nullptr)
      Dest = makeReg(Src0->getType(), RegNum);
    return Context.insert<Insts::Mov>(Dest, Src0);
  }
  void _mov_sp(Operand *NewValue);
  Insts::Movp *_movp(Variable *Dest, Operand *Src0) {
    return Context.insert<Insts::Movp>(Dest, Src0);
  }
  void _movd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movd>(Dest, Src0);
  }
  void _movq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movq>(Dest, Src0);
  }
  void _movss(Variable *Dest, Variable *Src0) {
    Context.insert<Insts::MovssRegs>(Dest, Src0);
  }
  void _movsx(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movsx>(Dest, Src0);
  }
  Insts::Movzx *_movzx(Variable *Dest, Operand *Src0) {
    return Context.insert<Insts::Movzx>(Dest, Src0);
  }
  void _maxss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Maxss>(Dest, Src0);
  }
  void _minss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Minss>(Dest, Src0);
  }
  void _maxps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Maxps>(Dest, Src0);
  }
  void _minps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Minps>(Dest, Src0);
  }
  void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Mul>(Dest, Src0, Src1);
  }
  void _mulps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Mulps>(Dest, Src0);
  }
  void _mulss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Mulss>(Dest, Src0);
  }
  void _neg(Variable *SrcDest) { Context.insert<Insts::Neg>(SrcDest); }
  void _nop(SizeT Variant) { Context.insert<Insts::Nop>(Variant); }
  void _or(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Or>(Dest, Src0);
  }
  void _orps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Orps>(Dest, Src0);
  }
  void _or_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::OrRMW>(DestSrc0, Src1);
  }
  void _padd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Padd>(Dest, Src0);
  }
  void _padds(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Padds>(Dest, Src0);
  }
  void _paddus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Paddus>(Dest, Src0);
  }
  void _pand(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pand>(Dest, Src0);
  }
  void _pandn(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pandn>(Dest, Src0);
  }
  void _pblendvb(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pblendvb>(Dest, Src0, Src1);
  }
  void _pcmpeq(Variable *Dest, Operand *Src0,
               Type ArithmeticTypeOverride = IceType_void) {
    Context.insert<Insts::Pcmpeq>(Dest, Src0, ArithmeticTypeOverride);
  }
  void _pcmpgt(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pcmpgt>(Dest, Src0);
  }
  void _pextr(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pextr>(Dest, Src0, Src1);
  }
  void _pinsr(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pinsr>(Dest, Src0, Src1);
  }
  void _pmull(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmull>(Dest, Src0);
  }
  void _pmulhw(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmulhw>(Dest, Src0);
  }
  void _pmulhuw(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmulhuw>(Dest, Src0);
  }
  void _pmaddwd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmaddwd>(Dest, Src0);
  }
  void _pmuludq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmuludq>(Dest, Src0);
  }
  void _pop(Variable *Dest) { Context.insert<Insts::Pop>(Dest); }
  void _por(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Por>(Dest, Src0);
  }
  void _punpckl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Punpckl>(Dest, Src0);
  }
  void _punpckh(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Punpckh>(Dest, Src0);
  }
  void _packss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Packss>(Dest, Src0);
  }
  void _packus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Packus>(Dest, Src0);
  }
  void _pshufb(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pshufb>(Dest, Src0);
  }
  void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pshufd>(Dest, Src0, Src1);
  }
  void _psll(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psll>(Dest, Src0);
  }
  void _psra(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psra>(Dest, Src0);
  }
  void _psrl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psrl>(Dest, Src0);
  }
  void _psub(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psub>(Dest, Src0);
  }
  void _psubs(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psubs>(Dest, Src0);
  }
  void _psubus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psubus>(Dest, Src0);
  }
  void _push(Operand *Src0) { Context.insert<Insts::Push>(Src0); }
  void _pxor(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pxor>(Dest, Src0);
  }
  void _ret(Variable *Src0 = nullptr) { Context.insert<Insts::Ret>(Src0); }
  void _rol(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Rol>(Dest, Src0);
  }
  void _round(Variable *Dest, Operand *Src, Constant *Imm) {
    Context.insert<Insts::Round>(Dest, Src, Imm);
  }
  void _sar(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sar>(Dest, Src0);
  }
  void _sbb(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sbb>(Dest, Src0);
  }
  void _sbb_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::SbbRMW>(DestSrc0, Src1);
  }
  void _setcc(Variable *Dest, BrCond Condition) {
    Context.insert<Insts::Setcc>(Dest, Condition);
  }
  void _shl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Shl>(Dest, Src0);
  }
  void _shld(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Shld>(Dest, Src0, Src1);
  }
  void _shr(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Shr>(Dest, Src0);
  }
  void _shrd(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Shrd>(Dest, Src0, Src1);
  }
  void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Shufps>(Dest, Src0, Src1);
  }
  void _movmsk(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movmsk>(Dest, Src0);
  }
  void _sqrt(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sqrt>(Dest, Src0);
  }
  void _store(Operand *Value, X86Operand *Mem) {
    Context.insert<Insts::Store>(Value, Mem);
  }
  void _storep(Variable *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreP>(Value, Mem);
  }
  void _storeq(Operand *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreQ>(Value, Mem);
  }
  void _stored(Operand *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreD>(Value, Mem);
  }
  void _sub(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sub>(Dest, Src0);
  }
  void _sub_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::SubRMW>(DestSrc0, Src1);
  }
  void _sub_sp(Operand *Adjustment);
  void _subps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Subps>(Dest, Src0);
  }
  void _subss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Subss>(Dest, Src0);
  }
  void _test(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Test>(Src0, Src1);
  }
  void _ucomiss(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Ucomiss>(Src0, Src1);
  }
  void _ud2() { Context.insert<Insts::UD2>(); }
  void _unlink_bp();
  void _xadd(Operand *Dest, Variable *Src, bool Locked) {
    Context.insert<Insts::Xadd>(Dest, Src, Locked);
    // The xadd exchanges Dest and Src (modifying Src). Model that update with
    // a FakeDef followed by a FakeUse.
    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Src);
  }
  void _xchg(Operand *Dest, Variable *Src) {
    Context.insert<Insts::Xchg>(Dest, Src);
    // The xchg modifies Dest and Src -- model that update with a
    // FakeDef/FakeUse.
    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Src);
  }
  void _xor(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Xor>(Dest, Src0);
  }
  void _xorps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Xorps>(Dest, Src0);
  }
  void _xor_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::XorRMW>(DestSrc0, Src1);
  }

  void _iaca_start() {
    if (!BuildDefs::minimal())
      Context.insert<Insts::IacaStart>();
  }
  void _iaca_end() {
    if (!BuildDefs::minimal())
      Context.insert<Insts::IacaEnd>();
  }

  /// This class helps wrap IACA markers around the code generated by the
  /// current scope. It means you don't need to put an end before each return.
  class ScopedIacaMark {
    ScopedIacaMark(const ScopedIacaMark &) = delete;
    ScopedIacaMark &operator=(const ScopedIacaMark &) = delete;

  public:
    ScopedIacaMark(TargetX8664 *Lowering) : Lowering(Lowering) {
      Lowering->_iaca_start();
    }
    ~ScopedIacaMark() { end(); }
    void end() {
      if (!Lowering)
        return;
      Lowering->_iaca_end();
      Lowering = nullptr;
    }

  private:
    TargetX8664 *Lowering;
  };
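  // Illustrative usage (not in the original header): declaring
  //   ScopedIacaMark Mark(this);
  // at the top of a lowering routine brackets all code emitted in that scope
  // with IACA start/end markers, with no explicit end() needed before each
  // return.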

  bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
  void findRMW();

  static uint32_t applyStackAlignment(uint32_t Value);

  bool IsEbpBasedFrame = false;

  /// Stack alignment guaranteed by the ABI.
  static constexpr uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
  /// Stack alignment required by the currently lowered function.
  const uint32_t RequiredStackAlignment = X86_STACK_ALIGNMENT_BYTES;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSet;
  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSetUnfiltered;
  static std::array<SmallBitVector, RegisterSet::Reg_NUM> RegisterAliases;
  SmallBitVector RegsUsed;
  std::array<VarList, IceType_NUM> PhysicalRegisters;

private:
  void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo, Operand *Src0Hi,
                    Operand *Src1Lo, Variable *DestLo, Variable *DestHi);

  /// Emit the code for a combined operation and consumer instruction, or set
  /// the destination variable of the operation if Consumer == nullptr.
  void lowerIcmpAndConsumer(const InstIcmp *Icmp, const Inst *Consumer);
  void lowerFcmpAndConsumer(const InstFcmp *Fcmp, const Inst *Consumer);
  void lowerArithAndConsumer(const InstArithmetic *Arith, const Inst *Consumer);

  /// Emit a setcc instruction if Consumer == nullptr; otherwise emit a
  /// specialized version of Consumer.
  void setccOrConsumer(BrCond Condition, Variable *Dest, const Inst *Consumer);

  /// Emit a mov [1|0] instruction if Consumer == nullptr; otherwise emit a
  /// specialized version of Consumer.
  void movOrConsumer(bool IcmpResult, Variable *Dest, const Inst *Consumer);

  /// Emit the code for instructions with a vector type.
  void lowerIcmpVector(const InstIcmp *Icmp);
  void lowerFcmpVector(const InstFcmp *Icmp);
  void lowerSelectVector(const InstSelect *Instr);

  /// Helpers for select lowering.
  void lowerSelectMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                       Operand *SrcF);
  void lowerSelectIntMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                          Operand *SrcF);
  /// Generic helper to move an arbitrary type from Src to Dest.
  void lowerMove(Variable *Dest, Operand *Src, bool IsRedefinition);

  /// Optimizations for idiom recognition.
  bool lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, const InstSelect *Select);

  BoolFolding FoldingInfo;

  /// Helpers for lowering ShuffleVector
  /// @{
  Variable *lowerShuffleVector_AllFromSameSrc(Operand *Src, SizeT Index0,
                                              SizeT Index1, SizeT Index2,
                                              SizeT Index3);
  static constexpr SizeT IGNORE_INDEX = 0x80000000u;
  Variable *lowerShuffleVector_TwoFromSameSrc(Operand *Src0, SizeT Index0,
                                              SizeT Index1, Operand *Src1,
                                              SizeT Index2, SizeT Index3);
  static constexpr SizeT UNIFIED_INDEX_0 = 0;
  static constexpr SizeT UNIFIED_INDEX_1 = 2;
  Variable *lowerShuffleVector_UnifyFromDifferentSrcs(Operand *Src0,
                                                      SizeT Index0,
                                                      Operand *Src1,
                                                      SizeT Index1);
  static constexpr SizeT CLEAR_ALL_BITS = 0x80;
  SizeT PshufbMaskCount = 0;
  GlobalString lowerShuffleVector_NewMaskName();
  ConstantRelocatable *lowerShuffleVector_CreatePshufbMask(
      int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
      int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
      int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
      int8_t Idx15);
  void lowerShuffleVector_UsingPshufb(Variable *Dest, Operand *Src0,
                                      Operand *Src1, int8_t Idx0, int8_t Idx1,
                                      int8_t Idx2, int8_t Idx3, int8_t Idx4,
                                      int8_t Idx5, int8_t Idx6, int8_t Idx7,
                                      int8_t Idx8, int8_t Idx9, int8_t Idx10,
                                      int8_t Idx11, int8_t Idx12, int8_t Idx13,
                                      int8_t Idx14, int8_t Idx15);
  /// @}

  /// The following table summarizes the logic for lowering the fcmp
  /// instruction. There is one table entry for each of the 16 conditions.
  ///
  /// The first four columns describe the case when the operands are floating
  /// point scalar values. A comment in lowerFcmp() describes the lowering
  /// template. In the most general case, there is a compare followed by two
  /// conditional branches, because some fcmp conditions don't map to a single
  /// x86 conditional branch. However, in many cases it is possible to swap the
  /// operands in the comparison and have a single conditional branch. Since
  /// it's quite tedious to validate the table by hand, good execution tests are
  /// helpful.
  ///
  /// The last two columns describe the case when the operands are vectors of
  /// floating point values. For most fcmp conditions, there is a clear mapping
  /// to a single x86 cmpps instruction variant. Some fcmp conditions require
  /// special code to handle and these are marked in the table with a
  /// Cmpps_Invalid predicate.
  /// @{
  static const struct TableFcmpType {
    uint32_t Default;
    bool SwapScalarOperands;
    CondX86::BrCond C1, C2;
    bool SwapVectorOperands;
    CondX86::CmppsCond Predicate;
  } TableFcmp[];
  static const size_t TableFcmpSize;
  /// @}

  /// The following table summarizes the logic for lowering the icmp instruction
  /// for i32 and narrower types. Each icmp condition has a clear mapping to an
  /// x86 conditional branch instruction.
  /// @{
  static const struct TableIcmp32Type {
    CondX86::BrCond Mapping;
  } TableIcmp32[];
  static const size_t TableIcmp32Size;
  /// @}

  /// The following table summarizes the logic for lowering the icmp instruction
  /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
  /// conditional branches are needed. For the other conditions, three separate
  /// conditional branches are needed.
  /// @{
  static const struct TableIcmp64Type {
    CondX86::BrCond C1, C2, C3;
  } TableIcmp64[];
  static const size_t TableIcmp64Size;
  /// @}

  static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
    assert(Cond < TableIcmp32Size);
    return TableIcmp32[Cond].Mapping;
  }

public:
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetX8664>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    return makeUnique<X8664::AssemblerX8664>();
  }

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetX8664(Cfg *Func);
};

class TargetDataX8664 final : public TargetDataLowering {
  TargetDataX8664() = delete;
  TargetDataX8664(const TargetDataX8664 &) = delete;
  TargetDataX8664 &operator=(const TargetDataX8664 &) = delete;

public:
  ~TargetDataX8664() override = default;

  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return makeUnique<TargetDataX8664>(Ctx);
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetDataX8664(GlobalContext *Ctx) : TargetDataLowering(Ctx) {}
  template <typename T> static void emitConstantPool(GlobalContext *Ctx);
};

class TargetHeaderX86 : public TargetHeaderLowering {
  TargetHeaderX86() = delete;
  TargetHeaderX86(const TargetHeaderX86 &) = delete;
  TargetHeaderX86 &operator=(const TargetHeaderX86 &) = delete;

public:
  ~TargetHeaderX86() = default;

  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return makeUnique<TargetHeaderX86>(Ctx);
  }

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetHeaderX86(GlobalContext *Ctx) : TargetHeaderLowering(Ctx) {}
};

} // end of namespace X8664
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGX8664_H