//===- subzero/src/IceTargetLoweringX8632.h - x86-32 lowering ---*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringX8632 class, which implements the
/// TargetLowering interface for the x86-32 architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGX8632_H
#define SUBZERO_SRC_ICETARGETLOWERINGX8632_H

#include "IceAssemblerX8632.h"
#include "IceDefs.h"
#include "IceInst.h"
#include "IceInstX8632.h"
#include "IceRegistersX8632.h"
#include "IceSwitchLowering.h"
#include "IceTargetLoweringX86.h"
#include "IceTargetLoweringX86RegClass.h"
#include "IceUtils.h"

#include <array>
#include <type_traits>
#include <utility>

namespace Ice {
namespace X8632 {

using namespace ::Ice::X86;

constexpr Type WordType = IceType_i32;

class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};

class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc,
    PK_Arith // A flag-setting arithmetic instruction.
  };

  /// Currently the actual enum values are not used (other than CK_None), but we
  /// go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
                             BoolFoldingConsumerKind ConsumerKind);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  void invalidateProducersOnStore(const Inst *Instr);
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  CfgUnorderedMap<SizeT, BoolFoldingEntry> Producers;
};
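
// Illustrative example (not from this file): with a single-use, non-live-out
// i1 producer such as
//   %flag = icmp eq i32 %a, %b   ; producer (PK_Icmp32)
//   br i1 %flag, ...             ; consumer (CK_Br)
// bool folding lowers the pair to a single cmp/jcc sequence instead of
// materializing %flag in a register.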

class TargetX8632 : public TargetX86 {
  TargetX8632() = delete;
  TargetX8632(const TargetX8632 &) = delete;
  TargetX8632 &operator=(const TargetX8632 &) = delete;

  friend class BoolFolding;

public:
  using BrCond = CondX86::BrCond;
  using CmppsCond = CondX86::CmppsCond;

  using SegmentRegisters = X86OperandMem::SegmentRegisters;

  using InstX86Br = Insts::Br;
  using InstX86FakeRMW = Insts::FakeRMW;
  using InstX86Label = Insts::Label;

  ~TargetX8632() override = default;

  static void staticInit(GlobalContext *Ctx);
  static bool shouldBePooled(const Constant *C);
  static ::Ice::Type getPointerType();

  void translateOm1() override;
  void translateO2() override;
  void doLoadOpt();
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegisterSet::Reg_NUM; }

  Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) override {
    if (isVectorType(Dest->getType())) {
      return Insts::Movp::create(Func, Dest, SrcVar);
    }
    return Insts::Mov::create(Func, Dest, SrcVar);
  }

  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  static const char *getRegClassName(RegClass C) {
    auto ClassNum = static_cast<RegClassX86>(C);
    assert(ClassNum < RCX86_NUM);
    switch (ClassNum) {
    default:
      assert(C < RC_Target);
      return regClassString(C);
    case RCX86_Is64To8:
      return "i64to8"; // 64-bit GPR truncable to i8
    case RCX86_Is32To8:
      return "i32to8"; // 32-bit GPR truncable to i8
    case RCX86_Is16To8:
      return "i16to8"; // 16-bit GPR truncable to i8
    case RCX86_IsTrunc8Rcvr:
      return "i8from"; // 8-bit GPR truncable from wider GPRs
    case RCX86_IsAhRcvr:
      return "i8fromah"; // 8-bit GPR that ah can be assigned to
    }
  }
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
    return TypeToRegisterSet[RC];
  }

  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }

  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    Reg.assertIsValid();
    return RegisterAliases[Reg];
  }

  bool hasFramePointer() const override { return IsEbpBasedFrame; }
  void setHasFramePointer() override { IsEbpBasedFrame = true; }
  RegNumT getStackReg() const override { return RegX8632::Reg_esp; }
  RegNumT getFrameReg() const override { return RegX8632::Reg_ebp; }
  RegNumT getFrameOrStackReg() const override {
    // If the stack pointer needs to be aligned, then the frame pointer is
    // unaligned, so always use the stack pointer.
    if (needsStackPointerAlignment())
      return getStackReg();
    return IsEbpBasedFrame ? getFrameReg() : getStackReg();
  }
  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of WordType bytes.
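    // For example, an i8 spill slot occupies 4 bytes on the x86-32 stack,
    // since WordType is i32.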
    const uint32_t WordSizeInBytes = typeWidthInBytes(WordType);
    return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
  }
  uint32_t getStackAlignment() const override {
    return X86_STACK_ALIGNMENT_BYTES;
  }
  bool needsStackPointerAlignment() const override {
    // If the ABI's stack alignment is smaller than the vector size (16 bytes),
    // use the (realigned) stack pointer for addressing any stack variables.
    return X86_STACK_ALIGNMENT_BYTES < 16;
  }
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  /// Returns the (negative) offset from ebp/rbp where the fixed Allocas start.
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - maxOutArgsSizeBytes());
  }
  virtual uint32_t maxOutArgsSizeBytes() const override {
    return MaxOutArgsSizeBytes;
  }
  virtual void updateMaxOutArgsSizeBytes(uint32_t Size) {
    MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, Size);
  }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

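  /// Minimum number of cases a switch needs before its lowering may use a
  /// jump table.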
  SizeT getMinJumpTableSize() const override { return 4; }

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void initNodeForLowering(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);

  void addProlog(CfgNode *Node) override;
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t StackAdjBytes,
                              size_t &InArgsSizeBytes);
  void addEpilog(CfgNode *Node) override;

  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());

protected:
  void postLower() override;

  void lowerAlloca(const InstAlloca *Instr) override;
  void lowerArguments() override;
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;
  void lowerFcmp(const InstFcmp *Instr) override;
  void lowerIcmp(const InstIcmp *Instr) override;

  void lowerIntrinsic(const InstIntrinsic *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void lowerOther(const Inst *Instr) override;
  void lowerRMW(const InstX86FakeRMW *RMW);
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const CfgVector<Type> &ArgTypes,
                                          Type ReturnType);
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;

  /// OptAddr wraps all the possible operands that an x86 address might have.
  struct OptAddr {
    Variable *Base = nullptr;
    Variable *Index = nullptr;
    uint16_t Shift = 0;
    int32_t Offset = 0;
    ConstantRelocatable *Relocatable = nullptr;
  };
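  // Illustrative mapping (not from this file): for the x86 addressing mode
  //   mov eax, [ebx + ecx*4 + 12]
  // the address has Base = ebx, Index = ecx, Shift = 2 (the scale is
  // 1 << Shift), and Offset = 12.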

  // Builds information for a canonical address expression:
  //   <Relocatable + Offset>(Base, Index, Shift)
  X86OperandMem *computeAddressOpt(const Inst *Instr, Type MemType,
                                   Operand *Addr);
  void doAddressOptOther() override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void doAddressOptLoadSubVector() override;
  void doAddressOptStoreSubVector() override;
  void doMockBoundsCheck(Operand *Opnd) override;

  /// Naive lowering of cmpxchg.
  void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
                          Operand *Desired);
  /// Attempt a more optimized lowering of cmpxchg. Returns true if optimized.
  bool tryOptimizedCmpxchgCmpBr(Variable *DestPrev, Operand *Ptr,
                                Operand *Expected, Operand *Desired);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal,
                       Operand *SecondVal);
  /// Load from memory for a given type.
  void typedLoad(Type Ty, Variable *Dest, Variable *Base, Constant *Offset);
  /// Store to memory for a given type.
  void typedStore(Type Ty, Variable *Value, Variable *Base, Constant *Offset);
  /// Copy memory of given type from Src to Dest using OffsetAmt on both.
  void copyMemory(Type Ty, Variable *Dest, Variable *Src, int32_t OffsetAmt);
  /// Replace some calls to memcpy with inline instructions.
  void lowerMemcpy(Operand *Dest, Operand *Src, Operand *Count);
  /// Replace some calls to memmove with inline instructions.
  void lowerMemmove(Operand *Dest, Operand *Src, Operand *Count);
  /// Replace some calls to memset with inline instructions.
  void lowerMemset(Operand *Dest, Operand *Val, Operand *Count);
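  // Illustrative sketch (assumed behavior): a small constant-length memcpy,
  // say 11 bytes, can be expanded inline as an 8-byte chunk plus a 2-byte and
  // a 1-byte chunk, with largestTypeInSize/firstTypeThatFitsSize (declared
  // below) choosing the chunk types.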

  /// Lower an indirect jump, adding sandboxing when needed.
  void lowerIndirectJump(Variable *JumpTarget);

  /// Check that the comparison value is in [Min,Max]. The flags register will
  /// be modified as follows:
  ///   - below or equal, if in range
  ///   - above, if not in range
  /// The index into the range is returned.
  Operand *lowerCmpRange(Operand *Comparison, uint64_t Min, uint64_t Max);
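  // Likely lowering (a sketch of the standard range-check idiom, not a quote
  // of the implementation):
  //   sub index, Min         ; bias the range to start at 0
  //   cmp index, (Max - Min) ; one unsigned compare checks both bounds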
  /// Lowering of a cluster of switch cases. If the case is not matched control
  /// will pass to the default label provided. If the default label is nullptr
  /// then control will fall through to the next instruction. DoneCmp should be
  /// true if the flags contain the result of a comparison with the Comparison.
  void lowerCaseCluster(const CaseCluster &Case, Operand *Src0, bool DoneCmp,
                        CfgNode *DefaultLabel = nullptr);

  using LowerBinOp = void (TargetX8632::*)(Variable *, Operand *);
  void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
                                Variable *Dest, Operand *Ptr, Operand *Val);

  void eliminateNextVectorSextInstruction(Variable *SignExtendedResult);

  void emitStackProbe(size_t StackSizeBytes);

  /// Emit just the call instruction (without argument or return variable
  /// processing), sandboxing if needed.
  Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
                         size_t NumVariadicFpArgs = 0);
  /// Materialize the moves needed to return a value of the specified type.
  Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType);

  /// Emit a jump table to the constant pool.
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  /// Emit a fake use of esp to make sure esp stays alive for the entire
  /// function. Otherwise some esp adjustments get dead-code eliminated.
  void keepEspLiveAtExit() {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(getStackReg(), WordType);
    Context.insert<InstFakeUse>(esp);
  }

  /// Operand legalization helpers. To deal with address mode constraints, the
  /// helpers will create a new Operand and emit instructions that guarantee
  /// that the Operand kind is one of those indicated by the LegalMask (a
  /// bitmask of allowed kinds). If the input Operand is known to already meet
  /// the constraints, it may be simply returned as the result, without creating
  /// any new instructions or operands.
  enum OperandLegalization {
    Legal_None = 0,
    Legal_Reg = 1 << 0, // physical register, not stack location
    Legal_Imm = 1 << 1,
    Legal_Mem = 1 << 2, // includes [eax+4*ecx] as well as [esp+12]
    Legal_Rematerializable = 1 << 3,
    Legal_AddrAbs = 1 << 4, // ConstantRelocatable doesn't have to add RebasePtr
    Legal_Default = ~(Legal_Rematerializable | Legal_AddrAbs)
    // TODO(stichnot): Figure out whether this default works for x86-64.
  };
  using LegalMask = uint32_t;
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
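  // Typical use (illustrative): constrain a source operand to register or
  // immediate form before emitting an instruction that cannot take memory:
  //   Operand *Src0 = legalize(Instr->getSrc(0), Legal_Reg | Legal_Imm);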
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
  /// Legalize the first source operand for use in the cmp instruction.
  Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
  /// Turn a pointer operand into a memory operand that can be used by a real
  /// load/store operation. Legalizes the operand as well. This is a nop if the
  /// operand is already a legal memory operand.
  X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
                                   bool DoLegalize = true);

  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();

  static constexpr uint32_t NoSizeLimit = 0;
  /// Returns the largest type which is equal to or larger than Size bytes. The
  /// type is suitable for copying memory i.e. a load and store will be a single
  /// instruction (for example x86 will get f64 not i64).
  static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit);
  /// Returns the smallest type which is equal to or larger than Size bytes. If
  /// one doesn't exist then the largest type smaller than Size bytes is
  /// returned. The type is suitable for memory copies as described at
  /// largestTypeInSize.
  static Type firstTypeThatFitsSize(uint32_t Size,
                                    uint32_t MaxSize = NoSizeLimit);

  Variable *copyToReg8(Operand *Src, RegNumT RegNum = RegNumT());
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());

  /// Returns a register containing all zeros, without affecting the FLAGS
  /// register, using the best instruction for the type.
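  /// (For scalars this typically means "mov reg, 0" rather than
  /// "xor reg, reg", since xor would clobber FLAGS; vectors can use pxor,
  /// which leaves FLAGS intact.)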
  Variable *makeZeroedRegister(Type Ty, RegNumT RegNum = RegNumT());

  /// \name Returns a vector in a register with the given constant entries.
  /// @{
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfOnes(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfMinusOnes(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfHighOrderBits(Type Ty, RegNumT RegNum = RegNumT());
  Variable *makeVectorOfFabsMask(Type Ty, RegNumT RegNum = RegNumT());
  /// @}

  /// Return a memory operand corresponding to a stack allocated Variable.
  X86OperandMem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                              uint32_t Offset = 0);

  /// The following are helpers that insert lowered x86 instructions with
  /// minimal syntactic overhead, so that the lowering code can look as close to
  /// assembly as practical.
  void _adc(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Adc>(Dest, Src0);
  }
  void _adc_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AdcRMW>(DestSrc0, Src1);
  }
  void _add(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Add>(Dest, Src0);
  }
  void _add_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AddRMW>(DestSrc0, Src1);
  }
  void _addps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Addps>(Dest, Src0);
  }
  void _addss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Addss>(Dest, Src0);
  }
  void _add_sp(Operand *Adjustment);
  void _and(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::And>(Dest, Src0);
  }
  void _andnps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Andnps>(Dest, Src0);
  }
  void _andps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Andps>(Dest, Src0);
  }
  void _and_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::AndRMW>(DestSrc0, Src1);
  }
  void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Blendvps>(Dest, Src0, Src1);
  }
  void _br(BrCond Condition, CfgNode *TargetTrue, CfgNode *TargetFalse) {
    Context.insert<InstX86Br>(TargetTrue, TargetFalse, Condition,
                              InstX86Br::Far);
  }
  void _br(CfgNode *Target) {
    Context.insert<InstX86Br>(Target, InstX86Br::Far);
  }
  void _br(BrCond Condition, CfgNode *Target) {
    Context.insert<InstX86Br>(Target, Condition, InstX86Br::Far);
  }
  void _br(BrCond Condition, InstX86Label *Label,
           InstX86Br::Mode Kind = InstX86Br::Near) {
    Context.insert<InstX86Br>(Label, Condition, Kind);
  }
  void _bsf(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Bsf>(Dest, Src0);
  }
  void _bsr(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Bsr>(Dest, Src0);
  }
  void _bswap(Variable *SrcDest) { Context.insert<Insts::Bswap>(SrcDest); }
  void _cbwdq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Cbwdq>(Dest, Src0);
  }
  void _cmov(Variable *Dest, Operand *Src0, BrCond Condition) {
    Context.insert<Insts::Cmov>(Dest, Src0, Condition);
  }
  void _cmp(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Icmp>(Src0, Src1);
  }
  void _cmpps(Variable *Dest, Operand *Src0, CmppsCond Condition) {
    Context.insert<Insts::Cmpps>(Dest, Src0, Condition);
  }
  void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
                bool Locked) {
    Context.insert<Insts::Cmpxchg>(DestOrAddr, Eax, Desired, Locked);
    // Mark eax as possibly modified by cmpxchg.
    Context.insert<InstFakeDef>(Eax, llvm::dyn_cast<Variable>(DestOrAddr));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Eax);
  }
  void _cmpxchg8b(X86OperandMem *Addr, Variable *Edx, Variable *Eax,
                  Variable *Ecx, Variable *Ebx, bool Locked) {
    Context.insert<Insts::Cmpxchg8b>(Addr, Edx, Eax, Ecx, Ebx, Locked);
    // Mark edx and eax as possibly modified by cmpxchg8b.
    Context.insert<InstFakeDef>(Edx);
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Edx);
    Context.insert<InstFakeDef>(Eax);
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Eax);
  }
  void _cvt(Variable *Dest, Operand *Src0, Insts::Cvt::CvtVariant Variant) {
    Context.insert<Insts::Cvt>(Dest, Src0, Variant);
  }
  void _round(Variable *Dest, Operand *Src0, Operand *Imm) {
    Context.insert<Insts::Round>(Dest, Src0, Imm);
  }
  void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Div>(Dest, Src0, Src1);
  }
  void _divps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Divps>(Dest, Src0);
  }
  void _divss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Divss>(Dest, Src0);
  }
  void _fld(Operand *Src0) { Context.insert<Insts::Fld>(Src0); }
  void _fstp(Variable *Dest) { Context.insert<Insts::Fstp>(Dest); }
  void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Idiv>(Dest, Src0, Src1);
  }
  void _imul(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Imul>(Dest, Src0);
  }
  void _imul_imm(Variable *Dest, Operand *Src0, Constant *Imm) {
    Context.insert<Insts::ImulImm>(Dest, Src0, Imm);
  }
  void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Insertps>(Dest, Src0, Src1);
  }
  void _int3() { Context.insert<Insts::Int3>(); }
  void _jmp(Operand *Target) { Context.insert<Insts::Jmp>(Target); }
  void _lea(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Lea>(Dest, Src0);
  }
  void _link_bp();
  void _push_reg(RegNumT RegNum);
  void _pop_reg(RegNumT RegNum);
  void _mfence() { Context.insert<Insts::Mfence>(); }
  /// Moves can be used to redefine registers, creating "partial kills" for
  /// liveness.  Mark where moves are used in this way.
  void _redefined(Inst *MovInst, bool IsRedefinition = true) {
    if (IsRedefinition)
      MovInst->setDestRedefined();
  }
  /// If Dest=nullptr is passed in, then a new variable is created, marked as
  /// infinite register allocation weight, and returned through the in/out Dest
  /// argument.
  Insts::Mov *_mov(Variable *&Dest, Operand *Src0, RegNumT RegNum = RegNumT()) {
    if (Dest == nullptr)
      Dest = makeReg(Src0->getType(), RegNum);
    return Context.insert<Insts::Mov>(Dest, Src0);
  }
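  // Typical use (illustrative): pass Dest = nullptr to obtain a fresh
  // infinite-weight temporary:
  //   Variable *T = nullptr;
  //   _mov(T, Src0); // T is created and returned through the reference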
  void _mov_sp(Operand *NewValue);
  Insts::Movp *_movp(Variable *Dest, Operand *Src0) {
    return Context.insert<Insts::Movp>(Dest, Src0);
  }
  void _movd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movd>(Dest, Src0);
  }
  void _movq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movq>(Dest, Src0);
  }
  void _movss(Variable *Dest, Variable *Src0) {
    Context.insert<Insts::MovssRegs>(Dest, Src0);
  }
  void _movsx(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movsx>(Dest, Src0);
  }
  Insts::Movzx *_movzx(Variable *Dest, Operand *Src0) {
    return Context.insert<Insts::Movzx>(Dest, Src0);
  }
  void _maxss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Maxss>(Dest, Src0);
  }
  void _minss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Minss>(Dest, Src0);
  }
  void _maxps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Maxps>(Dest, Src0);
  }
  void _minps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Minps>(Dest, Src0);
  }
  void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Mul>(Dest, Src0, Src1);
  }
  void _mulps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Mulps>(Dest, Src0);
  }
  void _mulss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Mulss>(Dest, Src0);
  }
  void _neg(Variable *SrcDest) { Context.insert<Insts::Neg>(SrcDest); }
  void _nop(SizeT Variant) { Context.insert<Insts::Nop>(Variant); }
  void _or(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Or>(Dest, Src0);
  }
  void _orps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Orps>(Dest, Src0);
  }
  void _or_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::OrRMW>(DestSrc0, Src1);
  }
  void _padd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Padd>(Dest, Src0);
  }
  void _padds(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Padds>(Dest, Src0);
  }
  void _paddus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Paddus>(Dest, Src0);
  }
  void _pand(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pand>(Dest, Src0);
  }
  void _pandn(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pandn>(Dest, Src0);
  }
  void _pblendvb(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pblendvb>(Dest, Src0, Src1);
  }
  void _pcmpeq(Variable *Dest, Operand *Src0,
               Type ArithmeticTypeOverride = IceType_void) {
    Context.insert<Insts::Pcmpeq>(Dest, Src0, ArithmeticTypeOverride);
  }
  void _pcmpgt(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pcmpgt>(Dest, Src0);
  }
  void _pextr(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pextr>(Dest, Src0, Src1);
  }
  void _pinsr(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pinsr>(Dest, Src0, Src1);
  }
  void _pmull(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmull>(Dest, Src0);
  }
  void _pmulhw(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmulhw>(Dest, Src0);
  }
  void _pmulhuw(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmulhuw>(Dest, Src0);
  }
  void _pmaddwd(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmaddwd>(Dest, Src0);
  }
  void _pmuludq(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pmuludq>(Dest, Src0);
  }
  void _pop(Variable *Dest) { Context.insert<Insts::Pop>(Dest); }
  void _por(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Por>(Dest, Src0);
  }
  void _punpckl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Punpckl>(Dest, Src0);
  }
  void _punpckh(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Punpckh>(Dest, Src0);
  }
  void _packss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Packss>(Dest, Src0);
  }
  void _packus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Packus>(Dest, Src0);
  }
  void _pshufb(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pshufb>(Dest, Src0);
  }
  void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Pshufd>(Dest, Src0, Src1);
  }
  void _psll(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psll>(Dest, Src0);
  }
  void _psra(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psra>(Dest, Src0);
  }
  void _psrl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psrl>(Dest, Src0);
  }
  void _psub(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psub>(Dest, Src0);
  }
  void _psubs(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psubs>(Dest, Src0);
  }
  void _psubus(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Psubus>(Dest, Src0);
  }
  void _push(Operand *Src0) { Context.insert<Insts::Push>(Src0); }
  void _pxor(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Pxor>(Dest, Src0);
  }
  void _ret(Variable *Src0 = nullptr) { Context.insert<Insts::Ret>(Src0); }
  void _rol(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Rol>(Dest, Src0);
  }
  void _round(Variable *Dest, Operand *Src, Constant *Imm) {
    Context.insert<Insts::Round>(Dest, Src, Imm);
  }
  void _sar(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sar>(Dest, Src0);
  }
  void _sbb(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sbb>(Dest, Src0);
  }
  void _sbb_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::SbbRMW>(DestSrc0, Src1);
  }
  void _setcc(Variable *Dest, BrCond Condition) {
    Context.insert<Insts::Setcc>(Dest, Condition);
  }
  void _shl(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Shl>(Dest, Src0);
  }
  void _shld(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Shld>(Dest, Src0, Src1);
  }
  void _shr(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Shr>(Dest, Src0);
  }
  void _shrd(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert<Insts::Shrd>(Dest, Src0, Src1);
  }
  void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Shufps>(Dest, Src0, Src1);
  }
  void _movmsk(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Movmsk>(Dest, Src0);
  }
  void _sqrt(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sqrt>(Dest, Src0);
  }
  void _store(Operand *Value, X86Operand *Mem) {
    Context.insert<Insts::Store>(Value, Mem);
  }
  void _storep(Variable *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreP>(Value, Mem);
  }
  void _storeq(Operand *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreQ>(Value, Mem);
  }
  void _stored(Operand *Value, X86OperandMem *Mem) {
    Context.insert<Insts::StoreD>(Value, Mem);
  }
  void _sub(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Sub>(Dest, Src0);
  }
  void _sub_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::SubRMW>(DestSrc0, Src1);
  }
  void _sub_sp(Operand *Adjustment);
  void _subps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Subps>(Dest, Src0);
  }
  void _subss(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Subss>(Dest, Src0);
  }
  void _test(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Test>(Src0, Src1);
  }
  void _ucomiss(Operand *Src0, Operand *Src1) {
    Context.insert<Insts::Ucomiss>(Src0, Src1);
  }
  void _ud2() { Context.insert<Insts::UD2>(); }
  void _unlink_bp();
  void _xadd(Operand *Dest, Variable *Src, bool Locked) {
    Context.insert<Insts::Xadd>(Dest, Src, Locked);
    // The xadd exchanges Dest and Src (modifying Src). Model that update with
    // a FakeDef followed by a FakeUse.
    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Src);
  }
  void _xchg(Operand *Dest, Variable *Src) {
    Context.insert<Insts::Xchg>(Dest, Src);
    // The xchg modifies Dest and Src -- model that update with a
    // FakeDef/FakeUse.
    Context.insert<InstFakeDef>(Src, llvm::dyn_cast<Variable>(Dest));
    _set_dest_redefined();
    Context.insert<InstFakeUse>(Src);
  }
  void _xor(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Xor>(Dest, Src0);
  }
  void _xorps(Variable *Dest, Operand *Src0) {
    Context.insert<Insts::Xorps>(Dest, Src0);
  }
  void _xor_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
    Context.insert<Insts::XorRMW>(DestSrc0, Src1);
  }

  void _iaca_start() {
    if (!BuildDefs::minimal())
      Context.insert<Insts::IacaStart>();
  }
  void _iaca_end() {
    if (!BuildDefs::minimal())
      Context.insert<Insts::IacaEnd>();
  }

  /// This class helps wrap IACA markers around the code generated by the
  /// current scope. It means you don't need to put an end before each return.
  class ScopedIacaMark {
    ScopedIacaMark(const ScopedIacaMark &) = delete;
    ScopedIacaMark &operator=(const ScopedIacaMark &) = delete;

  public:
    ScopedIacaMark(TargetX8632 *Lowering) : Lowering(Lowering) {
      Lowering->_iaca_start();
    }
    ~ScopedIacaMark() { end(); }
    void end() {
      if (!Lowering)
        return;
      Lowering->_iaca_end();
      Lowering = nullptr;
    }

  private:
    TargetX8632 *Lowering;
  };
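  // Usage sketch: construct the mark at the top of a lowering routine; the
  // destructor emits the end marker on every exit path:
  //   ScopedIacaMark Mark(this); // _iaca_start() now, _iaca_end() in ~Mark()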

  bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
  void findRMW();

  static uint32_t applyStackAlignment(uint32_t Value);

  bool IsEbpBasedFrame = false;

#if defined(_WIN32)
  // Windows 32-bit only guarantees 4 byte stack alignment.
  static constexpr uint32_t X86_STACK_ALIGNMENT_BYTES = 4;
#else
  /// Stack alignment guaranteed by the System V ABI.
  static constexpr uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
#endif
  /// Stack alignment required by the currently lowered function.
  size_t RequiredStackAlignment = X86_STACK_ALIGNMENT_BYTES;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSet;
  static std::array<SmallBitVector, RCX86_NUM> TypeToRegisterSetUnfiltered;
  static std::array<SmallBitVector, RegisterSet::Reg_NUM> RegisterAliases;
  SmallBitVector RegsUsed;
  std::array<VarList, IceType_NUM> PhysicalRegisters;
  // RebasePtr is a Variable that holds the Rebasing pointer (if any) for the
  // current sandboxing type.
  Variable *RebasePtr = nullptr;

private:
  void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo, Operand *Src0Hi,
                    Operand *Src1Lo, Variable *DestLo, Variable *DestHi);

  /// Emit the code for a combined operation and consumer instruction, or set
  /// the destination variable of the operation if Consumer == nullptr.
  void lowerIcmpAndConsumer(const InstIcmp *Icmp, const Inst *Consumer);
  void lowerFcmpAndConsumer(const InstFcmp *Fcmp, const Inst *Consumer);
  void lowerArithAndConsumer(const InstArithmetic *Arith, const Inst *Consumer);

  /// Emit a setcc instruction if Consumer == nullptr; otherwise emit a
  /// specialized version of Consumer.
  void setccOrConsumer(BrCond Condition, Variable *Dest, const Inst *Consumer);

  /// Emit a mov [1|0] instruction if Consumer == nullptr; otherwise emit a
  /// specialized version of Consumer.
  void movOrConsumer(bool IcmpResult, Variable *Dest, const Inst *Consumer);

  /// Emit the code for instructions with a vector type.
  void lowerIcmpVector(const InstIcmp *Icmp);
  void lowerFcmpVector(const InstFcmp *Fcmp);
  void lowerSelectVector(const InstSelect *Instr);

  /// Helpers for select lowering.
  void lowerSelectMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                       Operand *SrcF);
  void lowerSelectIntMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                          Operand *SrcF);
  /// Generic helper to move an arbitrary type from Src to Dest.
  void lowerMove(Variable *Dest, Operand *Src, bool IsRedefinition);

  /// Optimizations for idiom recognition.
  bool lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, const InstSelect *Select);

  /// lowerIcmp64 handles 64-bit icmp lowering.
  void lowerIcmp64(const InstIcmp *Icmp, const Inst *Consumer);

  BoolFolding FoldingInfo;

  /// Helpers for lowering ShuffleVector
  /// @{
  Variable *lowerShuffleVector_AllFromSameSrc(Operand *Src, SizeT Index0,
                                              SizeT Index1, SizeT Index2,
                                              SizeT Index3);
  static constexpr SizeT IGNORE_INDEX = 0x80000000u;
  Variable *lowerShuffleVector_TwoFromSameSrc(Operand *Src0, SizeT Index0,
                                              SizeT Index1, Operand *Src1,
                                              SizeT Index2, SizeT Index3);
  static constexpr SizeT UNIFIED_INDEX_0 = 0;
  static constexpr SizeT UNIFIED_INDEX_1 = 2;
  Variable *lowerShuffleVector_UnifyFromDifferentSrcs(Operand *Src0,
                                                      SizeT Index0,
                                                      Operand *Src1,
                                                      SizeT Index1);
  static constexpr SizeT CLEAR_ALL_BITS = 0x80;
  SizeT PshufbMaskCount = 0;
  GlobalString lowerShuffleVector_NewMaskName();
  ConstantRelocatable *lowerShuffleVector_CreatePshufbMask(
      int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
      int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
      int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
      int8_t Idx15);
  void lowerShuffleVector_UsingPshufb(Variable *Dest, Operand *Src0,
                                      Operand *Src1, int8_t Idx0, int8_t Idx1,
                                      int8_t Idx2, int8_t Idx3, int8_t Idx4,
                                      int8_t Idx5, int8_t Idx6, int8_t Idx7,
                                      int8_t Idx8, int8_t Idx9, int8_t Idx10,
                                      int8_t Idx11, int8_t Idx12, int8_t Idx13,
                                      int8_t Idx14, int8_t Idx15);
  /// @}

  /// The following table summarizes the logic for lowering the fcmp
  /// instruction. There is one table entry for each of the 16 conditions.
  ///
  /// The first four columns describe the case when the operands are floating
  /// point scalar values. A comment in lowerFcmp() describes the lowering
  /// template. In the most general case, there is a compare followed by two
  /// conditional branches, because some fcmp conditions don't map to a single
  /// x86 conditional branch. However, in many cases it is possible to swap the
  /// operands in the comparison and have a single conditional branch. Since
  /// it's quite tedious to validate the table by hand, good execution tests are
  /// helpful.
  ///
  /// The last two columns describe the case when the operands are vectors of
  /// floating point values. For most fcmp conditions, there is a clear mapping
  /// to a single x86 cmpps instruction variant. Some fcmp conditions require
  /// special code to handle and these are marked in the table with a
  /// Cmpps_Invalid predicate.
  /// @{
  static const struct TableFcmpType {
    uint32_t Default;
    bool SwapScalarOperands;
    CondX86::BrCond C1, C2;
    bool SwapVectorOperands;
    CondX86::CmppsCond Predicate;
  } TableFcmp[];
  static const size_t TableFcmpSize;
  /// @}

  /// The following table summarizes the logic for lowering the icmp instruction
  /// for i32 and narrower types. Each icmp condition has a clear mapping to an
  /// x86 conditional branch instruction.
  /// @{
  static const struct TableIcmp32Type {
    CondX86::BrCond Mapping;
  } TableIcmp32[];
  static const size_t TableIcmp32Size;
  /// @}

  /// The following table summarizes the logic for lowering the icmp instruction
  /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
  /// conditional branches are needed. For the other conditions, three separate
  /// conditional branches are needed.
  /// @{
  static const struct TableIcmp64Type {
    CondX86::BrCond C1, C2, C3;
  } TableIcmp64[];
  static const size_t TableIcmp64Size;
  /// @}

  static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
    assert(static_cast<size_t>(Cond) < TableIcmp32Size);
    return TableIcmp32[Cond].Mapping;
  }

public:
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetX8632>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    return makeUnique<X8632::AssemblerX8632>();
  }

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetX8632(Cfg *Func);
};

class TargetDataX8632 final : public TargetDataLowering {
  TargetDataX8632() = delete;
  TargetDataX8632(const TargetDataX8632 &) = delete;
  TargetDataX8632 &operator=(const TargetDataX8632 &) = delete;

public:
  ~TargetDataX8632() override = default;

  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return makeUnique<TargetDataX8632>(Ctx);
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetDataX8632(GlobalContext *Ctx) : TargetDataLowering(Ctx) {}
  template <typename T> static void emitConstantPool(GlobalContext *Ctx);
};

class TargetHeaderX86 : public TargetHeaderLowering {
  TargetHeaderX86() = delete;
  TargetHeaderX86(const TargetHeaderX86 &) = delete;
  TargetHeaderX86 &operator=(const TargetHeaderX86 &) = delete;

public:
  ~TargetHeaderX86() = default;

  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return makeUnique<TargetHeaderX86>(Ctx);
  }

private:
  ENABLE_MAKE_UNIQUE;

  explicit TargetHeaderX86(GlobalContext *Ctx) : TargetHeaderLowering(Ctx) {}
};

} // end of namespace X8632
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGX8632_H