xref: /aosp_15_r20/external/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker 
10*9880d681SAndroid Build Coastguard Worker #include "AArch64TargetTransformInfo.h"
11*9880d681SAndroid Build Coastguard Worker #include "MCTargetDesc/AArch64AddressingModes.h"
12*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/TargetTransformInfo.h"
13*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/LoopInfo.h"
14*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/BasicTTIImpl.h"
15*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
16*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/CostTable.h"
17*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetLowering.h"
18*9880d681SAndroid Build Coastguard Worker #include <algorithm>
19*9880d681SAndroid Build Coastguard Worker using namespace llvm;
20*9880d681SAndroid Build Coastguard Worker 
21*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "aarch64tti"
22*9880d681SAndroid Build Coastguard Worker 
23*9880d681SAndroid Build Coastguard Worker /// \brief Calculate the cost of materializing a 64-bit value. This helper
24*9880d681SAndroid Build Coastguard Worker /// method might only calculate a fraction of a larger immediate. Therefore it
25*9880d681SAndroid Build Coastguard Worker /// is valid to return a cost of ZERO.
getIntImmCost(int64_t Val)26*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getIntImmCost(int64_t Val) {
27*9880d681SAndroid Build Coastguard Worker   // Check if the immediate can be encoded within an instruction.
28*9880d681SAndroid Build Coastguard Worker   if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
29*9880d681SAndroid Build Coastguard Worker     return 0;
30*9880d681SAndroid Build Coastguard Worker 
31*9880d681SAndroid Build Coastguard Worker   if (Val < 0)
32*9880d681SAndroid Build Coastguard Worker     Val = ~Val;
33*9880d681SAndroid Build Coastguard Worker 
34*9880d681SAndroid Build Coastguard Worker   // Calculate how many moves we will need to materialize this constant.
35*9880d681SAndroid Build Coastguard Worker   unsigned LZ = countLeadingZeros((uint64_t)Val);
36*9880d681SAndroid Build Coastguard Worker   return (64 - LZ + 15) / 16;
37*9880d681SAndroid Build Coastguard Worker }
38*9880d681SAndroid Build Coastguard Worker 
39*9880d681SAndroid Build Coastguard Worker /// \brief Calculate the cost of materializing the given constant.
getIntImmCost(const APInt & Imm,Type * Ty)40*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
41*9880d681SAndroid Build Coastguard Worker   assert(Ty->isIntegerTy());
42*9880d681SAndroid Build Coastguard Worker 
43*9880d681SAndroid Build Coastguard Worker   unsigned BitSize = Ty->getPrimitiveSizeInBits();
44*9880d681SAndroid Build Coastguard Worker   if (BitSize == 0)
45*9880d681SAndroid Build Coastguard Worker     return ~0U;
46*9880d681SAndroid Build Coastguard Worker 
47*9880d681SAndroid Build Coastguard Worker   // Sign-extend all constants to a multiple of 64-bit.
48*9880d681SAndroid Build Coastguard Worker   APInt ImmVal = Imm;
49*9880d681SAndroid Build Coastguard Worker   if (BitSize & 0x3f)
50*9880d681SAndroid Build Coastguard Worker     ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
51*9880d681SAndroid Build Coastguard Worker 
52*9880d681SAndroid Build Coastguard Worker   // Split the constant into 64-bit chunks and calculate the cost for each
53*9880d681SAndroid Build Coastguard Worker   // chunk.
54*9880d681SAndroid Build Coastguard Worker   int Cost = 0;
55*9880d681SAndroid Build Coastguard Worker   for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
56*9880d681SAndroid Build Coastguard Worker     APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
57*9880d681SAndroid Build Coastguard Worker     int64_t Val = Tmp.getSExtValue();
58*9880d681SAndroid Build Coastguard Worker     Cost += getIntImmCost(Val);
59*9880d681SAndroid Build Coastguard Worker   }
60*9880d681SAndroid Build Coastguard Worker   // We need at least one instruction to materialze the constant.
61*9880d681SAndroid Build Coastguard Worker   return std::max(1, Cost);
62*9880d681SAndroid Build Coastguard Worker }
63*9880d681SAndroid Build Coastguard Worker 
getIntImmCost(unsigned Opcode,unsigned Idx,const APInt & Imm,Type * Ty)64*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
65*9880d681SAndroid Build Coastguard Worker                                   const APInt &Imm, Type *Ty) {
66*9880d681SAndroid Build Coastguard Worker   assert(Ty->isIntegerTy());
67*9880d681SAndroid Build Coastguard Worker 
68*9880d681SAndroid Build Coastguard Worker   unsigned BitSize = Ty->getPrimitiveSizeInBits();
69*9880d681SAndroid Build Coastguard Worker   // There is no cost model for constants with a bit size of 0. Return TCC_Free
70*9880d681SAndroid Build Coastguard Worker   // here, so that constant hoisting will ignore this constant.
71*9880d681SAndroid Build Coastguard Worker   if (BitSize == 0)
72*9880d681SAndroid Build Coastguard Worker     return TTI::TCC_Free;
73*9880d681SAndroid Build Coastguard Worker 
74*9880d681SAndroid Build Coastguard Worker   unsigned ImmIdx = ~0U;
75*9880d681SAndroid Build Coastguard Worker   switch (Opcode) {
76*9880d681SAndroid Build Coastguard Worker   default:
77*9880d681SAndroid Build Coastguard Worker     return TTI::TCC_Free;
78*9880d681SAndroid Build Coastguard Worker   case Instruction::GetElementPtr:
79*9880d681SAndroid Build Coastguard Worker     // Always hoist the base address of a GetElementPtr.
80*9880d681SAndroid Build Coastguard Worker     if (Idx == 0)
81*9880d681SAndroid Build Coastguard Worker       return 2 * TTI::TCC_Basic;
82*9880d681SAndroid Build Coastguard Worker     return TTI::TCC_Free;
83*9880d681SAndroid Build Coastguard Worker   case Instruction::Store:
84*9880d681SAndroid Build Coastguard Worker     ImmIdx = 0;
85*9880d681SAndroid Build Coastguard Worker     break;
86*9880d681SAndroid Build Coastguard Worker   case Instruction::Add:
87*9880d681SAndroid Build Coastguard Worker   case Instruction::Sub:
88*9880d681SAndroid Build Coastguard Worker   case Instruction::Mul:
89*9880d681SAndroid Build Coastguard Worker   case Instruction::UDiv:
90*9880d681SAndroid Build Coastguard Worker   case Instruction::SDiv:
91*9880d681SAndroid Build Coastguard Worker   case Instruction::URem:
92*9880d681SAndroid Build Coastguard Worker   case Instruction::SRem:
93*9880d681SAndroid Build Coastguard Worker   case Instruction::And:
94*9880d681SAndroid Build Coastguard Worker   case Instruction::Or:
95*9880d681SAndroid Build Coastguard Worker   case Instruction::Xor:
96*9880d681SAndroid Build Coastguard Worker   case Instruction::ICmp:
97*9880d681SAndroid Build Coastguard Worker     ImmIdx = 1;
98*9880d681SAndroid Build Coastguard Worker     break;
99*9880d681SAndroid Build Coastguard Worker   // Always return TCC_Free for the shift value of a shift instruction.
100*9880d681SAndroid Build Coastguard Worker   case Instruction::Shl:
101*9880d681SAndroid Build Coastguard Worker   case Instruction::LShr:
102*9880d681SAndroid Build Coastguard Worker   case Instruction::AShr:
103*9880d681SAndroid Build Coastguard Worker     if (Idx == 1)
104*9880d681SAndroid Build Coastguard Worker       return TTI::TCC_Free;
105*9880d681SAndroid Build Coastguard Worker     break;
106*9880d681SAndroid Build Coastguard Worker   case Instruction::Trunc:
107*9880d681SAndroid Build Coastguard Worker   case Instruction::ZExt:
108*9880d681SAndroid Build Coastguard Worker   case Instruction::SExt:
109*9880d681SAndroid Build Coastguard Worker   case Instruction::IntToPtr:
110*9880d681SAndroid Build Coastguard Worker   case Instruction::PtrToInt:
111*9880d681SAndroid Build Coastguard Worker   case Instruction::BitCast:
112*9880d681SAndroid Build Coastguard Worker   case Instruction::PHI:
113*9880d681SAndroid Build Coastguard Worker   case Instruction::Call:
114*9880d681SAndroid Build Coastguard Worker   case Instruction::Select:
115*9880d681SAndroid Build Coastguard Worker   case Instruction::Ret:
116*9880d681SAndroid Build Coastguard Worker   case Instruction::Load:
117*9880d681SAndroid Build Coastguard Worker     break;
118*9880d681SAndroid Build Coastguard Worker   }
119*9880d681SAndroid Build Coastguard Worker 
120*9880d681SAndroid Build Coastguard Worker   if (Idx == ImmIdx) {
121*9880d681SAndroid Build Coastguard Worker     int NumConstants = (BitSize + 63) / 64;
122*9880d681SAndroid Build Coastguard Worker     int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
123*9880d681SAndroid Build Coastguard Worker     return (Cost <= NumConstants * TTI::TCC_Basic)
124*9880d681SAndroid Build Coastguard Worker                ? static_cast<int>(TTI::TCC_Free)
125*9880d681SAndroid Build Coastguard Worker                : Cost;
126*9880d681SAndroid Build Coastguard Worker   }
127*9880d681SAndroid Build Coastguard Worker   return AArch64TTIImpl::getIntImmCost(Imm, Ty);
128*9880d681SAndroid Build Coastguard Worker }
129*9880d681SAndroid Build Coastguard Worker 
getIntImmCost(Intrinsic::ID IID,unsigned Idx,const APInt & Imm,Type * Ty)130*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
131*9880d681SAndroid Build Coastguard Worker                                   const APInt &Imm, Type *Ty) {
132*9880d681SAndroid Build Coastguard Worker   assert(Ty->isIntegerTy());
133*9880d681SAndroid Build Coastguard Worker 
134*9880d681SAndroid Build Coastguard Worker   unsigned BitSize = Ty->getPrimitiveSizeInBits();
135*9880d681SAndroid Build Coastguard Worker   // There is no cost model for constants with a bit size of 0. Return TCC_Free
136*9880d681SAndroid Build Coastguard Worker   // here, so that constant hoisting will ignore this constant.
137*9880d681SAndroid Build Coastguard Worker   if (BitSize == 0)
138*9880d681SAndroid Build Coastguard Worker     return TTI::TCC_Free;
139*9880d681SAndroid Build Coastguard Worker 
140*9880d681SAndroid Build Coastguard Worker   switch (IID) {
141*9880d681SAndroid Build Coastguard Worker   default:
142*9880d681SAndroid Build Coastguard Worker     return TTI::TCC_Free;
143*9880d681SAndroid Build Coastguard Worker   case Intrinsic::sadd_with_overflow:
144*9880d681SAndroid Build Coastguard Worker   case Intrinsic::uadd_with_overflow:
145*9880d681SAndroid Build Coastguard Worker   case Intrinsic::ssub_with_overflow:
146*9880d681SAndroid Build Coastguard Worker   case Intrinsic::usub_with_overflow:
147*9880d681SAndroid Build Coastguard Worker   case Intrinsic::smul_with_overflow:
148*9880d681SAndroid Build Coastguard Worker   case Intrinsic::umul_with_overflow:
149*9880d681SAndroid Build Coastguard Worker     if (Idx == 1) {
150*9880d681SAndroid Build Coastguard Worker       int NumConstants = (BitSize + 63) / 64;
151*9880d681SAndroid Build Coastguard Worker       int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
152*9880d681SAndroid Build Coastguard Worker       return (Cost <= NumConstants * TTI::TCC_Basic)
153*9880d681SAndroid Build Coastguard Worker                  ? static_cast<int>(TTI::TCC_Free)
154*9880d681SAndroid Build Coastguard Worker                  : Cost;
155*9880d681SAndroid Build Coastguard Worker     }
156*9880d681SAndroid Build Coastguard Worker     break;
157*9880d681SAndroid Build Coastguard Worker   case Intrinsic::experimental_stackmap:
158*9880d681SAndroid Build Coastguard Worker     if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
159*9880d681SAndroid Build Coastguard Worker       return TTI::TCC_Free;
160*9880d681SAndroid Build Coastguard Worker     break;
161*9880d681SAndroid Build Coastguard Worker   case Intrinsic::experimental_patchpoint_void:
162*9880d681SAndroid Build Coastguard Worker   case Intrinsic::experimental_patchpoint_i64:
163*9880d681SAndroid Build Coastguard Worker     if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
164*9880d681SAndroid Build Coastguard Worker       return TTI::TCC_Free;
165*9880d681SAndroid Build Coastguard Worker     break;
166*9880d681SAndroid Build Coastguard Worker   }
167*9880d681SAndroid Build Coastguard Worker   return AArch64TTIImpl::getIntImmCost(Imm, Ty);
168*9880d681SAndroid Build Coastguard Worker }
169*9880d681SAndroid Build Coastguard Worker 
170*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::PopcntSupportKind
getPopcntSupport(unsigned TyWidth)171*9880d681SAndroid Build Coastguard Worker AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
172*9880d681SAndroid Build Coastguard Worker   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
173*9880d681SAndroid Build Coastguard Worker   if (TyWidth == 32 || TyWidth == 64)
174*9880d681SAndroid Build Coastguard Worker     return TTI::PSK_FastHardware;
175*9880d681SAndroid Build Coastguard Worker   // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
176*9880d681SAndroid Build Coastguard Worker   return TTI::PSK_Software;
177*9880d681SAndroid Build Coastguard Worker }
178*9880d681SAndroid Build Coastguard Worker 
getCastInstrCost(unsigned Opcode,Type * Dst,Type * Src)179*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
180*9880d681SAndroid Build Coastguard Worker   int ISD = TLI->InstructionOpcodeToISD(Opcode);
181*9880d681SAndroid Build Coastguard Worker   assert(ISD && "Invalid opcode");
182*9880d681SAndroid Build Coastguard Worker 
183*9880d681SAndroid Build Coastguard Worker   EVT SrcTy = TLI->getValueType(DL, Src);
184*9880d681SAndroid Build Coastguard Worker   EVT DstTy = TLI->getValueType(DL, Dst);
185*9880d681SAndroid Build Coastguard Worker 
186*9880d681SAndroid Build Coastguard Worker   if (!SrcTy.isSimple() || !DstTy.isSimple())
187*9880d681SAndroid Build Coastguard Worker     return BaseT::getCastInstrCost(Opcode, Dst, Src);
188*9880d681SAndroid Build Coastguard Worker 
189*9880d681SAndroid Build Coastguard Worker   static const TypeConversionCostTblEntry
190*9880d681SAndroid Build Coastguard Worker   ConversionTbl[] = {
191*9880d681SAndroid Build Coastguard Worker     { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32,  1 },
192*9880d681SAndroid Build Coastguard Worker     { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64,  0 },
193*9880d681SAndroid Build Coastguard Worker     { ISD::TRUNCATE, MVT::v8i8,  MVT::v8i32,  3 },
194*9880d681SAndroid Build Coastguard Worker     { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
195*9880d681SAndroid Build Coastguard Worker 
196*9880d681SAndroid Build Coastguard Worker     // The number of shll instructions for the extension.
197*9880d681SAndroid Build Coastguard Worker     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
198*9880d681SAndroid Build Coastguard Worker     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i16, 3 },
199*9880d681SAndroid Build Coastguard Worker     { ISD::SIGN_EXTEND, MVT::v4i64,  MVT::v4i32, 2 },
200*9880d681SAndroid Build Coastguard Worker     { ISD::ZERO_EXTEND, MVT::v4i64,  MVT::v4i32, 2 },
201*9880d681SAndroid Build Coastguard Worker     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i8,  3 },
202*9880d681SAndroid Build Coastguard Worker     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i8,  3 },
203*9880d681SAndroid Build Coastguard Worker     { ISD::SIGN_EXTEND, MVT::v8i32,  MVT::v8i16, 2 },
204*9880d681SAndroid Build Coastguard Worker     { ISD::ZERO_EXTEND, MVT::v8i32,  MVT::v8i16, 2 },
205*9880d681SAndroid Build Coastguard Worker     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i8,  7 },
206*9880d681SAndroid Build Coastguard Worker     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i8,  7 },
207*9880d681SAndroid Build Coastguard Worker     { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v8i16, 6 },
208*9880d681SAndroid Build Coastguard Worker     { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v8i16, 6 },
209*9880d681SAndroid Build Coastguard Worker     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
210*9880d681SAndroid Build Coastguard Worker     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
211*9880d681SAndroid Build Coastguard Worker     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
212*9880d681SAndroid Build Coastguard Worker     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
213*9880d681SAndroid Build Coastguard Worker 
214*9880d681SAndroid Build Coastguard Worker     // LowerVectorINT_TO_FP:
215*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
216*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
217*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
218*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
219*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
220*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
221*9880d681SAndroid Build Coastguard Worker 
222*9880d681SAndroid Build Coastguard Worker     // Complex: to v2f32
223*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
224*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
225*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
226*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
227*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
228*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
229*9880d681SAndroid Build Coastguard Worker 
230*9880d681SAndroid Build Coastguard Worker     // Complex: to v4f32
231*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8,  4 },
232*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
233*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8,  3 },
234*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
235*9880d681SAndroid Build Coastguard Worker 
236*9880d681SAndroid Build Coastguard Worker     // Complex: to v8f32
237*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8,  10 },
238*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
239*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8,  10 },
240*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
241*9880d681SAndroid Build Coastguard Worker 
242*9880d681SAndroid Build Coastguard Worker     // Complex: to v16f32
243*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
244*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
245*9880d681SAndroid Build Coastguard Worker 
246*9880d681SAndroid Build Coastguard Worker     // Complex: to v2f64
247*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
248*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
249*9880d681SAndroid Build Coastguard Worker     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
250*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
251*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
252*9880d681SAndroid Build Coastguard Worker     { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
253*9880d681SAndroid Build Coastguard Worker 
254*9880d681SAndroid Build Coastguard Worker 
255*9880d681SAndroid Build Coastguard Worker     // LowerVectorFP_TO_INT
256*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
257*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
258*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
259*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
260*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
261*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
262*9880d681SAndroid Build Coastguard Worker 
263*9880d681SAndroid Build Coastguard Worker     // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
264*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
265*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
266*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f32, 1 },
267*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
268*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
269*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f32, 1 },
270*9880d681SAndroid Build Coastguard Worker 
271*9880d681SAndroid Build Coastguard Worker     // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
272*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
273*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v4i8,  MVT::v4f32, 2 },
274*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
275*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v4i8,  MVT::v4f32, 2 },
276*9880d681SAndroid Build Coastguard Worker 
277*9880d681SAndroid Build Coastguard Worker     // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
278*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
279*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
280*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f64, 2 },
281*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
282*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
283*9880d681SAndroid Build Coastguard Worker     { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f64, 2 },
284*9880d681SAndroid Build Coastguard Worker   };
285*9880d681SAndroid Build Coastguard Worker 
286*9880d681SAndroid Build Coastguard Worker   if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
287*9880d681SAndroid Build Coastguard Worker                                                  DstTy.getSimpleVT(),
288*9880d681SAndroid Build Coastguard Worker                                                  SrcTy.getSimpleVT()))
289*9880d681SAndroid Build Coastguard Worker     return Entry->Cost;
290*9880d681SAndroid Build Coastguard Worker 
291*9880d681SAndroid Build Coastguard Worker   return BaseT::getCastInstrCost(Opcode, Dst, Src);
292*9880d681SAndroid Build Coastguard Worker }
293*9880d681SAndroid Build Coastguard Worker 
getExtractWithExtendCost(unsigned Opcode,Type * Dst,VectorType * VecTy,unsigned Index)294*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
295*9880d681SAndroid Build Coastguard Worker                                              VectorType *VecTy,
296*9880d681SAndroid Build Coastguard Worker                                              unsigned Index) {
297*9880d681SAndroid Build Coastguard Worker 
298*9880d681SAndroid Build Coastguard Worker   // Make sure we were given a valid extend opcode.
299*9880d681SAndroid Build Coastguard Worker   assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
300*9880d681SAndroid Build Coastguard Worker          "Invalid opcode");
301*9880d681SAndroid Build Coastguard Worker 
302*9880d681SAndroid Build Coastguard Worker   // We are extending an element we extract from a vector, so the source type
303*9880d681SAndroid Build Coastguard Worker   // of the extend is the element type of the vector.
304*9880d681SAndroid Build Coastguard Worker   auto *Src = VecTy->getElementType();
305*9880d681SAndroid Build Coastguard Worker 
306*9880d681SAndroid Build Coastguard Worker   // Sign- and zero-extends are for integer types only.
307*9880d681SAndroid Build Coastguard Worker   assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
308*9880d681SAndroid Build Coastguard Worker 
309*9880d681SAndroid Build Coastguard Worker   // Get the cost for the extract. We compute the cost (if any) for the extend
310*9880d681SAndroid Build Coastguard Worker   // below.
311*9880d681SAndroid Build Coastguard Worker   auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
312*9880d681SAndroid Build Coastguard Worker 
313*9880d681SAndroid Build Coastguard Worker   // Legalize the types.
314*9880d681SAndroid Build Coastguard Worker   auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
315*9880d681SAndroid Build Coastguard Worker   auto DstVT = TLI->getValueType(DL, Dst);
316*9880d681SAndroid Build Coastguard Worker   auto SrcVT = TLI->getValueType(DL, Src);
317*9880d681SAndroid Build Coastguard Worker 
318*9880d681SAndroid Build Coastguard Worker   // If the resulting type is still a vector and the destination type is legal,
319*9880d681SAndroid Build Coastguard Worker   // we may get the extension for free. If not, get the default cost for the
320*9880d681SAndroid Build Coastguard Worker   // extend.
321*9880d681SAndroid Build Coastguard Worker   if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
322*9880d681SAndroid Build Coastguard Worker     return Cost + getCastInstrCost(Opcode, Dst, Src);
323*9880d681SAndroid Build Coastguard Worker 
324*9880d681SAndroid Build Coastguard Worker   // The destination type should be larger than the element type. If not, get
325*9880d681SAndroid Build Coastguard Worker   // the default cost for the extend.
326*9880d681SAndroid Build Coastguard Worker   if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
327*9880d681SAndroid Build Coastguard Worker     return Cost + getCastInstrCost(Opcode, Dst, Src);
328*9880d681SAndroid Build Coastguard Worker 
329*9880d681SAndroid Build Coastguard Worker   switch (Opcode) {
330*9880d681SAndroid Build Coastguard Worker   default:
331*9880d681SAndroid Build Coastguard Worker     llvm_unreachable("Opcode should be either SExt or ZExt");
332*9880d681SAndroid Build Coastguard Worker 
333*9880d681SAndroid Build Coastguard Worker   // For sign-extends, we only need a smov, which performs the extension
334*9880d681SAndroid Build Coastguard Worker   // automatically.
335*9880d681SAndroid Build Coastguard Worker   case Instruction::SExt:
336*9880d681SAndroid Build Coastguard Worker     return Cost;
337*9880d681SAndroid Build Coastguard Worker 
338*9880d681SAndroid Build Coastguard Worker   // For zero-extends, the extend is performed automatically by a umov unless
339*9880d681SAndroid Build Coastguard Worker   // the destination type is i64 and the element type is i8 or i16.
340*9880d681SAndroid Build Coastguard Worker   case Instruction::ZExt:
341*9880d681SAndroid Build Coastguard Worker     if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
342*9880d681SAndroid Build Coastguard Worker       return Cost;
343*9880d681SAndroid Build Coastguard Worker   }
344*9880d681SAndroid Build Coastguard Worker 
345*9880d681SAndroid Build Coastguard Worker   // If we are unable to perform the extend for free, get the default cost.
346*9880d681SAndroid Build Coastguard Worker   return Cost + getCastInstrCost(Opcode, Dst, Src);
347*9880d681SAndroid Build Coastguard Worker }
348*9880d681SAndroid Build Coastguard Worker 
getVectorInstrCost(unsigned Opcode,Type * Val,unsigned Index)349*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
350*9880d681SAndroid Build Coastguard Worker                                        unsigned Index) {
351*9880d681SAndroid Build Coastguard Worker   assert(Val->isVectorTy() && "This must be a vector type");
352*9880d681SAndroid Build Coastguard Worker 
353*9880d681SAndroid Build Coastguard Worker   if (Index != -1U) {
354*9880d681SAndroid Build Coastguard Worker     // Legalize the type.
355*9880d681SAndroid Build Coastguard Worker     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
356*9880d681SAndroid Build Coastguard Worker 
357*9880d681SAndroid Build Coastguard Worker     // This type is legalized to a scalar type.
358*9880d681SAndroid Build Coastguard Worker     if (!LT.second.isVector())
359*9880d681SAndroid Build Coastguard Worker       return 0;
360*9880d681SAndroid Build Coastguard Worker 
361*9880d681SAndroid Build Coastguard Worker     // The type may be split. Normalize the index to the new type.
362*9880d681SAndroid Build Coastguard Worker     unsigned Width = LT.second.getVectorNumElements();
363*9880d681SAndroid Build Coastguard Worker     Index = Index % Width;
364*9880d681SAndroid Build Coastguard Worker 
365*9880d681SAndroid Build Coastguard Worker     // The element at index zero is already inside the vector.
366*9880d681SAndroid Build Coastguard Worker     if (Index == 0)
367*9880d681SAndroid Build Coastguard Worker       return 0;
368*9880d681SAndroid Build Coastguard Worker   }
369*9880d681SAndroid Build Coastguard Worker 
370*9880d681SAndroid Build Coastguard Worker   // All other insert/extracts cost this much.
371*9880d681SAndroid Build Coastguard Worker   return ST->getVectorInsertExtractBaseCost();
372*9880d681SAndroid Build Coastguard Worker }
373*9880d681SAndroid Build Coastguard Worker 
getArithmeticInstrCost(unsigned Opcode,Type * Ty,TTI::OperandValueKind Opd1Info,TTI::OperandValueKind Opd2Info,TTI::OperandValueProperties Opd1PropInfo,TTI::OperandValueProperties Opd2PropInfo)374*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getArithmeticInstrCost(
375*9880d681SAndroid Build Coastguard Worker     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
376*9880d681SAndroid Build Coastguard Worker     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
377*9880d681SAndroid Build Coastguard Worker     TTI::OperandValueProperties Opd2PropInfo) {
378*9880d681SAndroid Build Coastguard Worker   // Legalize the type.
379*9880d681SAndroid Build Coastguard Worker   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
380*9880d681SAndroid Build Coastguard Worker 
381*9880d681SAndroid Build Coastguard Worker   int ISD = TLI->InstructionOpcodeToISD(Opcode);
382*9880d681SAndroid Build Coastguard Worker 
383*9880d681SAndroid Build Coastguard Worker   if (ISD == ISD::SDIV &&
384*9880d681SAndroid Build Coastguard Worker       Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
385*9880d681SAndroid Build Coastguard Worker       Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
386*9880d681SAndroid Build Coastguard Worker     // On AArch64, scalar signed division by constants power-of-two are
387*9880d681SAndroid Build Coastguard Worker     // normally expanded to the sequence ADD + CMP + SELECT + SRA.
388*9880d681SAndroid Build Coastguard Worker     // The OperandValue properties many not be same as that of previous
389*9880d681SAndroid Build Coastguard Worker     // operation; conservatively assume OP_None.
390*9880d681SAndroid Build Coastguard Worker     int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
391*9880d681SAndroid Build Coastguard Worker                                       TargetTransformInfo::OP_None,
392*9880d681SAndroid Build Coastguard Worker                                       TargetTransformInfo::OP_None);
393*9880d681SAndroid Build Coastguard Worker     Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
394*9880d681SAndroid Build Coastguard Worker                                    TargetTransformInfo::OP_None,
395*9880d681SAndroid Build Coastguard Worker                                    TargetTransformInfo::OP_None);
396*9880d681SAndroid Build Coastguard Worker     Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
397*9880d681SAndroid Build Coastguard Worker                                    TargetTransformInfo::OP_None,
398*9880d681SAndroid Build Coastguard Worker                                    TargetTransformInfo::OP_None);
399*9880d681SAndroid Build Coastguard Worker     Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
400*9880d681SAndroid Build Coastguard Worker                                    TargetTransformInfo::OP_None,
401*9880d681SAndroid Build Coastguard Worker                                    TargetTransformInfo::OP_None);
402*9880d681SAndroid Build Coastguard Worker     return Cost;
403*9880d681SAndroid Build Coastguard Worker   }
404*9880d681SAndroid Build Coastguard Worker 
405*9880d681SAndroid Build Coastguard Worker   switch (ISD) {
406*9880d681SAndroid Build Coastguard Worker   default:
407*9880d681SAndroid Build Coastguard Worker     return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
408*9880d681SAndroid Build Coastguard Worker                                          Opd1PropInfo, Opd2PropInfo);
409*9880d681SAndroid Build Coastguard Worker   case ISD::ADD:
410*9880d681SAndroid Build Coastguard Worker   case ISD::MUL:
411*9880d681SAndroid Build Coastguard Worker   case ISD::XOR:
412*9880d681SAndroid Build Coastguard Worker   case ISD::OR:
413*9880d681SAndroid Build Coastguard Worker   case ISD::AND:
414*9880d681SAndroid Build Coastguard Worker     // These nodes are marked as 'custom' for combining purposes only.
415*9880d681SAndroid Build Coastguard Worker     // We know that they are legal. See LowerAdd in ISelLowering.
416*9880d681SAndroid Build Coastguard Worker     return 1 * LT.first;
417*9880d681SAndroid Build Coastguard Worker   }
418*9880d681SAndroid Build Coastguard Worker }
419*9880d681SAndroid Build Coastguard Worker 
getAddressComputationCost(Type * Ty,bool IsComplex)420*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
421*9880d681SAndroid Build Coastguard Worker   // Address computations in vectorized code with non-consecutive addresses will
422*9880d681SAndroid Build Coastguard Worker   // likely result in more instructions compared to scalar code where the
423*9880d681SAndroid Build Coastguard Worker   // computation can more often be merged into the index mode. The resulting
424*9880d681SAndroid Build Coastguard Worker   // extra micro-ops can significantly decrease throughput.
425*9880d681SAndroid Build Coastguard Worker   unsigned NumVectorInstToHideOverhead = 10;
426*9880d681SAndroid Build Coastguard Worker 
427*9880d681SAndroid Build Coastguard Worker   if (Ty->isVectorTy() && IsComplex)
428*9880d681SAndroid Build Coastguard Worker     return NumVectorInstToHideOverhead;
429*9880d681SAndroid Build Coastguard Worker 
430*9880d681SAndroid Build Coastguard Worker   // In many cases the address computation is not merged into the instruction
431*9880d681SAndroid Build Coastguard Worker   // addressing mode.
432*9880d681SAndroid Build Coastguard Worker   return 1;
433*9880d681SAndroid Build Coastguard Worker }
434*9880d681SAndroid Build Coastguard Worker 
getCmpSelInstrCost(unsigned Opcode,Type * ValTy,Type * CondTy)435*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
436*9880d681SAndroid Build Coastguard Worker                                        Type *CondTy) {
437*9880d681SAndroid Build Coastguard Worker 
438*9880d681SAndroid Build Coastguard Worker   int ISD = TLI->InstructionOpcodeToISD(Opcode);
439*9880d681SAndroid Build Coastguard Worker   // We don't lower some vector selects well that are wider than the register
440*9880d681SAndroid Build Coastguard Worker   // width.
441*9880d681SAndroid Build Coastguard Worker   if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
442*9880d681SAndroid Build Coastguard Worker     // We would need this many instructions to hide the scalarization happening.
443*9880d681SAndroid Build Coastguard Worker     const int AmortizationCost = 20;
444*9880d681SAndroid Build Coastguard Worker     static const TypeConversionCostTblEntry
445*9880d681SAndroid Build Coastguard Worker     VectorSelectTbl[] = {
446*9880d681SAndroid Build Coastguard Worker       { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
447*9880d681SAndroid Build Coastguard Worker       { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
448*9880d681SAndroid Build Coastguard Worker       { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
449*9880d681SAndroid Build Coastguard Worker       { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
450*9880d681SAndroid Build Coastguard Worker       { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
451*9880d681SAndroid Build Coastguard Worker       { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
452*9880d681SAndroid Build Coastguard Worker     };
453*9880d681SAndroid Build Coastguard Worker 
454*9880d681SAndroid Build Coastguard Worker     EVT SelCondTy = TLI->getValueType(DL, CondTy);
455*9880d681SAndroid Build Coastguard Worker     EVT SelValTy = TLI->getValueType(DL, ValTy);
456*9880d681SAndroid Build Coastguard Worker     if (SelCondTy.isSimple() && SelValTy.isSimple()) {
457*9880d681SAndroid Build Coastguard Worker       if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
458*9880d681SAndroid Build Coastguard Worker                                                      SelCondTy.getSimpleVT(),
459*9880d681SAndroid Build Coastguard Worker                                                      SelValTy.getSimpleVT()))
460*9880d681SAndroid Build Coastguard Worker         return Entry->Cost;
461*9880d681SAndroid Build Coastguard Worker     }
462*9880d681SAndroid Build Coastguard Worker   }
463*9880d681SAndroid Build Coastguard Worker   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
464*9880d681SAndroid Build Coastguard Worker }
465*9880d681SAndroid Build Coastguard Worker 
getMemoryOpCost(unsigned Opcode,Type * Src,unsigned Alignment,unsigned AddressSpace)466*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
467*9880d681SAndroid Build Coastguard Worker                                     unsigned Alignment, unsigned AddressSpace) {
468*9880d681SAndroid Build Coastguard Worker   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
469*9880d681SAndroid Build Coastguard Worker 
470*9880d681SAndroid Build Coastguard Worker   if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
471*9880d681SAndroid Build Coastguard Worker       Src->getVectorElementType()->isIntegerTy(64)) {
472*9880d681SAndroid Build Coastguard Worker     // Unaligned stores are extremely inefficient. We don't split
473*9880d681SAndroid Build Coastguard Worker     // unaligned v2i64 stores because the negative impact that has shown in
474*9880d681SAndroid Build Coastguard Worker     // practice on inlined memcpy code.
475*9880d681SAndroid Build Coastguard Worker     // We make v2i64 stores expensive so that we will only vectorize if there
476*9880d681SAndroid Build Coastguard Worker     // are 6 other instructions getting vectorized.
477*9880d681SAndroid Build Coastguard Worker     int AmortizationCost = 6;
478*9880d681SAndroid Build Coastguard Worker 
479*9880d681SAndroid Build Coastguard Worker     return LT.first * 2 * AmortizationCost;
480*9880d681SAndroid Build Coastguard Worker   }
481*9880d681SAndroid Build Coastguard Worker 
482*9880d681SAndroid Build Coastguard Worker   if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
483*9880d681SAndroid Build Coastguard Worker       Src->getVectorNumElements() < 8) {
484*9880d681SAndroid Build Coastguard Worker     // We scalarize the loads/stores because there is not v.4b register and we
485*9880d681SAndroid Build Coastguard Worker     // have to promote the elements to v.4h.
486*9880d681SAndroid Build Coastguard Worker     unsigned NumVecElts = Src->getVectorNumElements();
487*9880d681SAndroid Build Coastguard Worker     unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
488*9880d681SAndroid Build Coastguard Worker     // We generate 2 instructions per vector element.
489*9880d681SAndroid Build Coastguard Worker     return NumVectorizableInstsToAmortize * NumVecElts * 2;
490*9880d681SAndroid Build Coastguard Worker   }
491*9880d681SAndroid Build Coastguard Worker 
492*9880d681SAndroid Build Coastguard Worker   return LT.first;
493*9880d681SAndroid Build Coastguard Worker }
494*9880d681SAndroid Build Coastguard Worker 
getInterleavedMemoryOpCost(unsigned Opcode,Type * VecTy,unsigned Factor,ArrayRef<unsigned> Indices,unsigned Alignment,unsigned AddressSpace)495*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
496*9880d681SAndroid Build Coastguard Worker                                                unsigned Factor,
497*9880d681SAndroid Build Coastguard Worker                                                ArrayRef<unsigned> Indices,
498*9880d681SAndroid Build Coastguard Worker                                                unsigned Alignment,
499*9880d681SAndroid Build Coastguard Worker                                                unsigned AddressSpace) {
500*9880d681SAndroid Build Coastguard Worker   assert(Factor >= 2 && "Invalid interleave factor");
501*9880d681SAndroid Build Coastguard Worker   assert(isa<VectorType>(VecTy) && "Expect a vector type");
502*9880d681SAndroid Build Coastguard Worker 
503*9880d681SAndroid Build Coastguard Worker   if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
504*9880d681SAndroid Build Coastguard Worker     unsigned NumElts = VecTy->getVectorNumElements();
505*9880d681SAndroid Build Coastguard Worker     Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
506*9880d681SAndroid Build Coastguard Worker     unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
507*9880d681SAndroid Build Coastguard Worker 
508*9880d681SAndroid Build Coastguard Worker     // ldN/stN only support legal vector types of size 64 or 128 in bits.
509*9880d681SAndroid Build Coastguard Worker     if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
510*9880d681SAndroid Build Coastguard Worker       return Factor;
511*9880d681SAndroid Build Coastguard Worker   }
512*9880d681SAndroid Build Coastguard Worker 
513*9880d681SAndroid Build Coastguard Worker   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
514*9880d681SAndroid Build Coastguard Worker                                            Alignment, AddressSpace);
515*9880d681SAndroid Build Coastguard Worker }
516*9880d681SAndroid Build Coastguard Worker 
getCostOfKeepingLiveOverCall(ArrayRef<Type * > Tys)517*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
518*9880d681SAndroid Build Coastguard Worker   int Cost = 0;
519*9880d681SAndroid Build Coastguard Worker   for (auto *I : Tys) {
520*9880d681SAndroid Build Coastguard Worker     if (!I->isVectorTy())
521*9880d681SAndroid Build Coastguard Worker       continue;
522*9880d681SAndroid Build Coastguard Worker     if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
523*9880d681SAndroid Build Coastguard Worker       Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
524*9880d681SAndroid Build Coastguard Worker         getMemoryOpCost(Instruction::Load, I, 128, 0);
525*9880d681SAndroid Build Coastguard Worker   }
526*9880d681SAndroid Build Coastguard Worker   return Cost;
527*9880d681SAndroid Build Coastguard Worker }
528*9880d681SAndroid Build Coastguard Worker 
getMaxInterleaveFactor(unsigned VF)529*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
530*9880d681SAndroid Build Coastguard Worker   return ST->getMaxInterleaveFactor();
531*9880d681SAndroid Build Coastguard Worker }
532*9880d681SAndroid Build Coastguard Worker 
getUnrollingPreferences(Loop * L,TTI::UnrollingPreferences & UP)533*9880d681SAndroid Build Coastguard Worker void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
534*9880d681SAndroid Build Coastguard Worker                                              TTI::UnrollingPreferences &UP) {
535*9880d681SAndroid Build Coastguard Worker   // Enable partial unrolling and runtime unrolling.
536*9880d681SAndroid Build Coastguard Worker   BaseT::getUnrollingPreferences(L, UP);
537*9880d681SAndroid Build Coastguard Worker 
538*9880d681SAndroid Build Coastguard Worker   // For inner loop, it is more likely to be a hot one, and the runtime check
539*9880d681SAndroid Build Coastguard Worker   // can be promoted out from LICM pass, so the overhead is less, let's try
540*9880d681SAndroid Build Coastguard Worker   // a larger threshold to unroll more loops.
541*9880d681SAndroid Build Coastguard Worker   if (L->getLoopDepth() > 1)
542*9880d681SAndroid Build Coastguard Worker     UP.PartialThreshold *= 2;
543*9880d681SAndroid Build Coastguard Worker 
544*9880d681SAndroid Build Coastguard Worker   // Disable partial & runtime unrolling on -Os.
545*9880d681SAndroid Build Coastguard Worker   UP.PartialOptSizeThreshold = 0;
546*9880d681SAndroid Build Coastguard Worker }
547*9880d681SAndroid Build Coastguard Worker 
getOrCreateResultFromMemIntrinsic(IntrinsicInst * Inst,Type * ExpectedType)548*9880d681SAndroid Build Coastguard Worker Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
549*9880d681SAndroid Build Coastguard Worker                                                          Type *ExpectedType) {
550*9880d681SAndroid Build Coastguard Worker   switch (Inst->getIntrinsicID()) {
551*9880d681SAndroid Build Coastguard Worker   default:
552*9880d681SAndroid Build Coastguard Worker     return nullptr;
553*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st2:
554*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st3:
555*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st4: {
556*9880d681SAndroid Build Coastguard Worker     // Create a struct type
557*9880d681SAndroid Build Coastguard Worker     StructType *ST = dyn_cast<StructType>(ExpectedType);
558*9880d681SAndroid Build Coastguard Worker     if (!ST)
559*9880d681SAndroid Build Coastguard Worker       return nullptr;
560*9880d681SAndroid Build Coastguard Worker     unsigned NumElts = Inst->getNumArgOperands() - 1;
561*9880d681SAndroid Build Coastguard Worker     if (ST->getNumElements() != NumElts)
562*9880d681SAndroid Build Coastguard Worker       return nullptr;
563*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0, e = NumElts; i != e; ++i) {
564*9880d681SAndroid Build Coastguard Worker       if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
565*9880d681SAndroid Build Coastguard Worker         return nullptr;
566*9880d681SAndroid Build Coastguard Worker     }
567*9880d681SAndroid Build Coastguard Worker     Value *Res = UndefValue::get(ExpectedType);
568*9880d681SAndroid Build Coastguard Worker     IRBuilder<> Builder(Inst);
569*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0, e = NumElts; i != e; ++i) {
570*9880d681SAndroid Build Coastguard Worker       Value *L = Inst->getArgOperand(i);
571*9880d681SAndroid Build Coastguard Worker       Res = Builder.CreateInsertValue(Res, L, i);
572*9880d681SAndroid Build Coastguard Worker     }
573*9880d681SAndroid Build Coastguard Worker     return Res;
574*9880d681SAndroid Build Coastguard Worker   }
575*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld2:
576*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld3:
577*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld4:
578*9880d681SAndroid Build Coastguard Worker     if (Inst->getType() == ExpectedType)
579*9880d681SAndroid Build Coastguard Worker       return Inst;
580*9880d681SAndroid Build Coastguard Worker     return nullptr;
581*9880d681SAndroid Build Coastguard Worker   }
582*9880d681SAndroid Build Coastguard Worker }
583*9880d681SAndroid Build Coastguard Worker 
getTgtMemIntrinsic(IntrinsicInst * Inst,MemIntrinsicInfo & Info)584*9880d681SAndroid Build Coastguard Worker bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
585*9880d681SAndroid Build Coastguard Worker                                         MemIntrinsicInfo &Info) {
586*9880d681SAndroid Build Coastguard Worker   switch (Inst->getIntrinsicID()) {
587*9880d681SAndroid Build Coastguard Worker   default:
588*9880d681SAndroid Build Coastguard Worker     break;
589*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld2:
590*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld3:
591*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld4:
592*9880d681SAndroid Build Coastguard Worker     Info.ReadMem = true;
593*9880d681SAndroid Build Coastguard Worker     Info.WriteMem = false;
594*9880d681SAndroid Build Coastguard Worker     Info.IsSimple = true;
595*9880d681SAndroid Build Coastguard Worker     Info.NumMemRefs = 1;
596*9880d681SAndroid Build Coastguard Worker     Info.PtrVal = Inst->getArgOperand(0);
597*9880d681SAndroid Build Coastguard Worker     break;
598*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st2:
599*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st3:
600*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st4:
601*9880d681SAndroid Build Coastguard Worker     Info.ReadMem = false;
602*9880d681SAndroid Build Coastguard Worker     Info.WriteMem = true;
603*9880d681SAndroid Build Coastguard Worker     Info.IsSimple = true;
604*9880d681SAndroid Build Coastguard Worker     Info.NumMemRefs = 1;
605*9880d681SAndroid Build Coastguard Worker     Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
606*9880d681SAndroid Build Coastguard Worker     break;
607*9880d681SAndroid Build Coastguard Worker   }
608*9880d681SAndroid Build Coastguard Worker 
609*9880d681SAndroid Build Coastguard Worker   switch (Inst->getIntrinsicID()) {
610*9880d681SAndroid Build Coastguard Worker   default:
611*9880d681SAndroid Build Coastguard Worker     return false;
612*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld2:
613*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st2:
614*9880d681SAndroid Build Coastguard Worker     Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
615*9880d681SAndroid Build Coastguard Worker     break;
616*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld3:
617*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st3:
618*9880d681SAndroid Build Coastguard Worker     Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
619*9880d681SAndroid Build Coastguard Worker     break;
620*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_ld4:
621*9880d681SAndroid Build Coastguard Worker   case Intrinsic::aarch64_neon_st4:
622*9880d681SAndroid Build Coastguard Worker     Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
623*9880d681SAndroid Build Coastguard Worker     break;
624*9880d681SAndroid Build Coastguard Worker   }
625*9880d681SAndroid Build Coastguard Worker   return true;
626*9880d681SAndroid Build Coastguard Worker }
627*9880d681SAndroid Build Coastguard Worker 
getCacheLineSize()628*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getCacheLineSize() {
629*9880d681SAndroid Build Coastguard Worker   return ST->getCacheLineSize();
630*9880d681SAndroid Build Coastguard Worker }
631*9880d681SAndroid Build Coastguard Worker 
getPrefetchDistance()632*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getPrefetchDistance() {
633*9880d681SAndroid Build Coastguard Worker   return ST->getPrefetchDistance();
634*9880d681SAndroid Build Coastguard Worker }
635*9880d681SAndroid Build Coastguard Worker 
getMinPrefetchStride()636*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getMinPrefetchStride() {
637*9880d681SAndroid Build Coastguard Worker   return ST->getMinPrefetchStride();
638*9880d681SAndroid Build Coastguard Worker }
639*9880d681SAndroid Build Coastguard Worker 
getMaxPrefetchIterationsAhead()640*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
641*9880d681SAndroid Build Coastguard Worker   return ST->getMaxPrefetchIterationsAhead();
642*9880d681SAndroid Build Coastguard Worker }
643