//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker
10*9880d681SAndroid Build Coastguard Worker #include "AArch64TargetTransformInfo.h"
11*9880d681SAndroid Build Coastguard Worker #include "MCTargetDesc/AArch64AddressingModes.h"
12*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/TargetTransformInfo.h"
13*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/LoopInfo.h"
14*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/BasicTTIImpl.h"
15*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
16*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/CostTable.h"
17*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetLowering.h"
18*9880d681SAndroid Build Coastguard Worker #include <algorithm>
19*9880d681SAndroid Build Coastguard Worker using namespace llvm;
20*9880d681SAndroid Build Coastguard Worker
21*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "aarch64tti"
22*9880d681SAndroid Build Coastguard Worker
23*9880d681SAndroid Build Coastguard Worker /// \brief Calculate the cost of materializing a 64-bit value. This helper
24*9880d681SAndroid Build Coastguard Worker /// method might only calculate a fraction of a larger immediate. Therefore it
25*9880d681SAndroid Build Coastguard Worker /// is valid to return a cost of ZERO.
getIntImmCost(int64_t Val)26*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getIntImmCost(int64_t Val) {
27*9880d681SAndroid Build Coastguard Worker // Check if the immediate can be encoded within an instruction.
28*9880d681SAndroid Build Coastguard Worker if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
29*9880d681SAndroid Build Coastguard Worker return 0;
30*9880d681SAndroid Build Coastguard Worker
31*9880d681SAndroid Build Coastguard Worker if (Val < 0)
32*9880d681SAndroid Build Coastguard Worker Val = ~Val;
33*9880d681SAndroid Build Coastguard Worker
34*9880d681SAndroid Build Coastguard Worker // Calculate how many moves we will need to materialize this constant.
35*9880d681SAndroid Build Coastguard Worker unsigned LZ = countLeadingZeros((uint64_t)Val);
36*9880d681SAndroid Build Coastguard Worker return (64 - LZ + 15) / 16;
37*9880d681SAndroid Build Coastguard Worker }
38*9880d681SAndroid Build Coastguard Worker
39*9880d681SAndroid Build Coastguard Worker /// \brief Calculate the cost of materializing the given constant.
getIntImmCost(const APInt & Imm,Type * Ty)40*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
41*9880d681SAndroid Build Coastguard Worker assert(Ty->isIntegerTy());
42*9880d681SAndroid Build Coastguard Worker
43*9880d681SAndroid Build Coastguard Worker unsigned BitSize = Ty->getPrimitiveSizeInBits();
44*9880d681SAndroid Build Coastguard Worker if (BitSize == 0)
45*9880d681SAndroid Build Coastguard Worker return ~0U;
46*9880d681SAndroid Build Coastguard Worker
47*9880d681SAndroid Build Coastguard Worker // Sign-extend all constants to a multiple of 64-bit.
48*9880d681SAndroid Build Coastguard Worker APInt ImmVal = Imm;
49*9880d681SAndroid Build Coastguard Worker if (BitSize & 0x3f)
50*9880d681SAndroid Build Coastguard Worker ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
51*9880d681SAndroid Build Coastguard Worker
52*9880d681SAndroid Build Coastguard Worker // Split the constant into 64-bit chunks and calculate the cost for each
53*9880d681SAndroid Build Coastguard Worker // chunk.
54*9880d681SAndroid Build Coastguard Worker int Cost = 0;
55*9880d681SAndroid Build Coastguard Worker for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
56*9880d681SAndroid Build Coastguard Worker APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
57*9880d681SAndroid Build Coastguard Worker int64_t Val = Tmp.getSExtValue();
58*9880d681SAndroid Build Coastguard Worker Cost += getIntImmCost(Val);
59*9880d681SAndroid Build Coastguard Worker }
60*9880d681SAndroid Build Coastguard Worker // We need at least one instruction to materialze the constant.
61*9880d681SAndroid Build Coastguard Worker return std::max(1, Cost);
62*9880d681SAndroid Build Coastguard Worker }
63*9880d681SAndroid Build Coastguard Worker
getIntImmCost(unsigned Opcode,unsigned Idx,const APInt & Imm,Type * Ty)64*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
65*9880d681SAndroid Build Coastguard Worker const APInt &Imm, Type *Ty) {
66*9880d681SAndroid Build Coastguard Worker assert(Ty->isIntegerTy());
67*9880d681SAndroid Build Coastguard Worker
68*9880d681SAndroid Build Coastguard Worker unsigned BitSize = Ty->getPrimitiveSizeInBits();
69*9880d681SAndroid Build Coastguard Worker // There is no cost model for constants with a bit size of 0. Return TCC_Free
70*9880d681SAndroid Build Coastguard Worker // here, so that constant hoisting will ignore this constant.
71*9880d681SAndroid Build Coastguard Worker if (BitSize == 0)
72*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
73*9880d681SAndroid Build Coastguard Worker
74*9880d681SAndroid Build Coastguard Worker unsigned ImmIdx = ~0U;
75*9880d681SAndroid Build Coastguard Worker switch (Opcode) {
76*9880d681SAndroid Build Coastguard Worker default:
77*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
78*9880d681SAndroid Build Coastguard Worker case Instruction::GetElementPtr:
79*9880d681SAndroid Build Coastguard Worker // Always hoist the base address of a GetElementPtr.
80*9880d681SAndroid Build Coastguard Worker if (Idx == 0)
81*9880d681SAndroid Build Coastguard Worker return 2 * TTI::TCC_Basic;
82*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
83*9880d681SAndroid Build Coastguard Worker case Instruction::Store:
84*9880d681SAndroid Build Coastguard Worker ImmIdx = 0;
85*9880d681SAndroid Build Coastguard Worker break;
86*9880d681SAndroid Build Coastguard Worker case Instruction::Add:
87*9880d681SAndroid Build Coastguard Worker case Instruction::Sub:
88*9880d681SAndroid Build Coastguard Worker case Instruction::Mul:
89*9880d681SAndroid Build Coastguard Worker case Instruction::UDiv:
90*9880d681SAndroid Build Coastguard Worker case Instruction::SDiv:
91*9880d681SAndroid Build Coastguard Worker case Instruction::URem:
92*9880d681SAndroid Build Coastguard Worker case Instruction::SRem:
93*9880d681SAndroid Build Coastguard Worker case Instruction::And:
94*9880d681SAndroid Build Coastguard Worker case Instruction::Or:
95*9880d681SAndroid Build Coastguard Worker case Instruction::Xor:
96*9880d681SAndroid Build Coastguard Worker case Instruction::ICmp:
97*9880d681SAndroid Build Coastguard Worker ImmIdx = 1;
98*9880d681SAndroid Build Coastguard Worker break;
99*9880d681SAndroid Build Coastguard Worker // Always return TCC_Free for the shift value of a shift instruction.
100*9880d681SAndroid Build Coastguard Worker case Instruction::Shl:
101*9880d681SAndroid Build Coastguard Worker case Instruction::LShr:
102*9880d681SAndroid Build Coastguard Worker case Instruction::AShr:
103*9880d681SAndroid Build Coastguard Worker if (Idx == 1)
104*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
105*9880d681SAndroid Build Coastguard Worker break;
106*9880d681SAndroid Build Coastguard Worker case Instruction::Trunc:
107*9880d681SAndroid Build Coastguard Worker case Instruction::ZExt:
108*9880d681SAndroid Build Coastguard Worker case Instruction::SExt:
109*9880d681SAndroid Build Coastguard Worker case Instruction::IntToPtr:
110*9880d681SAndroid Build Coastguard Worker case Instruction::PtrToInt:
111*9880d681SAndroid Build Coastguard Worker case Instruction::BitCast:
112*9880d681SAndroid Build Coastguard Worker case Instruction::PHI:
113*9880d681SAndroid Build Coastguard Worker case Instruction::Call:
114*9880d681SAndroid Build Coastguard Worker case Instruction::Select:
115*9880d681SAndroid Build Coastguard Worker case Instruction::Ret:
116*9880d681SAndroid Build Coastguard Worker case Instruction::Load:
117*9880d681SAndroid Build Coastguard Worker break;
118*9880d681SAndroid Build Coastguard Worker }
119*9880d681SAndroid Build Coastguard Worker
120*9880d681SAndroid Build Coastguard Worker if (Idx == ImmIdx) {
121*9880d681SAndroid Build Coastguard Worker int NumConstants = (BitSize + 63) / 64;
122*9880d681SAndroid Build Coastguard Worker int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
123*9880d681SAndroid Build Coastguard Worker return (Cost <= NumConstants * TTI::TCC_Basic)
124*9880d681SAndroid Build Coastguard Worker ? static_cast<int>(TTI::TCC_Free)
125*9880d681SAndroid Build Coastguard Worker : Cost;
126*9880d681SAndroid Build Coastguard Worker }
127*9880d681SAndroid Build Coastguard Worker return AArch64TTIImpl::getIntImmCost(Imm, Ty);
128*9880d681SAndroid Build Coastguard Worker }
129*9880d681SAndroid Build Coastguard Worker
getIntImmCost(Intrinsic::ID IID,unsigned Idx,const APInt & Imm,Type * Ty)130*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
131*9880d681SAndroid Build Coastguard Worker const APInt &Imm, Type *Ty) {
132*9880d681SAndroid Build Coastguard Worker assert(Ty->isIntegerTy());
133*9880d681SAndroid Build Coastguard Worker
134*9880d681SAndroid Build Coastguard Worker unsigned BitSize = Ty->getPrimitiveSizeInBits();
135*9880d681SAndroid Build Coastguard Worker // There is no cost model for constants with a bit size of 0. Return TCC_Free
136*9880d681SAndroid Build Coastguard Worker // here, so that constant hoisting will ignore this constant.
137*9880d681SAndroid Build Coastguard Worker if (BitSize == 0)
138*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
139*9880d681SAndroid Build Coastguard Worker
140*9880d681SAndroid Build Coastguard Worker switch (IID) {
141*9880d681SAndroid Build Coastguard Worker default:
142*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
143*9880d681SAndroid Build Coastguard Worker case Intrinsic::sadd_with_overflow:
144*9880d681SAndroid Build Coastguard Worker case Intrinsic::uadd_with_overflow:
145*9880d681SAndroid Build Coastguard Worker case Intrinsic::ssub_with_overflow:
146*9880d681SAndroid Build Coastguard Worker case Intrinsic::usub_with_overflow:
147*9880d681SAndroid Build Coastguard Worker case Intrinsic::smul_with_overflow:
148*9880d681SAndroid Build Coastguard Worker case Intrinsic::umul_with_overflow:
149*9880d681SAndroid Build Coastguard Worker if (Idx == 1) {
150*9880d681SAndroid Build Coastguard Worker int NumConstants = (BitSize + 63) / 64;
151*9880d681SAndroid Build Coastguard Worker int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
152*9880d681SAndroid Build Coastguard Worker return (Cost <= NumConstants * TTI::TCC_Basic)
153*9880d681SAndroid Build Coastguard Worker ? static_cast<int>(TTI::TCC_Free)
154*9880d681SAndroid Build Coastguard Worker : Cost;
155*9880d681SAndroid Build Coastguard Worker }
156*9880d681SAndroid Build Coastguard Worker break;
157*9880d681SAndroid Build Coastguard Worker case Intrinsic::experimental_stackmap:
158*9880d681SAndroid Build Coastguard Worker if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
159*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
160*9880d681SAndroid Build Coastguard Worker break;
161*9880d681SAndroid Build Coastguard Worker case Intrinsic::experimental_patchpoint_void:
162*9880d681SAndroid Build Coastguard Worker case Intrinsic::experimental_patchpoint_i64:
163*9880d681SAndroid Build Coastguard Worker if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
164*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
165*9880d681SAndroid Build Coastguard Worker break;
166*9880d681SAndroid Build Coastguard Worker }
167*9880d681SAndroid Build Coastguard Worker return AArch64TTIImpl::getIntImmCost(Imm, Ty);
168*9880d681SAndroid Build Coastguard Worker }
169*9880d681SAndroid Build Coastguard Worker
170*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::PopcntSupportKind
getPopcntSupport(unsigned TyWidth)171*9880d681SAndroid Build Coastguard Worker AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
172*9880d681SAndroid Build Coastguard Worker assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
173*9880d681SAndroid Build Coastguard Worker if (TyWidth == 32 || TyWidth == 64)
174*9880d681SAndroid Build Coastguard Worker return TTI::PSK_FastHardware;
175*9880d681SAndroid Build Coastguard Worker // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
176*9880d681SAndroid Build Coastguard Worker return TTI::PSK_Software;
177*9880d681SAndroid Build Coastguard Worker }
178*9880d681SAndroid Build Coastguard Worker
getCastInstrCost(unsigned Opcode,Type * Dst,Type * Src)179*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
180*9880d681SAndroid Build Coastguard Worker int ISD = TLI->InstructionOpcodeToISD(Opcode);
181*9880d681SAndroid Build Coastguard Worker assert(ISD && "Invalid opcode");
182*9880d681SAndroid Build Coastguard Worker
183*9880d681SAndroid Build Coastguard Worker EVT SrcTy = TLI->getValueType(DL, Src);
184*9880d681SAndroid Build Coastguard Worker EVT DstTy = TLI->getValueType(DL, Dst);
185*9880d681SAndroid Build Coastguard Worker
186*9880d681SAndroid Build Coastguard Worker if (!SrcTy.isSimple() || !DstTy.isSimple())
187*9880d681SAndroid Build Coastguard Worker return BaseT::getCastInstrCost(Opcode, Dst, Src);
188*9880d681SAndroid Build Coastguard Worker
189*9880d681SAndroid Build Coastguard Worker static const TypeConversionCostTblEntry
190*9880d681SAndroid Build Coastguard Worker ConversionTbl[] = {
191*9880d681SAndroid Build Coastguard Worker { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
192*9880d681SAndroid Build Coastguard Worker { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
193*9880d681SAndroid Build Coastguard Worker { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
194*9880d681SAndroid Build Coastguard Worker { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
195*9880d681SAndroid Build Coastguard Worker
196*9880d681SAndroid Build Coastguard Worker // The number of shll instructions for the extension.
197*9880d681SAndroid Build Coastguard Worker { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
198*9880d681SAndroid Build Coastguard Worker { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
199*9880d681SAndroid Build Coastguard Worker { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
200*9880d681SAndroid Build Coastguard Worker { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
201*9880d681SAndroid Build Coastguard Worker { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
202*9880d681SAndroid Build Coastguard Worker { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
203*9880d681SAndroid Build Coastguard Worker { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
204*9880d681SAndroid Build Coastguard Worker { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
205*9880d681SAndroid Build Coastguard Worker { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
206*9880d681SAndroid Build Coastguard Worker { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
207*9880d681SAndroid Build Coastguard Worker { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
208*9880d681SAndroid Build Coastguard Worker { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
209*9880d681SAndroid Build Coastguard Worker { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
210*9880d681SAndroid Build Coastguard Worker { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
211*9880d681SAndroid Build Coastguard Worker { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
212*9880d681SAndroid Build Coastguard Worker { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
213*9880d681SAndroid Build Coastguard Worker
214*9880d681SAndroid Build Coastguard Worker // LowerVectorINT_TO_FP:
215*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
216*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
217*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
218*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
219*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
220*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
221*9880d681SAndroid Build Coastguard Worker
222*9880d681SAndroid Build Coastguard Worker // Complex: to v2f32
223*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
224*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
225*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
226*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
227*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
228*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
229*9880d681SAndroid Build Coastguard Worker
230*9880d681SAndroid Build Coastguard Worker // Complex: to v4f32
231*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
232*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
233*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
234*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
235*9880d681SAndroid Build Coastguard Worker
236*9880d681SAndroid Build Coastguard Worker // Complex: to v8f32
237*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
238*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
239*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
240*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
241*9880d681SAndroid Build Coastguard Worker
242*9880d681SAndroid Build Coastguard Worker // Complex: to v16f32
243*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
244*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
245*9880d681SAndroid Build Coastguard Worker
246*9880d681SAndroid Build Coastguard Worker // Complex: to v2f64
247*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
248*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
249*9880d681SAndroid Build Coastguard Worker { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
250*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
251*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
252*9880d681SAndroid Build Coastguard Worker { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
253*9880d681SAndroid Build Coastguard Worker
254*9880d681SAndroid Build Coastguard Worker
255*9880d681SAndroid Build Coastguard Worker // LowerVectorFP_TO_INT
256*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
257*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
258*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
259*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
260*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
261*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
262*9880d681SAndroid Build Coastguard Worker
263*9880d681SAndroid Build Coastguard Worker // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
264*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
265*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
266*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
267*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
268*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
269*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
270*9880d681SAndroid Build Coastguard Worker
271*9880d681SAndroid Build Coastguard Worker // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
272*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
273*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
274*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
275*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
276*9880d681SAndroid Build Coastguard Worker
277*9880d681SAndroid Build Coastguard Worker // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
278*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
279*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
280*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
281*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
282*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
283*9880d681SAndroid Build Coastguard Worker { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
284*9880d681SAndroid Build Coastguard Worker };
285*9880d681SAndroid Build Coastguard Worker
286*9880d681SAndroid Build Coastguard Worker if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
287*9880d681SAndroid Build Coastguard Worker DstTy.getSimpleVT(),
288*9880d681SAndroid Build Coastguard Worker SrcTy.getSimpleVT()))
289*9880d681SAndroid Build Coastguard Worker return Entry->Cost;
290*9880d681SAndroid Build Coastguard Worker
291*9880d681SAndroid Build Coastguard Worker return BaseT::getCastInstrCost(Opcode, Dst, Src);
292*9880d681SAndroid Build Coastguard Worker }
293*9880d681SAndroid Build Coastguard Worker
getExtractWithExtendCost(unsigned Opcode,Type * Dst,VectorType * VecTy,unsigned Index)294*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
295*9880d681SAndroid Build Coastguard Worker VectorType *VecTy,
296*9880d681SAndroid Build Coastguard Worker unsigned Index) {
297*9880d681SAndroid Build Coastguard Worker
298*9880d681SAndroid Build Coastguard Worker // Make sure we were given a valid extend opcode.
299*9880d681SAndroid Build Coastguard Worker assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
300*9880d681SAndroid Build Coastguard Worker "Invalid opcode");
301*9880d681SAndroid Build Coastguard Worker
302*9880d681SAndroid Build Coastguard Worker // We are extending an element we extract from a vector, so the source type
303*9880d681SAndroid Build Coastguard Worker // of the extend is the element type of the vector.
304*9880d681SAndroid Build Coastguard Worker auto *Src = VecTy->getElementType();
305*9880d681SAndroid Build Coastguard Worker
306*9880d681SAndroid Build Coastguard Worker // Sign- and zero-extends are for integer types only.
307*9880d681SAndroid Build Coastguard Worker assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
308*9880d681SAndroid Build Coastguard Worker
309*9880d681SAndroid Build Coastguard Worker // Get the cost for the extract. We compute the cost (if any) for the extend
310*9880d681SAndroid Build Coastguard Worker // below.
311*9880d681SAndroid Build Coastguard Worker auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
312*9880d681SAndroid Build Coastguard Worker
313*9880d681SAndroid Build Coastguard Worker // Legalize the types.
314*9880d681SAndroid Build Coastguard Worker auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
315*9880d681SAndroid Build Coastguard Worker auto DstVT = TLI->getValueType(DL, Dst);
316*9880d681SAndroid Build Coastguard Worker auto SrcVT = TLI->getValueType(DL, Src);
317*9880d681SAndroid Build Coastguard Worker
318*9880d681SAndroid Build Coastguard Worker // If the resulting type is still a vector and the destination type is legal,
319*9880d681SAndroid Build Coastguard Worker // we may get the extension for free. If not, get the default cost for the
320*9880d681SAndroid Build Coastguard Worker // extend.
321*9880d681SAndroid Build Coastguard Worker if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
322*9880d681SAndroid Build Coastguard Worker return Cost + getCastInstrCost(Opcode, Dst, Src);
323*9880d681SAndroid Build Coastguard Worker
324*9880d681SAndroid Build Coastguard Worker // The destination type should be larger than the element type. If not, get
325*9880d681SAndroid Build Coastguard Worker // the default cost for the extend.
326*9880d681SAndroid Build Coastguard Worker if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
327*9880d681SAndroid Build Coastguard Worker return Cost + getCastInstrCost(Opcode, Dst, Src);
328*9880d681SAndroid Build Coastguard Worker
329*9880d681SAndroid Build Coastguard Worker switch (Opcode) {
330*9880d681SAndroid Build Coastguard Worker default:
331*9880d681SAndroid Build Coastguard Worker llvm_unreachable("Opcode should be either SExt or ZExt");
332*9880d681SAndroid Build Coastguard Worker
333*9880d681SAndroid Build Coastguard Worker // For sign-extends, we only need a smov, which performs the extension
334*9880d681SAndroid Build Coastguard Worker // automatically.
335*9880d681SAndroid Build Coastguard Worker case Instruction::SExt:
336*9880d681SAndroid Build Coastguard Worker return Cost;
337*9880d681SAndroid Build Coastguard Worker
338*9880d681SAndroid Build Coastguard Worker // For zero-extends, the extend is performed automatically by a umov unless
339*9880d681SAndroid Build Coastguard Worker // the destination type is i64 and the element type is i8 or i16.
340*9880d681SAndroid Build Coastguard Worker case Instruction::ZExt:
341*9880d681SAndroid Build Coastguard Worker if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
342*9880d681SAndroid Build Coastguard Worker return Cost;
343*9880d681SAndroid Build Coastguard Worker }
344*9880d681SAndroid Build Coastguard Worker
345*9880d681SAndroid Build Coastguard Worker // If we are unable to perform the extend for free, get the default cost.
346*9880d681SAndroid Build Coastguard Worker return Cost + getCastInstrCost(Opcode, Dst, Src);
347*9880d681SAndroid Build Coastguard Worker }
348*9880d681SAndroid Build Coastguard Worker
getVectorInstrCost(unsigned Opcode,Type * Val,unsigned Index)349*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
350*9880d681SAndroid Build Coastguard Worker unsigned Index) {
351*9880d681SAndroid Build Coastguard Worker assert(Val->isVectorTy() && "This must be a vector type");
352*9880d681SAndroid Build Coastguard Worker
353*9880d681SAndroid Build Coastguard Worker if (Index != -1U) {
354*9880d681SAndroid Build Coastguard Worker // Legalize the type.
355*9880d681SAndroid Build Coastguard Worker std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
356*9880d681SAndroid Build Coastguard Worker
357*9880d681SAndroid Build Coastguard Worker // This type is legalized to a scalar type.
358*9880d681SAndroid Build Coastguard Worker if (!LT.second.isVector())
359*9880d681SAndroid Build Coastguard Worker return 0;
360*9880d681SAndroid Build Coastguard Worker
361*9880d681SAndroid Build Coastguard Worker // The type may be split. Normalize the index to the new type.
362*9880d681SAndroid Build Coastguard Worker unsigned Width = LT.second.getVectorNumElements();
363*9880d681SAndroid Build Coastguard Worker Index = Index % Width;
364*9880d681SAndroid Build Coastguard Worker
365*9880d681SAndroid Build Coastguard Worker // The element at index zero is already inside the vector.
366*9880d681SAndroid Build Coastguard Worker if (Index == 0)
367*9880d681SAndroid Build Coastguard Worker return 0;
368*9880d681SAndroid Build Coastguard Worker }
369*9880d681SAndroid Build Coastguard Worker
370*9880d681SAndroid Build Coastguard Worker // All other insert/extracts cost this much.
371*9880d681SAndroid Build Coastguard Worker return ST->getVectorInsertExtractBaseCost();
372*9880d681SAndroid Build Coastguard Worker }
373*9880d681SAndroid Build Coastguard Worker
getArithmeticInstrCost(unsigned Opcode,Type * Ty,TTI::OperandValueKind Opd1Info,TTI::OperandValueKind Opd2Info,TTI::OperandValueProperties Opd1PropInfo,TTI::OperandValueProperties Opd2PropInfo)374*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getArithmeticInstrCost(
375*9880d681SAndroid Build Coastguard Worker unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
376*9880d681SAndroid Build Coastguard Worker TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
377*9880d681SAndroid Build Coastguard Worker TTI::OperandValueProperties Opd2PropInfo) {
378*9880d681SAndroid Build Coastguard Worker // Legalize the type.
379*9880d681SAndroid Build Coastguard Worker std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
380*9880d681SAndroid Build Coastguard Worker
381*9880d681SAndroid Build Coastguard Worker int ISD = TLI->InstructionOpcodeToISD(Opcode);
382*9880d681SAndroid Build Coastguard Worker
383*9880d681SAndroid Build Coastguard Worker if (ISD == ISD::SDIV &&
384*9880d681SAndroid Build Coastguard Worker Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
385*9880d681SAndroid Build Coastguard Worker Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
386*9880d681SAndroid Build Coastguard Worker // On AArch64, scalar signed division by constants power-of-two are
387*9880d681SAndroid Build Coastguard Worker // normally expanded to the sequence ADD + CMP + SELECT + SRA.
388*9880d681SAndroid Build Coastguard Worker // The OperandValue properties many not be same as that of previous
389*9880d681SAndroid Build Coastguard Worker // operation; conservatively assume OP_None.
390*9880d681SAndroid Build Coastguard Worker int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
391*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::OP_None,
392*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::OP_None);
393*9880d681SAndroid Build Coastguard Worker Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
394*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::OP_None,
395*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::OP_None);
396*9880d681SAndroid Build Coastguard Worker Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
397*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::OP_None,
398*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::OP_None);
399*9880d681SAndroid Build Coastguard Worker Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
400*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::OP_None,
401*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::OP_None);
402*9880d681SAndroid Build Coastguard Worker return Cost;
403*9880d681SAndroid Build Coastguard Worker }
404*9880d681SAndroid Build Coastguard Worker
405*9880d681SAndroid Build Coastguard Worker switch (ISD) {
406*9880d681SAndroid Build Coastguard Worker default:
407*9880d681SAndroid Build Coastguard Worker return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
408*9880d681SAndroid Build Coastguard Worker Opd1PropInfo, Opd2PropInfo);
409*9880d681SAndroid Build Coastguard Worker case ISD::ADD:
410*9880d681SAndroid Build Coastguard Worker case ISD::MUL:
411*9880d681SAndroid Build Coastguard Worker case ISD::XOR:
412*9880d681SAndroid Build Coastguard Worker case ISD::OR:
413*9880d681SAndroid Build Coastguard Worker case ISD::AND:
414*9880d681SAndroid Build Coastguard Worker // These nodes are marked as 'custom' for combining purposes only.
415*9880d681SAndroid Build Coastguard Worker // We know that they are legal. See LowerAdd in ISelLowering.
416*9880d681SAndroid Build Coastguard Worker return 1 * LT.first;
417*9880d681SAndroid Build Coastguard Worker }
418*9880d681SAndroid Build Coastguard Worker }
419*9880d681SAndroid Build Coastguard Worker
getAddressComputationCost(Type * Ty,bool IsComplex)420*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
421*9880d681SAndroid Build Coastguard Worker // Address computations in vectorized code with non-consecutive addresses will
422*9880d681SAndroid Build Coastguard Worker // likely result in more instructions compared to scalar code where the
423*9880d681SAndroid Build Coastguard Worker // computation can more often be merged into the index mode. The resulting
424*9880d681SAndroid Build Coastguard Worker // extra micro-ops can significantly decrease throughput.
425*9880d681SAndroid Build Coastguard Worker unsigned NumVectorInstToHideOverhead = 10;
426*9880d681SAndroid Build Coastguard Worker
427*9880d681SAndroid Build Coastguard Worker if (Ty->isVectorTy() && IsComplex)
428*9880d681SAndroid Build Coastguard Worker return NumVectorInstToHideOverhead;
429*9880d681SAndroid Build Coastguard Worker
430*9880d681SAndroid Build Coastguard Worker // In many cases the address computation is not merged into the instruction
431*9880d681SAndroid Build Coastguard Worker // addressing mode.
432*9880d681SAndroid Build Coastguard Worker return 1;
433*9880d681SAndroid Build Coastguard Worker }
434*9880d681SAndroid Build Coastguard Worker
getCmpSelInstrCost(unsigned Opcode,Type * ValTy,Type * CondTy)435*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
436*9880d681SAndroid Build Coastguard Worker Type *CondTy) {
437*9880d681SAndroid Build Coastguard Worker
438*9880d681SAndroid Build Coastguard Worker int ISD = TLI->InstructionOpcodeToISD(Opcode);
439*9880d681SAndroid Build Coastguard Worker // We don't lower some vector selects well that are wider than the register
440*9880d681SAndroid Build Coastguard Worker // width.
441*9880d681SAndroid Build Coastguard Worker if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
442*9880d681SAndroid Build Coastguard Worker // We would need this many instructions to hide the scalarization happening.
443*9880d681SAndroid Build Coastguard Worker const int AmortizationCost = 20;
444*9880d681SAndroid Build Coastguard Worker static const TypeConversionCostTblEntry
445*9880d681SAndroid Build Coastguard Worker VectorSelectTbl[] = {
446*9880d681SAndroid Build Coastguard Worker { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
447*9880d681SAndroid Build Coastguard Worker { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
448*9880d681SAndroid Build Coastguard Worker { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
449*9880d681SAndroid Build Coastguard Worker { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
450*9880d681SAndroid Build Coastguard Worker { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
451*9880d681SAndroid Build Coastguard Worker { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
452*9880d681SAndroid Build Coastguard Worker };
453*9880d681SAndroid Build Coastguard Worker
454*9880d681SAndroid Build Coastguard Worker EVT SelCondTy = TLI->getValueType(DL, CondTy);
455*9880d681SAndroid Build Coastguard Worker EVT SelValTy = TLI->getValueType(DL, ValTy);
456*9880d681SAndroid Build Coastguard Worker if (SelCondTy.isSimple() && SelValTy.isSimple()) {
457*9880d681SAndroid Build Coastguard Worker if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
458*9880d681SAndroid Build Coastguard Worker SelCondTy.getSimpleVT(),
459*9880d681SAndroid Build Coastguard Worker SelValTy.getSimpleVT()))
460*9880d681SAndroid Build Coastguard Worker return Entry->Cost;
461*9880d681SAndroid Build Coastguard Worker }
462*9880d681SAndroid Build Coastguard Worker }
463*9880d681SAndroid Build Coastguard Worker return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
464*9880d681SAndroid Build Coastguard Worker }
465*9880d681SAndroid Build Coastguard Worker
getMemoryOpCost(unsigned Opcode,Type * Src,unsigned Alignment,unsigned AddressSpace)466*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
467*9880d681SAndroid Build Coastguard Worker unsigned Alignment, unsigned AddressSpace) {
468*9880d681SAndroid Build Coastguard Worker std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
469*9880d681SAndroid Build Coastguard Worker
470*9880d681SAndroid Build Coastguard Worker if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
471*9880d681SAndroid Build Coastguard Worker Src->getVectorElementType()->isIntegerTy(64)) {
472*9880d681SAndroid Build Coastguard Worker // Unaligned stores are extremely inefficient. We don't split
473*9880d681SAndroid Build Coastguard Worker // unaligned v2i64 stores because the negative impact that has shown in
474*9880d681SAndroid Build Coastguard Worker // practice on inlined memcpy code.
475*9880d681SAndroid Build Coastguard Worker // We make v2i64 stores expensive so that we will only vectorize if there
476*9880d681SAndroid Build Coastguard Worker // are 6 other instructions getting vectorized.
477*9880d681SAndroid Build Coastguard Worker int AmortizationCost = 6;
478*9880d681SAndroid Build Coastguard Worker
479*9880d681SAndroid Build Coastguard Worker return LT.first * 2 * AmortizationCost;
480*9880d681SAndroid Build Coastguard Worker }
481*9880d681SAndroid Build Coastguard Worker
482*9880d681SAndroid Build Coastguard Worker if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
483*9880d681SAndroid Build Coastguard Worker Src->getVectorNumElements() < 8) {
484*9880d681SAndroid Build Coastguard Worker // We scalarize the loads/stores because there is not v.4b register and we
485*9880d681SAndroid Build Coastguard Worker // have to promote the elements to v.4h.
486*9880d681SAndroid Build Coastguard Worker unsigned NumVecElts = Src->getVectorNumElements();
487*9880d681SAndroid Build Coastguard Worker unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
488*9880d681SAndroid Build Coastguard Worker // We generate 2 instructions per vector element.
489*9880d681SAndroid Build Coastguard Worker return NumVectorizableInstsToAmortize * NumVecElts * 2;
490*9880d681SAndroid Build Coastguard Worker }
491*9880d681SAndroid Build Coastguard Worker
492*9880d681SAndroid Build Coastguard Worker return LT.first;
493*9880d681SAndroid Build Coastguard Worker }
494*9880d681SAndroid Build Coastguard Worker
getInterleavedMemoryOpCost(unsigned Opcode,Type * VecTy,unsigned Factor,ArrayRef<unsigned> Indices,unsigned Alignment,unsigned AddressSpace)495*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
496*9880d681SAndroid Build Coastguard Worker unsigned Factor,
497*9880d681SAndroid Build Coastguard Worker ArrayRef<unsigned> Indices,
498*9880d681SAndroid Build Coastguard Worker unsigned Alignment,
499*9880d681SAndroid Build Coastguard Worker unsigned AddressSpace) {
500*9880d681SAndroid Build Coastguard Worker assert(Factor >= 2 && "Invalid interleave factor");
501*9880d681SAndroid Build Coastguard Worker assert(isa<VectorType>(VecTy) && "Expect a vector type");
502*9880d681SAndroid Build Coastguard Worker
503*9880d681SAndroid Build Coastguard Worker if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
504*9880d681SAndroid Build Coastguard Worker unsigned NumElts = VecTy->getVectorNumElements();
505*9880d681SAndroid Build Coastguard Worker Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
506*9880d681SAndroid Build Coastguard Worker unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
507*9880d681SAndroid Build Coastguard Worker
508*9880d681SAndroid Build Coastguard Worker // ldN/stN only support legal vector types of size 64 or 128 in bits.
509*9880d681SAndroid Build Coastguard Worker if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
510*9880d681SAndroid Build Coastguard Worker return Factor;
511*9880d681SAndroid Build Coastguard Worker }
512*9880d681SAndroid Build Coastguard Worker
513*9880d681SAndroid Build Coastguard Worker return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
514*9880d681SAndroid Build Coastguard Worker Alignment, AddressSpace);
515*9880d681SAndroid Build Coastguard Worker }
516*9880d681SAndroid Build Coastguard Worker
getCostOfKeepingLiveOverCall(ArrayRef<Type * > Tys)517*9880d681SAndroid Build Coastguard Worker int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
518*9880d681SAndroid Build Coastguard Worker int Cost = 0;
519*9880d681SAndroid Build Coastguard Worker for (auto *I : Tys) {
520*9880d681SAndroid Build Coastguard Worker if (!I->isVectorTy())
521*9880d681SAndroid Build Coastguard Worker continue;
522*9880d681SAndroid Build Coastguard Worker if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
523*9880d681SAndroid Build Coastguard Worker Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
524*9880d681SAndroid Build Coastguard Worker getMemoryOpCost(Instruction::Load, I, 128, 0);
525*9880d681SAndroid Build Coastguard Worker }
526*9880d681SAndroid Build Coastguard Worker return Cost;
527*9880d681SAndroid Build Coastguard Worker }
528*9880d681SAndroid Build Coastguard Worker
getMaxInterleaveFactor(unsigned VF)529*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
530*9880d681SAndroid Build Coastguard Worker return ST->getMaxInterleaveFactor();
531*9880d681SAndroid Build Coastguard Worker }
532*9880d681SAndroid Build Coastguard Worker
getUnrollingPreferences(Loop * L,TTI::UnrollingPreferences & UP)533*9880d681SAndroid Build Coastguard Worker void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
534*9880d681SAndroid Build Coastguard Worker TTI::UnrollingPreferences &UP) {
535*9880d681SAndroid Build Coastguard Worker // Enable partial unrolling and runtime unrolling.
536*9880d681SAndroid Build Coastguard Worker BaseT::getUnrollingPreferences(L, UP);
537*9880d681SAndroid Build Coastguard Worker
538*9880d681SAndroid Build Coastguard Worker // For inner loop, it is more likely to be a hot one, and the runtime check
539*9880d681SAndroid Build Coastguard Worker // can be promoted out from LICM pass, so the overhead is less, let's try
540*9880d681SAndroid Build Coastguard Worker // a larger threshold to unroll more loops.
541*9880d681SAndroid Build Coastguard Worker if (L->getLoopDepth() > 1)
542*9880d681SAndroid Build Coastguard Worker UP.PartialThreshold *= 2;
543*9880d681SAndroid Build Coastguard Worker
544*9880d681SAndroid Build Coastguard Worker // Disable partial & runtime unrolling on -Os.
545*9880d681SAndroid Build Coastguard Worker UP.PartialOptSizeThreshold = 0;
546*9880d681SAndroid Build Coastguard Worker }
547*9880d681SAndroid Build Coastguard Worker
getOrCreateResultFromMemIntrinsic(IntrinsicInst * Inst,Type * ExpectedType)548*9880d681SAndroid Build Coastguard Worker Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
549*9880d681SAndroid Build Coastguard Worker Type *ExpectedType) {
550*9880d681SAndroid Build Coastguard Worker switch (Inst->getIntrinsicID()) {
551*9880d681SAndroid Build Coastguard Worker default:
552*9880d681SAndroid Build Coastguard Worker return nullptr;
553*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st2:
554*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st3:
555*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st4: {
556*9880d681SAndroid Build Coastguard Worker // Create a struct type
557*9880d681SAndroid Build Coastguard Worker StructType *ST = dyn_cast<StructType>(ExpectedType);
558*9880d681SAndroid Build Coastguard Worker if (!ST)
559*9880d681SAndroid Build Coastguard Worker return nullptr;
560*9880d681SAndroid Build Coastguard Worker unsigned NumElts = Inst->getNumArgOperands() - 1;
561*9880d681SAndroid Build Coastguard Worker if (ST->getNumElements() != NumElts)
562*9880d681SAndroid Build Coastguard Worker return nullptr;
563*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0, e = NumElts; i != e; ++i) {
564*9880d681SAndroid Build Coastguard Worker if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
565*9880d681SAndroid Build Coastguard Worker return nullptr;
566*9880d681SAndroid Build Coastguard Worker }
567*9880d681SAndroid Build Coastguard Worker Value *Res = UndefValue::get(ExpectedType);
568*9880d681SAndroid Build Coastguard Worker IRBuilder<> Builder(Inst);
569*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0, e = NumElts; i != e; ++i) {
570*9880d681SAndroid Build Coastguard Worker Value *L = Inst->getArgOperand(i);
571*9880d681SAndroid Build Coastguard Worker Res = Builder.CreateInsertValue(Res, L, i);
572*9880d681SAndroid Build Coastguard Worker }
573*9880d681SAndroid Build Coastguard Worker return Res;
574*9880d681SAndroid Build Coastguard Worker }
575*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld2:
576*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld3:
577*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld4:
578*9880d681SAndroid Build Coastguard Worker if (Inst->getType() == ExpectedType)
579*9880d681SAndroid Build Coastguard Worker return Inst;
580*9880d681SAndroid Build Coastguard Worker return nullptr;
581*9880d681SAndroid Build Coastguard Worker }
582*9880d681SAndroid Build Coastguard Worker }
583*9880d681SAndroid Build Coastguard Worker
getTgtMemIntrinsic(IntrinsicInst * Inst,MemIntrinsicInfo & Info)584*9880d681SAndroid Build Coastguard Worker bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
585*9880d681SAndroid Build Coastguard Worker MemIntrinsicInfo &Info) {
586*9880d681SAndroid Build Coastguard Worker switch (Inst->getIntrinsicID()) {
587*9880d681SAndroid Build Coastguard Worker default:
588*9880d681SAndroid Build Coastguard Worker break;
589*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld2:
590*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld3:
591*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld4:
592*9880d681SAndroid Build Coastguard Worker Info.ReadMem = true;
593*9880d681SAndroid Build Coastguard Worker Info.WriteMem = false;
594*9880d681SAndroid Build Coastguard Worker Info.IsSimple = true;
595*9880d681SAndroid Build Coastguard Worker Info.NumMemRefs = 1;
596*9880d681SAndroid Build Coastguard Worker Info.PtrVal = Inst->getArgOperand(0);
597*9880d681SAndroid Build Coastguard Worker break;
598*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st2:
599*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st3:
600*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st4:
601*9880d681SAndroid Build Coastguard Worker Info.ReadMem = false;
602*9880d681SAndroid Build Coastguard Worker Info.WriteMem = true;
603*9880d681SAndroid Build Coastguard Worker Info.IsSimple = true;
604*9880d681SAndroid Build Coastguard Worker Info.NumMemRefs = 1;
605*9880d681SAndroid Build Coastguard Worker Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
606*9880d681SAndroid Build Coastguard Worker break;
607*9880d681SAndroid Build Coastguard Worker }
608*9880d681SAndroid Build Coastguard Worker
609*9880d681SAndroid Build Coastguard Worker switch (Inst->getIntrinsicID()) {
610*9880d681SAndroid Build Coastguard Worker default:
611*9880d681SAndroid Build Coastguard Worker return false;
612*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld2:
613*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st2:
614*9880d681SAndroid Build Coastguard Worker Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
615*9880d681SAndroid Build Coastguard Worker break;
616*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld3:
617*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st3:
618*9880d681SAndroid Build Coastguard Worker Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
619*9880d681SAndroid Build Coastguard Worker break;
620*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_ld4:
621*9880d681SAndroid Build Coastguard Worker case Intrinsic::aarch64_neon_st4:
622*9880d681SAndroid Build Coastguard Worker Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
623*9880d681SAndroid Build Coastguard Worker break;
624*9880d681SAndroid Build Coastguard Worker }
625*9880d681SAndroid Build Coastguard Worker return true;
626*9880d681SAndroid Build Coastguard Worker }
627*9880d681SAndroid Build Coastguard Worker
getCacheLineSize()628*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getCacheLineSize() {
629*9880d681SAndroid Build Coastguard Worker return ST->getCacheLineSize();
630*9880d681SAndroid Build Coastguard Worker }
631*9880d681SAndroid Build Coastguard Worker
getPrefetchDistance()632*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getPrefetchDistance() {
633*9880d681SAndroid Build Coastguard Worker return ST->getPrefetchDistance();
634*9880d681SAndroid Build Coastguard Worker }
635*9880d681SAndroid Build Coastguard Worker
getMinPrefetchStride()636*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getMinPrefetchStride() {
637*9880d681SAndroid Build Coastguard Worker return ST->getMinPrefetchStride();
638*9880d681SAndroid Build Coastguard Worker }
639*9880d681SAndroid Build Coastguard Worker
getMaxPrefetchIterationsAhead()640*9880d681SAndroid Build Coastguard Worker unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
641*9880d681SAndroid Build Coastguard Worker return ST->getMaxPrefetchIterationsAhead();
642*9880d681SAndroid Build Coastguard Worker }
643