//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker
10*9880d681SAndroid Build Coastguard Worker #include "PPCTargetTransformInfo.h"
11*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/TargetTransformInfo.h"
12*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/BasicTTIImpl.h"
13*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/CommandLine.h"
14*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
15*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/CostTable.h"
16*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetLowering.h"
17*9880d681SAndroid Build Coastguard Worker using namespace llvm;
18*9880d681SAndroid Build Coastguard Worker
19*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "ppctti"
20*9880d681SAndroid Build Coastguard Worker
21*9880d681SAndroid Build Coastguard Worker static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
22*9880d681SAndroid Build Coastguard Worker cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
23*9880d681SAndroid Build Coastguard Worker
24*9880d681SAndroid Build Coastguard Worker // This is currently only used for the data prefetch pass which is only enabled
25*9880d681SAndroid Build Coastguard Worker // for BG/Q by default.
26*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned>
27*9880d681SAndroid Build Coastguard Worker CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
28*9880d681SAndroid Build Coastguard Worker cl::desc("The loop prefetch cache line size"));
29*9880d681SAndroid Build Coastguard Worker
//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//
35*9880d681SAndroid Build Coastguard Worker
36*9880d681SAndroid Build Coastguard Worker TargetTransformInfo::PopcntSupportKind
getPopcntSupport(unsigned TyWidth)37*9880d681SAndroid Build Coastguard Worker PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
38*9880d681SAndroid Build Coastguard Worker assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
39*9880d681SAndroid Build Coastguard Worker if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
40*9880d681SAndroid Build Coastguard Worker return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?
41*9880d681SAndroid Build Coastguard Worker TTI::PSK_SlowHardware : TTI::PSK_FastHardware;
42*9880d681SAndroid Build Coastguard Worker return TTI::PSK_Software;
43*9880d681SAndroid Build Coastguard Worker }
44*9880d681SAndroid Build Coastguard Worker
getIntImmCost(const APInt & Imm,Type * Ty)45*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
46*9880d681SAndroid Build Coastguard Worker if (DisablePPCConstHoist)
47*9880d681SAndroid Build Coastguard Worker return BaseT::getIntImmCost(Imm, Ty);
48*9880d681SAndroid Build Coastguard Worker
49*9880d681SAndroid Build Coastguard Worker assert(Ty->isIntegerTy());
50*9880d681SAndroid Build Coastguard Worker
51*9880d681SAndroid Build Coastguard Worker unsigned BitSize = Ty->getPrimitiveSizeInBits();
52*9880d681SAndroid Build Coastguard Worker if (BitSize == 0)
53*9880d681SAndroid Build Coastguard Worker return ~0U;
54*9880d681SAndroid Build Coastguard Worker
55*9880d681SAndroid Build Coastguard Worker if (Imm == 0)
56*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
57*9880d681SAndroid Build Coastguard Worker
58*9880d681SAndroid Build Coastguard Worker if (Imm.getBitWidth() <= 64) {
59*9880d681SAndroid Build Coastguard Worker if (isInt<16>(Imm.getSExtValue()))
60*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Basic;
61*9880d681SAndroid Build Coastguard Worker
62*9880d681SAndroid Build Coastguard Worker if (isInt<32>(Imm.getSExtValue())) {
63*9880d681SAndroid Build Coastguard Worker // A constant that can be materialized using lis.
64*9880d681SAndroid Build Coastguard Worker if ((Imm.getZExtValue() & 0xFFFF) == 0)
65*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Basic;
66*9880d681SAndroid Build Coastguard Worker
67*9880d681SAndroid Build Coastguard Worker return 2 * TTI::TCC_Basic;
68*9880d681SAndroid Build Coastguard Worker }
69*9880d681SAndroid Build Coastguard Worker }
70*9880d681SAndroid Build Coastguard Worker
71*9880d681SAndroid Build Coastguard Worker return 4 * TTI::TCC_Basic;
72*9880d681SAndroid Build Coastguard Worker }
73*9880d681SAndroid Build Coastguard Worker
getIntImmCost(Intrinsic::ID IID,unsigned Idx,const APInt & Imm,Type * Ty)74*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
75*9880d681SAndroid Build Coastguard Worker Type *Ty) {
76*9880d681SAndroid Build Coastguard Worker if (DisablePPCConstHoist)
77*9880d681SAndroid Build Coastguard Worker return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
78*9880d681SAndroid Build Coastguard Worker
79*9880d681SAndroid Build Coastguard Worker assert(Ty->isIntegerTy());
80*9880d681SAndroid Build Coastguard Worker
81*9880d681SAndroid Build Coastguard Worker unsigned BitSize = Ty->getPrimitiveSizeInBits();
82*9880d681SAndroid Build Coastguard Worker if (BitSize == 0)
83*9880d681SAndroid Build Coastguard Worker return ~0U;
84*9880d681SAndroid Build Coastguard Worker
85*9880d681SAndroid Build Coastguard Worker switch (IID) {
86*9880d681SAndroid Build Coastguard Worker default:
87*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
88*9880d681SAndroid Build Coastguard Worker case Intrinsic::sadd_with_overflow:
89*9880d681SAndroid Build Coastguard Worker case Intrinsic::uadd_with_overflow:
90*9880d681SAndroid Build Coastguard Worker case Intrinsic::ssub_with_overflow:
91*9880d681SAndroid Build Coastguard Worker case Intrinsic::usub_with_overflow:
92*9880d681SAndroid Build Coastguard Worker if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
93*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
94*9880d681SAndroid Build Coastguard Worker break;
95*9880d681SAndroid Build Coastguard Worker case Intrinsic::experimental_stackmap:
96*9880d681SAndroid Build Coastguard Worker if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
97*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
98*9880d681SAndroid Build Coastguard Worker break;
99*9880d681SAndroid Build Coastguard Worker case Intrinsic::experimental_patchpoint_void:
100*9880d681SAndroid Build Coastguard Worker case Intrinsic::experimental_patchpoint_i64:
101*9880d681SAndroid Build Coastguard Worker if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
102*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
103*9880d681SAndroid Build Coastguard Worker break;
104*9880d681SAndroid Build Coastguard Worker }
105*9880d681SAndroid Build Coastguard Worker return PPCTTIImpl::getIntImmCost(Imm, Ty);
106*9880d681SAndroid Build Coastguard Worker }
107*9880d681SAndroid Build Coastguard Worker
getIntImmCost(unsigned Opcode,unsigned Idx,const APInt & Imm,Type * Ty)108*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
109*9880d681SAndroid Build Coastguard Worker Type *Ty) {
110*9880d681SAndroid Build Coastguard Worker if (DisablePPCConstHoist)
111*9880d681SAndroid Build Coastguard Worker return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
112*9880d681SAndroid Build Coastguard Worker
113*9880d681SAndroid Build Coastguard Worker assert(Ty->isIntegerTy());
114*9880d681SAndroid Build Coastguard Worker
115*9880d681SAndroid Build Coastguard Worker unsigned BitSize = Ty->getPrimitiveSizeInBits();
116*9880d681SAndroid Build Coastguard Worker if (BitSize == 0)
117*9880d681SAndroid Build Coastguard Worker return ~0U;
118*9880d681SAndroid Build Coastguard Worker
119*9880d681SAndroid Build Coastguard Worker unsigned ImmIdx = ~0U;
120*9880d681SAndroid Build Coastguard Worker bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
121*9880d681SAndroid Build Coastguard Worker ZeroFree = false;
122*9880d681SAndroid Build Coastguard Worker switch (Opcode) {
123*9880d681SAndroid Build Coastguard Worker default:
124*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
125*9880d681SAndroid Build Coastguard Worker case Instruction::GetElementPtr:
126*9880d681SAndroid Build Coastguard Worker // Always hoist the base address of a GetElementPtr. This prevents the
127*9880d681SAndroid Build Coastguard Worker // creation of new constants for every base constant that gets constant
128*9880d681SAndroid Build Coastguard Worker // folded with the offset.
129*9880d681SAndroid Build Coastguard Worker if (Idx == 0)
130*9880d681SAndroid Build Coastguard Worker return 2 * TTI::TCC_Basic;
131*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
132*9880d681SAndroid Build Coastguard Worker case Instruction::And:
133*9880d681SAndroid Build Coastguard Worker RunFree = true; // (for the rotate-and-mask instructions)
134*9880d681SAndroid Build Coastguard Worker // Fallthrough...
135*9880d681SAndroid Build Coastguard Worker case Instruction::Add:
136*9880d681SAndroid Build Coastguard Worker case Instruction::Or:
137*9880d681SAndroid Build Coastguard Worker case Instruction::Xor:
138*9880d681SAndroid Build Coastguard Worker ShiftedFree = true;
139*9880d681SAndroid Build Coastguard Worker // Fallthrough...
140*9880d681SAndroid Build Coastguard Worker case Instruction::Sub:
141*9880d681SAndroid Build Coastguard Worker case Instruction::Mul:
142*9880d681SAndroid Build Coastguard Worker case Instruction::Shl:
143*9880d681SAndroid Build Coastguard Worker case Instruction::LShr:
144*9880d681SAndroid Build Coastguard Worker case Instruction::AShr:
145*9880d681SAndroid Build Coastguard Worker ImmIdx = 1;
146*9880d681SAndroid Build Coastguard Worker break;
147*9880d681SAndroid Build Coastguard Worker case Instruction::ICmp:
148*9880d681SAndroid Build Coastguard Worker UnsignedFree = true;
149*9880d681SAndroid Build Coastguard Worker ImmIdx = 1;
150*9880d681SAndroid Build Coastguard Worker // Fallthrough... (zero comparisons can use record-form instructions)
151*9880d681SAndroid Build Coastguard Worker case Instruction::Select:
152*9880d681SAndroid Build Coastguard Worker ZeroFree = true;
153*9880d681SAndroid Build Coastguard Worker break;
154*9880d681SAndroid Build Coastguard Worker case Instruction::PHI:
155*9880d681SAndroid Build Coastguard Worker case Instruction::Call:
156*9880d681SAndroid Build Coastguard Worker case Instruction::Ret:
157*9880d681SAndroid Build Coastguard Worker case Instruction::Load:
158*9880d681SAndroid Build Coastguard Worker case Instruction::Store:
159*9880d681SAndroid Build Coastguard Worker break;
160*9880d681SAndroid Build Coastguard Worker }
161*9880d681SAndroid Build Coastguard Worker
162*9880d681SAndroid Build Coastguard Worker if (ZeroFree && Imm == 0)
163*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
164*9880d681SAndroid Build Coastguard Worker
165*9880d681SAndroid Build Coastguard Worker if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
166*9880d681SAndroid Build Coastguard Worker if (isInt<16>(Imm.getSExtValue()))
167*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
168*9880d681SAndroid Build Coastguard Worker
169*9880d681SAndroid Build Coastguard Worker if (RunFree) {
170*9880d681SAndroid Build Coastguard Worker if (Imm.getBitWidth() <= 32 &&
171*9880d681SAndroid Build Coastguard Worker (isShiftedMask_32(Imm.getZExtValue()) ||
172*9880d681SAndroid Build Coastguard Worker isShiftedMask_32(~Imm.getZExtValue())))
173*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
174*9880d681SAndroid Build Coastguard Worker
175*9880d681SAndroid Build Coastguard Worker if (ST->isPPC64() &&
176*9880d681SAndroid Build Coastguard Worker (isShiftedMask_64(Imm.getZExtValue()) ||
177*9880d681SAndroid Build Coastguard Worker isShiftedMask_64(~Imm.getZExtValue())))
178*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
179*9880d681SAndroid Build Coastguard Worker }
180*9880d681SAndroid Build Coastguard Worker
181*9880d681SAndroid Build Coastguard Worker if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
182*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
183*9880d681SAndroid Build Coastguard Worker
184*9880d681SAndroid Build Coastguard Worker if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
185*9880d681SAndroid Build Coastguard Worker return TTI::TCC_Free;
186*9880d681SAndroid Build Coastguard Worker }
187*9880d681SAndroid Build Coastguard Worker
188*9880d681SAndroid Build Coastguard Worker return PPCTTIImpl::getIntImmCost(Imm, Ty);
189*9880d681SAndroid Build Coastguard Worker }
190*9880d681SAndroid Build Coastguard Worker
getUnrollingPreferences(Loop * L,TTI::UnrollingPreferences & UP)191*9880d681SAndroid Build Coastguard Worker void PPCTTIImpl::getUnrollingPreferences(Loop *L,
192*9880d681SAndroid Build Coastguard Worker TTI::UnrollingPreferences &UP) {
193*9880d681SAndroid Build Coastguard Worker if (ST->getDarwinDirective() == PPC::DIR_A2) {
194*9880d681SAndroid Build Coastguard Worker // The A2 is in-order with a deep pipeline, and concatenation unrolling
195*9880d681SAndroid Build Coastguard Worker // helps expose latency-hiding opportunities to the instruction scheduler.
196*9880d681SAndroid Build Coastguard Worker UP.Partial = UP.Runtime = true;
197*9880d681SAndroid Build Coastguard Worker
198*9880d681SAndroid Build Coastguard Worker // We unroll a lot on the A2 (hundreds of instructions), and the benefits
199*9880d681SAndroid Build Coastguard Worker // often outweigh the cost of a division to compute the trip count.
200*9880d681SAndroid Build Coastguard Worker UP.AllowExpensiveTripCount = true;
201*9880d681SAndroid Build Coastguard Worker }
202*9880d681SAndroid Build Coastguard Worker
203*9880d681SAndroid Build Coastguard Worker BaseT::getUnrollingPreferences(L, UP);
204*9880d681SAndroid Build Coastguard Worker }
205*9880d681SAndroid Build Coastguard Worker
enableAggressiveInterleaving(bool LoopHasReductions)206*9880d681SAndroid Build Coastguard Worker bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
207*9880d681SAndroid Build Coastguard Worker // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
208*9880d681SAndroid Build Coastguard Worker // on combining the loads generated for consecutive accesses, and failure to
209*9880d681SAndroid Build Coastguard Worker // do so is particularly expensive. This makes it much more likely (compared
210*9880d681SAndroid Build Coastguard Worker // to only using concatenation unrolling).
211*9880d681SAndroid Build Coastguard Worker if (ST->getDarwinDirective() == PPC::DIR_A2)
212*9880d681SAndroid Build Coastguard Worker return true;
213*9880d681SAndroid Build Coastguard Worker
214*9880d681SAndroid Build Coastguard Worker return LoopHasReductions;
215*9880d681SAndroid Build Coastguard Worker }
216*9880d681SAndroid Build Coastguard Worker
enableInterleavedAccessVectorization()217*9880d681SAndroid Build Coastguard Worker bool PPCTTIImpl::enableInterleavedAccessVectorization() {
218*9880d681SAndroid Build Coastguard Worker return true;
219*9880d681SAndroid Build Coastguard Worker }
220*9880d681SAndroid Build Coastguard Worker
getNumberOfRegisters(bool Vector)221*9880d681SAndroid Build Coastguard Worker unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
222*9880d681SAndroid Build Coastguard Worker if (Vector && !ST->hasAltivec() && !ST->hasQPX())
223*9880d681SAndroid Build Coastguard Worker return 0;
224*9880d681SAndroid Build Coastguard Worker return ST->hasVSX() ? 64 : 32;
225*9880d681SAndroid Build Coastguard Worker }
226*9880d681SAndroid Build Coastguard Worker
getRegisterBitWidth(bool Vector)227*9880d681SAndroid Build Coastguard Worker unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
228*9880d681SAndroid Build Coastguard Worker if (Vector) {
229*9880d681SAndroid Build Coastguard Worker if (ST->hasQPX()) return 256;
230*9880d681SAndroid Build Coastguard Worker if (ST->hasAltivec()) return 128;
231*9880d681SAndroid Build Coastguard Worker return 0;
232*9880d681SAndroid Build Coastguard Worker }
233*9880d681SAndroid Build Coastguard Worker
234*9880d681SAndroid Build Coastguard Worker if (ST->isPPC64())
235*9880d681SAndroid Build Coastguard Worker return 64;
236*9880d681SAndroid Build Coastguard Worker return 32;
237*9880d681SAndroid Build Coastguard Worker
238*9880d681SAndroid Build Coastguard Worker }
239*9880d681SAndroid Build Coastguard Worker
getCacheLineSize()240*9880d681SAndroid Build Coastguard Worker unsigned PPCTTIImpl::getCacheLineSize() {
241*9880d681SAndroid Build Coastguard Worker // This is currently only used for the data prefetch pass which is only
242*9880d681SAndroid Build Coastguard Worker // enabled for BG/Q by default.
243*9880d681SAndroid Build Coastguard Worker return CacheLineSize;
244*9880d681SAndroid Build Coastguard Worker }
245*9880d681SAndroid Build Coastguard Worker
getPrefetchDistance()246*9880d681SAndroid Build Coastguard Worker unsigned PPCTTIImpl::getPrefetchDistance() {
247*9880d681SAndroid Build Coastguard Worker // This seems like a reasonable default for the BG/Q (this pass is enabled, by
248*9880d681SAndroid Build Coastguard Worker // default, only on the BG/Q).
249*9880d681SAndroid Build Coastguard Worker return 300;
250*9880d681SAndroid Build Coastguard Worker }
251*9880d681SAndroid Build Coastguard Worker
getMaxInterleaveFactor(unsigned VF)252*9880d681SAndroid Build Coastguard Worker unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
253*9880d681SAndroid Build Coastguard Worker unsigned Directive = ST->getDarwinDirective();
254*9880d681SAndroid Build Coastguard Worker // The 440 has no SIMD support, but floating-point instructions
255*9880d681SAndroid Build Coastguard Worker // have a 5-cycle latency, so unroll by 5x for latency hiding.
256*9880d681SAndroid Build Coastguard Worker if (Directive == PPC::DIR_440)
257*9880d681SAndroid Build Coastguard Worker return 5;
258*9880d681SAndroid Build Coastguard Worker
259*9880d681SAndroid Build Coastguard Worker // The A2 has no SIMD support, but floating-point instructions
260*9880d681SAndroid Build Coastguard Worker // have a 6-cycle latency, so unroll by 6x for latency hiding.
261*9880d681SAndroid Build Coastguard Worker if (Directive == PPC::DIR_A2)
262*9880d681SAndroid Build Coastguard Worker return 6;
263*9880d681SAndroid Build Coastguard Worker
264*9880d681SAndroid Build Coastguard Worker // FIXME: For lack of any better information, do no harm...
265*9880d681SAndroid Build Coastguard Worker if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
266*9880d681SAndroid Build Coastguard Worker return 1;
267*9880d681SAndroid Build Coastguard Worker
268*9880d681SAndroid Build Coastguard Worker // For P7 and P8, floating-point instructions have a 6-cycle latency and
269*9880d681SAndroid Build Coastguard Worker // there are two execution units, so unroll by 12x for latency hiding.
270*9880d681SAndroid Build Coastguard Worker // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
271*9880d681SAndroid Build Coastguard Worker if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
272*9880d681SAndroid Build Coastguard Worker Directive == PPC::DIR_PWR9)
273*9880d681SAndroid Build Coastguard Worker return 12;
274*9880d681SAndroid Build Coastguard Worker
275*9880d681SAndroid Build Coastguard Worker // For most things, modern systems have two execution units (and
276*9880d681SAndroid Build Coastguard Worker // out-of-order execution).
277*9880d681SAndroid Build Coastguard Worker return 2;
278*9880d681SAndroid Build Coastguard Worker }
279*9880d681SAndroid Build Coastguard Worker
getArithmeticInstrCost(unsigned Opcode,Type * Ty,TTI::OperandValueKind Op1Info,TTI::OperandValueKind Op2Info,TTI::OperandValueProperties Opd1PropInfo,TTI::OperandValueProperties Opd2PropInfo)280*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getArithmeticInstrCost(
281*9880d681SAndroid Build Coastguard Worker unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
282*9880d681SAndroid Build Coastguard Worker TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
283*9880d681SAndroid Build Coastguard Worker TTI::OperandValueProperties Opd2PropInfo) {
284*9880d681SAndroid Build Coastguard Worker assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
285*9880d681SAndroid Build Coastguard Worker
286*9880d681SAndroid Build Coastguard Worker // Fallback to the default implementation.
287*9880d681SAndroid Build Coastguard Worker return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
288*9880d681SAndroid Build Coastguard Worker Opd1PropInfo, Opd2PropInfo);
289*9880d681SAndroid Build Coastguard Worker }
290*9880d681SAndroid Build Coastguard Worker
getShuffleCost(TTI::ShuffleKind Kind,Type * Tp,int Index,Type * SubTp)291*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
292*9880d681SAndroid Build Coastguard Worker Type *SubTp) {
293*9880d681SAndroid Build Coastguard Worker // Legalize the type.
294*9880d681SAndroid Build Coastguard Worker std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
295*9880d681SAndroid Build Coastguard Worker
296*9880d681SAndroid Build Coastguard Worker // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
297*9880d681SAndroid Build Coastguard Worker // (at least in the sense that there need only be one non-loop-invariant
298*9880d681SAndroid Build Coastguard Worker // instruction). We need one such shuffle instruction for each actual
299*9880d681SAndroid Build Coastguard Worker // register (this is not true for arbitrary shuffles, but is true for the
300*9880d681SAndroid Build Coastguard Worker // structured types of shuffles covered by TTI::ShuffleKind).
301*9880d681SAndroid Build Coastguard Worker return LT.first;
302*9880d681SAndroid Build Coastguard Worker }
303*9880d681SAndroid Build Coastguard Worker
getCastInstrCost(unsigned Opcode,Type * Dst,Type * Src)304*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
305*9880d681SAndroid Build Coastguard Worker assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
306*9880d681SAndroid Build Coastguard Worker
307*9880d681SAndroid Build Coastguard Worker return BaseT::getCastInstrCost(Opcode, Dst, Src);
308*9880d681SAndroid Build Coastguard Worker }
309*9880d681SAndroid Build Coastguard Worker
getCmpSelInstrCost(unsigned Opcode,Type * ValTy,Type * CondTy)310*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
311*9880d681SAndroid Build Coastguard Worker return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
312*9880d681SAndroid Build Coastguard Worker }
313*9880d681SAndroid Build Coastguard Worker
getVectorInstrCost(unsigned Opcode,Type * Val,unsigned Index)314*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
315*9880d681SAndroid Build Coastguard Worker assert(Val->isVectorTy() && "This must be a vector type");
316*9880d681SAndroid Build Coastguard Worker
317*9880d681SAndroid Build Coastguard Worker int ISD = TLI->InstructionOpcodeToISD(Opcode);
318*9880d681SAndroid Build Coastguard Worker assert(ISD && "Invalid opcode");
319*9880d681SAndroid Build Coastguard Worker
320*9880d681SAndroid Build Coastguard Worker if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
321*9880d681SAndroid Build Coastguard Worker // Double-precision scalars are already located in index #0.
322*9880d681SAndroid Build Coastguard Worker if (Index == 0)
323*9880d681SAndroid Build Coastguard Worker return 0;
324*9880d681SAndroid Build Coastguard Worker
325*9880d681SAndroid Build Coastguard Worker return BaseT::getVectorInstrCost(Opcode, Val, Index);
326*9880d681SAndroid Build Coastguard Worker } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
327*9880d681SAndroid Build Coastguard Worker // Floating point scalars are already located in index #0.
328*9880d681SAndroid Build Coastguard Worker if (Index == 0)
329*9880d681SAndroid Build Coastguard Worker return 0;
330*9880d681SAndroid Build Coastguard Worker
331*9880d681SAndroid Build Coastguard Worker return BaseT::getVectorInstrCost(Opcode, Val, Index);
332*9880d681SAndroid Build Coastguard Worker }
333*9880d681SAndroid Build Coastguard Worker
334*9880d681SAndroid Build Coastguard Worker // Estimated cost of a load-hit-store delay. This was obtained
335*9880d681SAndroid Build Coastguard Worker // experimentally as a minimum needed to prevent unprofitable
336*9880d681SAndroid Build Coastguard Worker // vectorization for the paq8p benchmark. It may need to be
337*9880d681SAndroid Build Coastguard Worker // raised further if other unprofitable cases remain.
338*9880d681SAndroid Build Coastguard Worker unsigned LHSPenalty = 2;
339*9880d681SAndroid Build Coastguard Worker if (ISD == ISD::INSERT_VECTOR_ELT)
340*9880d681SAndroid Build Coastguard Worker LHSPenalty += 7;
341*9880d681SAndroid Build Coastguard Worker
342*9880d681SAndroid Build Coastguard Worker // Vector element insert/extract with Altivec is very expensive,
343*9880d681SAndroid Build Coastguard Worker // because they require store and reload with the attendant
344*9880d681SAndroid Build Coastguard Worker // processor stall for load-hit-store. Until VSX is available,
345*9880d681SAndroid Build Coastguard Worker // these need to be estimated as very costly.
346*9880d681SAndroid Build Coastguard Worker if (ISD == ISD::EXTRACT_VECTOR_ELT ||
347*9880d681SAndroid Build Coastguard Worker ISD == ISD::INSERT_VECTOR_ELT)
348*9880d681SAndroid Build Coastguard Worker return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
349*9880d681SAndroid Build Coastguard Worker
350*9880d681SAndroid Build Coastguard Worker return BaseT::getVectorInstrCost(Opcode, Val, Index);
351*9880d681SAndroid Build Coastguard Worker }
352*9880d681SAndroid Build Coastguard Worker
getMemoryOpCost(unsigned Opcode,Type * Src,unsigned Alignment,unsigned AddressSpace)353*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
354*9880d681SAndroid Build Coastguard Worker unsigned AddressSpace) {
355*9880d681SAndroid Build Coastguard Worker // Legalize the type.
356*9880d681SAndroid Build Coastguard Worker std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
357*9880d681SAndroid Build Coastguard Worker assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
358*9880d681SAndroid Build Coastguard Worker "Invalid Opcode");
359*9880d681SAndroid Build Coastguard Worker
360*9880d681SAndroid Build Coastguard Worker int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
361*9880d681SAndroid Build Coastguard Worker
362*9880d681SAndroid Build Coastguard Worker // Aligned loads and stores are easy.
363*9880d681SAndroid Build Coastguard Worker unsigned SrcBytes = LT.second.getStoreSize();
364*9880d681SAndroid Build Coastguard Worker if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
365*9880d681SAndroid Build Coastguard Worker return Cost;
366*9880d681SAndroid Build Coastguard Worker
367*9880d681SAndroid Build Coastguard Worker bool IsAltivecType = ST->hasAltivec() &&
368*9880d681SAndroid Build Coastguard Worker (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
369*9880d681SAndroid Build Coastguard Worker LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
370*9880d681SAndroid Build Coastguard Worker bool IsVSXType = ST->hasVSX() &&
371*9880d681SAndroid Build Coastguard Worker (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
372*9880d681SAndroid Build Coastguard Worker bool IsQPXType = ST->hasQPX() &&
373*9880d681SAndroid Build Coastguard Worker (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
374*9880d681SAndroid Build Coastguard Worker
375*9880d681SAndroid Build Coastguard Worker // If we can use the permutation-based load sequence, then this is also
376*9880d681SAndroid Build Coastguard Worker // relatively cheap (not counting loop-invariant instructions): one load plus
377*9880d681SAndroid Build Coastguard Worker // one permute (the last load in a series has extra cost, but we're
378*9880d681SAndroid Build Coastguard Worker // neglecting that here). Note that on the P7, we could do unaligned loads
379*9880d681SAndroid Build Coastguard Worker // for Altivec types using the VSX instructions, but that's more expensive
380*9880d681SAndroid Build Coastguard Worker // than using the permutation-based load sequence. On the P8, that's no
381*9880d681SAndroid Build Coastguard Worker // longer true.
382*9880d681SAndroid Build Coastguard Worker if (Opcode == Instruction::Load &&
383*9880d681SAndroid Build Coastguard Worker ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
384*9880d681SAndroid Build Coastguard Worker Alignment >= LT.second.getScalarType().getStoreSize())
385*9880d681SAndroid Build Coastguard Worker return Cost + LT.first; // Add the cost of the permutations.
386*9880d681SAndroid Build Coastguard Worker
387*9880d681SAndroid Build Coastguard Worker // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
388*9880d681SAndroid Build Coastguard Worker // P7, unaligned vector loads are more expensive than the permutation-based
389*9880d681SAndroid Build Coastguard Worker // load sequence, so that might be used instead, but regardless, the net cost
390*9880d681SAndroid Build Coastguard Worker // is about the same (not counting loop-invariant instructions).
391*9880d681SAndroid Build Coastguard Worker if (IsVSXType || (ST->hasVSX() && IsAltivecType))
392*9880d681SAndroid Build Coastguard Worker return Cost;
393*9880d681SAndroid Build Coastguard Worker
394*9880d681SAndroid Build Coastguard Worker // PPC in general does not support unaligned loads and stores. They'll need
395*9880d681SAndroid Build Coastguard Worker // to be decomposed based on the alignment factor.
396*9880d681SAndroid Build Coastguard Worker
397*9880d681SAndroid Build Coastguard Worker // Add the cost of each scalar load or store.
398*9880d681SAndroid Build Coastguard Worker Cost += LT.first*(SrcBytes/Alignment-1);
399*9880d681SAndroid Build Coastguard Worker
400*9880d681SAndroid Build Coastguard Worker // For a vector type, there is also scalarization overhead (only for
401*9880d681SAndroid Build Coastguard Worker // stores, loads are expanded using the vector-load + permutation sequence,
402*9880d681SAndroid Build Coastguard Worker // which is much less expensive).
403*9880d681SAndroid Build Coastguard Worker if (Src->isVectorTy() && Opcode == Instruction::Store)
404*9880d681SAndroid Build Coastguard Worker for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
405*9880d681SAndroid Build Coastguard Worker Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
406*9880d681SAndroid Build Coastguard Worker
407*9880d681SAndroid Build Coastguard Worker return Cost;
408*9880d681SAndroid Build Coastguard Worker }
409*9880d681SAndroid Build Coastguard Worker
getInterleavedMemoryOpCost(unsigned Opcode,Type * VecTy,unsigned Factor,ArrayRef<unsigned> Indices,unsigned Alignment,unsigned AddressSpace)410*9880d681SAndroid Build Coastguard Worker int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
411*9880d681SAndroid Build Coastguard Worker unsigned Factor,
412*9880d681SAndroid Build Coastguard Worker ArrayRef<unsigned> Indices,
413*9880d681SAndroid Build Coastguard Worker unsigned Alignment,
414*9880d681SAndroid Build Coastguard Worker unsigned AddressSpace) {
415*9880d681SAndroid Build Coastguard Worker assert(isa<VectorType>(VecTy) &&
416*9880d681SAndroid Build Coastguard Worker "Expect a vector type for interleaved memory op");
417*9880d681SAndroid Build Coastguard Worker
418*9880d681SAndroid Build Coastguard Worker // Legalize the type.
419*9880d681SAndroid Build Coastguard Worker std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
420*9880d681SAndroid Build Coastguard Worker
421*9880d681SAndroid Build Coastguard Worker // Firstly, the cost of load/store operation.
422*9880d681SAndroid Build Coastguard Worker int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
423*9880d681SAndroid Build Coastguard Worker
424*9880d681SAndroid Build Coastguard Worker // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
425*9880d681SAndroid Build Coastguard Worker // (at least in the sense that there need only be one non-loop-invariant
426*9880d681SAndroid Build Coastguard Worker // instruction). For each result vector, we need one shuffle per incoming
427*9880d681SAndroid Build Coastguard Worker // vector (except that the first shuffle can take two incoming vectors
428*9880d681SAndroid Build Coastguard Worker // because it does not need to take itself).
429*9880d681SAndroid Build Coastguard Worker Cost += Factor*(LT.first-1);
430*9880d681SAndroid Build Coastguard Worker
431*9880d681SAndroid Build Coastguard Worker return Cost;
432*9880d681SAndroid Build Coastguard Worker }
433*9880d681SAndroid Build Coastguard Worker
434