1 //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass converts selects to conditional jumps when profitable.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Analysis/BlockFrequencyInfo.h"
16 #include "llvm/Analysis/BranchProbabilityInfo.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/CodeGen/Passes.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetPassConfig.h"
24 #include "llvm/CodeGen/TargetSchedule.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/ProfDataUtils.h"
32 #include "llvm/InitializePasses.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/ScaledNumber.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Transforms/Utils/SizeOpts.h"
37 #include <algorithm>
38 #include <memory>
39 #include <queue>
40 #include <stack>
41 #include <string>
42
43 using namespace llvm;
44
45 #define DEBUG_TYPE "select-optimize"
46
// Statistics on analyzed/converted select groups, reported with -stats.
STATISTIC(NumSelectOptAnalyzed,
          "Number of select groups considered for conversion to branch");
STATISTIC(NumSelectConvertedExpColdOperand,
          "Number of select groups converted due to expensive cold operand");
STATISTIC(NumSelectConvertedHighPred,
          "Number of select groups converted due to high-predictability");
STATISTIC(NumSelectUnPred,
          "Number of select groups not converted due to unpredictability");
STATISTIC(NumSelectColdBB,
          "Number of select groups not converted due to cold basic block");
STATISTIC(NumSelectConvertedLoop,
          "Number of select groups converted due to loop-level analysis");
STATISTIC(NumSelectsConverted, "Number of selects converted");

// Command-line knobs for the profitability heuristics below.
static cl::opt<unsigned> ColdOperandThreshold(
    "cold-operand-threshold",
    cl::desc("Maximum frequency of path for an operand to be considered cold."),
    cl::init(20), cl::Hidden);

static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
    "cold-operand-max-cost-multiplier",
    cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
             "slice of a cold operand to be considered inexpensive."),
    cl::init(1), cl::Hidden);

static cl::opt<unsigned>
    GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
                          cl::desc("Gradient gain threshold (%)."),
                          cl::init(25), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<unsigned> GainRelativeThreshold(
    "select-opti-loop-relative-gain-threshold",
    cl::desc(
        "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
    cl::init(8), cl::Hidden);

static cl::opt<unsigned> MispredictDefaultRate(
    "mispredict-default-rate", cl::Hidden, cl::init(25),
    cl::desc("Default mispredict rate (initialized to 25%)."));

static cl::opt<bool>
    DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
                               cl::init(false),
                               cl::desc("Disable loop-level heuristics."));
96
97 namespace {
98
99 class SelectOptimize : public FunctionPass {
100 const TargetMachine *TM = nullptr;
101 const TargetSubtargetInfo *TSI;
102 const TargetLowering *TLI = nullptr;
103 const TargetTransformInfo *TTI = nullptr;
104 const LoopInfo *LI;
105 DominatorTree *DT;
106 std::unique_ptr<BlockFrequencyInfo> BFI;
107 std::unique_ptr<BranchProbabilityInfo> BPI;
108 ProfileSummaryInfo *PSI;
109 OptimizationRemarkEmitter *ORE;
110 TargetSchedModel TSchedModel;
111
112 public:
113 static char ID;
114
SelectOptimize()115 SelectOptimize() : FunctionPass(ID) {
116 initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
117 }
118
119 bool runOnFunction(Function &F) override;
120
getAnalysisUsage(AnalysisUsage & AU) const121 void getAnalysisUsage(AnalysisUsage &AU) const override {
122 AU.addRequired<ProfileSummaryInfoWrapperPass>();
123 AU.addRequired<TargetPassConfig>();
124 AU.addRequired<TargetTransformInfoWrapperPass>();
125 AU.addRequired<DominatorTreeWrapperPass>();
126 AU.addRequired<LoopInfoWrapperPass>();
127 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
128 }
129
130 private:
131 // Select groups consist of consecutive select instructions with the same
132 // condition.
133 using SelectGroup = SmallVector<SelectInst *, 2>;
134 using SelectGroups = SmallVector<SelectGroup, 2>;
135
136 using Scaled64 = ScaledNumber<uint64_t>;
137
138 struct CostInfo {
139 /// Predicated cost (with selects as conditional moves).
140 Scaled64 PredCost;
141 /// Non-predicated cost (with selects converted to branches).
142 Scaled64 NonPredCost;
143 };
144
145 // Converts select instructions of a function to conditional jumps when deemed
146 // profitable. Returns true if at least one select was converted.
147 bool optimizeSelects(Function &F);
148
149 // Heuristics for determining which select instructions can be profitably
150 // conveted to branches. Separate heuristics for selects in inner-most loops
151 // and the rest of code regions (base heuristics for non-inner-most loop
152 // regions).
153 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
154 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
155
156 // Converts to branches the select groups that were deemed
157 // profitable-to-convert.
158 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
159
160 // Splits selects of a given basic block into select groups.
161 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
162
163 // Determines for which select groups it is profitable converting to branches
164 // (base and inner-most-loop heuristics).
165 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
166 SelectGroups &ProfSIGroups);
167 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
168 SelectGroups &ProfSIGroups);
169
170 // Determines if a select group should be converted to a branch (base
171 // heuristics).
172 bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
173
174 // Returns true if there are expensive instructions in the cold value
175 // operand's (if any) dependence slice of any of the selects of the given
176 // group.
177 bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
178
179 // For a given source instruction, collect its backwards dependence slice
180 // consisting of instructions exclusively computed for producing the operands
181 // of the source instruction.
182 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
183 Instruction *SI, bool ForSinking = false);
184
185 // Returns true if the condition of the select is highly predictable.
186 bool isSelectHighlyPredictable(const SelectInst *SI);
187
188 // Loop-level checks to determine if a non-predicated version (with branches)
189 // of the given loop is more profitable than its predicated version.
190 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
191
192 // Computes instruction and loop-critical-path costs for both the predicated
193 // and non-predicated version of the given loop.
194 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
195 DenseMap<const Instruction *, CostInfo> &InstCostMap,
196 CostInfo *LoopCost);
197
198 // Returns a set of all the select instructions in the given select groups.
199 SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
200
201 // Returns the latency cost of a given instruction.
202 std::optional<uint64_t> computeInstCost(const Instruction *I);
203
204 // Returns the misprediction cost of a given select when converted to branch.
205 Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
206
207 // Returns the cost of a branch when the prediction is correct.
208 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
209 const SelectInst *SI);
210
211 // Returns true if the target architecture supports lowering a given select.
212 bool isSelectKindSupported(SelectInst *SI);
213 };
214 } // namespace
215
char SelectOptimize::ID = 0;

// Legacy pass-manager registration: declare the analyses this pass depends
// on so they are scheduled before SelectOptimize runs.
INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                    false)

// Factory used by the target pass pipeline to instantiate this pass.
FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
230
runOnFunction(Function & F)231 bool SelectOptimize::runOnFunction(Function &F) {
232 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
233 TSI = TM->getSubtargetImpl(F);
234 TLI = TSI->getTargetLowering();
235
236 // If none of the select types is supported then skip this pass.
237 // This is an optimization pass. Legality issues will be handled by
238 // instruction selection.
239 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
240 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
241 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
242 return false;
243
244 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
245
246 if (!TTI->enableSelectOptimize())
247 return false;
248
249 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
250 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
251 BPI.reset(new BranchProbabilityInfo(F, *LI));
252 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
253 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
254 ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
255 TSchedModel.init(TSI);
256
257 // When optimizing for size, selects are preferable over branches.
258 if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
259 return false;
260
261 return optimizeSelects(F);
262 }
263
optimizeSelects(Function & F)264 bool SelectOptimize::optimizeSelects(Function &F) {
265 // Determine for which select groups it is profitable converting to branches.
266 SelectGroups ProfSIGroups;
267 // Base heuristics apply only to non-loops and outer loops.
268 optimizeSelectsBase(F, ProfSIGroups);
269 // Separate heuristics for inner-most loops.
270 optimizeSelectsInnerLoops(F, ProfSIGroups);
271
272 // Convert to branches the select groups that were deemed
273 // profitable-to-convert.
274 convertProfitableSIGroups(ProfSIGroups);
275
276 // Code modified if at least one select group was converted.
277 return !ProfSIGroups.empty();
278 }
279
optimizeSelectsBase(Function & F,SelectGroups & ProfSIGroups)280 void SelectOptimize::optimizeSelectsBase(Function &F,
281 SelectGroups &ProfSIGroups) {
282 // Collect all the select groups.
283 SelectGroups SIGroups;
284 for (BasicBlock &BB : F) {
285 // Base heuristics apply only to non-loops and outer loops.
286 Loop *L = LI->getLoopFor(&BB);
287 if (L && L->isInnermost())
288 continue;
289 collectSelectGroups(BB, SIGroups);
290 }
291
292 // Determine for which select groups it is profitable converting to branches.
293 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
294 }
295
optimizeSelectsInnerLoops(Function & F,SelectGroups & ProfSIGroups)296 void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
297 SelectGroups &ProfSIGroups) {
298 SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
299 // Need to check size on each iteration as we accumulate child loops.
300 for (unsigned long i = 0; i < Loops.size(); ++i)
301 for (Loop *ChildL : Loops[i]->getSubLoops())
302 Loops.push_back(ChildL);
303
304 for (Loop *L : Loops) {
305 if (!L->isInnermost())
306 continue;
307
308 SelectGroups SIGroups;
309 for (BasicBlock *BB : L->getBlocks())
310 collectSelectGroups(*BB, SIGroups);
311
312 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
313 }
314 }
315
316 /// If \p isTrue is true, return the true value of \p SI, otherwise return
317 /// false value of \p SI. If the true/false value of \p SI is defined by any
318 /// select instructions in \p Selects, look through the defining select
319 /// instruction until the true/false value is not defined in \p Selects.
320 static Value *
getTrueOrFalseValue(SelectInst * SI,bool isTrue,const SmallPtrSet<const Instruction *,2> & Selects)321 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
322 const SmallPtrSet<const Instruction *, 2> &Selects) {
323 Value *V = nullptr;
324 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
325 DefSI = dyn_cast<SelectInst>(V)) {
326 assert(DefSI->getCondition() == SI->getCondition() &&
327 "The condition of DefSI does not match with SI");
328 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
329 }
330 assert(V && "Failed to get select true/false value");
331 return V;
332 }
333
convertProfitableSIGroups(SelectGroups & ProfSIGroups)334 void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
335 for (SelectGroup &ASI : ProfSIGroups) {
336 // The code transformation here is a modified version of the sinking
337 // transformation in CodeGenPrepare::optimizeSelectInst with a more
338 // aggressive strategy of which instructions to sink.
339 //
340 // TODO: eliminate the redundancy of logic transforming selects to branches
341 // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
342 // selects for all cases (with and without profile information).
343
344 // Transform a sequence like this:
345 // start:
346 // %cmp = cmp uge i32 %a, %b
347 // %sel = select i1 %cmp, i32 %c, i32 %d
348 //
349 // Into:
350 // start:
351 // %cmp = cmp uge i32 %a, %b
352 // %cmp.frozen = freeze %cmp
353 // br i1 %cmp.frozen, label %select.true, label %select.false
354 // select.true:
355 // br label %select.end
356 // select.false:
357 // br label %select.end
358 // select.end:
359 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
360 //
361 // %cmp should be frozen, otherwise it may introduce undefined behavior.
362 // In addition, we may sink instructions that produce %c or %d into the
363 // destination(s) of the new branch.
364 // If the true or false blocks do not contain a sunken instruction, that
365 // block and its branch may be optimized away. In that case, one side of the
366 // first branch will point directly to select.end, and the corresponding PHI
367 // predecessor block will be the start block.
368
369 // Find all the instructions that can be soundly sunk to the true/false
370 // blocks. These are instructions that are computed solely for producing the
371 // operands of the select instructions in the group and can be sunk without
372 // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
373 // with side effects).
374 SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
375 typedef std::stack<Instruction *>::size_type StackSizeType;
376 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
377 for (SelectInst *SI : ASI) {
378 // For each select, compute the sinkable dependence chains of the true and
379 // false operands.
380 if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
381 std::stack<Instruction *> TrueSlice;
382 getExclBackwardsSlice(TI, TrueSlice, SI, true);
383 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
384 TrueSlices.push_back(TrueSlice);
385 }
386 if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
387 std::stack<Instruction *> FalseSlice;
388 getExclBackwardsSlice(FI, FalseSlice, SI, true);
389 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
390 FalseSlices.push_back(FalseSlice);
391 }
392 }
393 // In the case of multiple select instructions in the same group, the order
394 // of non-dependent instructions (instructions of different dependence
395 // slices) in the true/false blocks appears to affect performance.
396 // Interleaving the slices seems to experimentally be the optimal approach.
397 // This interleaving scheduling allows for more ILP (with a natural downside
398 // of increasing a bit register pressure) compared to a simple ordering of
399 // one whole chain after another. One would expect that this ordering would
400 // not matter since the scheduling in the backend of the compiler would
401 // take care of it, but apparently the scheduler fails to deliver optimal
402 // ILP with a naive ordering here.
403 SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
404 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
405 for (auto &S : TrueSlices) {
406 if (!S.empty()) {
407 TrueSlicesInterleaved.push_back(S.top());
408 S.pop();
409 }
410 }
411 }
412 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
413 for (auto &S : FalseSlices) {
414 if (!S.empty()) {
415 FalseSlicesInterleaved.push_back(S.top());
416 S.pop();
417 }
418 }
419 }
420
421 // We split the block containing the select(s) into two blocks.
422 SelectInst *SI = ASI.front();
423 SelectInst *LastSI = ASI.back();
424 BasicBlock *StartBlock = SI->getParent();
425 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
426 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
427 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
428 // Delete the unconditional branch that was just created by the split.
429 StartBlock->getTerminator()->eraseFromParent();
430
431 // Move any debug/pseudo instructions that were in-between the select
432 // group to the newly-created end block.
433 SmallVector<Instruction *, 2> DebugPseudoINS;
434 auto DIt = SI->getIterator();
435 while (&*DIt != LastSI) {
436 if (DIt->isDebugOrPseudoInst())
437 DebugPseudoINS.push_back(&*DIt);
438 DIt++;
439 }
440 for (auto *DI : DebugPseudoINS) {
441 DI->moveBefore(&*EndBlock->getFirstInsertionPt());
442 }
443
444 // These are the new basic blocks for the conditional branch.
445 // At least one will become an actual new basic block.
446 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
447 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
448 if (!TrueSlicesInterleaved.empty()) {
449 TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
450 EndBlock->getParent(), EndBlock);
451 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
452 TrueBranch->setDebugLoc(LastSI->getDebugLoc());
453 for (Instruction *TrueInst : TrueSlicesInterleaved)
454 TrueInst->moveBefore(TrueBranch);
455 }
456 if (!FalseSlicesInterleaved.empty()) {
457 FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
458 EndBlock->getParent(), EndBlock);
459 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
460 FalseBranch->setDebugLoc(LastSI->getDebugLoc());
461 for (Instruction *FalseInst : FalseSlicesInterleaved)
462 FalseInst->moveBefore(FalseBranch);
463 }
464 // If there was nothing to sink, then arbitrarily choose the 'false' side
465 // for a new input value to the PHI.
466 if (TrueBlock == FalseBlock) {
467 assert(TrueBlock == nullptr &&
468 "Unexpected basic block transform while optimizing select");
469
470 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
471 EndBlock->getParent(), EndBlock);
472 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
473 FalseBranch->setDebugLoc(SI->getDebugLoc());
474 }
475
476 // Insert the real conditional branch based on the original condition.
477 // If we did not create a new block for one of the 'true' or 'false' paths
478 // of the condition, it means that side of the branch goes to the end block
479 // directly and the path originates from the start block from the point of
480 // view of the new PHI.
481 BasicBlock *TT, *FT;
482 if (TrueBlock == nullptr) {
483 TT = EndBlock;
484 FT = FalseBlock;
485 TrueBlock = StartBlock;
486 } else if (FalseBlock == nullptr) {
487 TT = TrueBlock;
488 FT = EndBlock;
489 FalseBlock = StartBlock;
490 } else {
491 TT = TrueBlock;
492 FT = FalseBlock;
493 }
494 IRBuilder<> IB(SI);
495 auto *CondFr =
496 IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
497 IB.CreateCondBr(CondFr, TT, FT, SI);
498
499 SmallPtrSet<const Instruction *, 2> INS;
500 INS.insert(ASI.begin(), ASI.end());
501 // Use reverse iterator because later select may use the value of the
502 // earlier select, and we need to propagate value through earlier select
503 // to get the PHI operand.
504 for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
505 SelectInst *SI = *It;
506 // The select itself is replaced with a PHI Node.
507 PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
508 PN->takeName(SI);
509 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
510 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
511 PN->setDebugLoc(SI->getDebugLoc());
512
513 SI->replaceAllUsesWith(PN);
514 SI->eraseFromParent();
515 INS.erase(SI);
516 ++NumSelectsConverted;
517 }
518 }
519 }
520
isSpecialSelect(SelectInst * SI)521 static bool isSpecialSelect(SelectInst *SI) {
522 using namespace llvm::PatternMatch;
523
524 // If the select is a logical-and/logical-or then it is better treated as a
525 // and/or by the backend.
526 if (match(SI, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
527 m_LogicalOr(m_Value(), m_Value()))))
528 return true;
529
530 return false;
531 }
532
collectSelectGroups(BasicBlock & BB,SelectGroups & SIGroups)533 void SelectOptimize::collectSelectGroups(BasicBlock &BB,
534 SelectGroups &SIGroups) {
535 BasicBlock::iterator BBIt = BB.begin();
536 while (BBIt != BB.end()) {
537 Instruction *I = &*BBIt++;
538 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
539 if (isSpecialSelect(SI))
540 continue;
541
542 SelectGroup SIGroup;
543 SIGroup.push_back(SI);
544 while (BBIt != BB.end()) {
545 Instruction *NI = &*BBIt;
546 SelectInst *NSI = dyn_cast<SelectInst>(NI);
547 if (NSI && SI->getCondition() == NSI->getCondition()) {
548 SIGroup.push_back(NSI);
549 } else if (!NI->isDebugOrPseudoInst()) {
550 // Debug/pseudo instructions should be skipped and not prevent the
551 // formation of a select group.
552 break;
553 }
554 ++BBIt;
555 }
556
557 // If the select type is not supported, no point optimizing it.
558 // Instruction selection will take care of it.
559 if (!isSelectKindSupported(SI))
560 continue;
561
562 SIGroups.push_back(SIGroup);
563 }
564 }
565 }
566
findProfitableSIGroupsBase(SelectGroups & SIGroups,SelectGroups & ProfSIGroups)567 void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
568 SelectGroups &ProfSIGroups) {
569 for (SelectGroup &ASI : SIGroups) {
570 ++NumSelectOptAnalyzed;
571 if (isConvertToBranchProfitableBase(ASI))
572 ProfSIGroups.push_back(ASI);
573 }
574 }
575
EmitAndPrintRemark(OptimizationRemarkEmitter * ORE,DiagnosticInfoOptimizationBase & Rem)576 static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
577 DiagnosticInfoOptimizationBase &Rem) {
578 LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
579 ORE->emit(Rem);
580 }
581
findProfitableSIGroupsInnerLoops(const Loop * L,SelectGroups & SIGroups,SelectGroups & ProfSIGroups)582 void SelectOptimize::findProfitableSIGroupsInnerLoops(
583 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
584 NumSelectOptAnalyzed += SIGroups.size();
585 // For each select group in an inner-most loop,
586 // a branch is more preferable than a select/conditional-move if:
587 // i) conversion to branches for all the select groups of the loop satisfies
588 // loop-level heuristics including reducing the loop's critical path by
589 // some threshold (see SelectOptimize::checkLoopHeuristics); and
590 // ii) the total cost of the select group is cheaper with a branch compared
591 // to its predicated version. The cost is in terms of latency and the cost
592 // of a select group is the cost of its most expensive select instruction
593 // (assuming infinite resources and thus fully leveraging available ILP).
594
595 DenseMap<const Instruction *, CostInfo> InstCostMap;
596 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
597 {Scaled64::getZero(), Scaled64::getZero()}};
598 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
599 !checkLoopHeuristics(L, LoopCost)) {
600 return;
601 }
602
603 for (SelectGroup &ASI : SIGroups) {
604 // Assuming infinite resources, the cost of a group of instructions is the
605 // cost of the most expensive instruction of the group.
606 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
607 for (SelectInst *SI : ASI) {
608 SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
609 BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
610 }
611 if (BranchCost < SelectCost) {
612 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
613 OR << "Profitable to convert to branch (loop analysis). BranchCost="
614 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
615 << ". ";
616 EmitAndPrintRemark(ORE, OR);
617 ++NumSelectConvertedLoop;
618 ProfSIGroups.push_back(ASI);
619 } else {
620 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
621 ORmiss << "Select is more profitable (loop analysis). BranchCost="
622 << BranchCost.toString()
623 << ", SelectCost=" << SelectCost.toString() << ". ";
624 EmitAndPrintRemark(ORE, ORmiss);
625 }
626 }
627 }
628
isConvertToBranchProfitableBase(const SmallVector<SelectInst *,2> & ASI)629 bool SelectOptimize::isConvertToBranchProfitableBase(
630 const SmallVector<SelectInst *, 2> &ASI) {
631 SelectInst *SI = ASI.front();
632 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
633 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
634 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
635
636 // Skip cold basic blocks. Better to optimize for size for cold blocks.
637 if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
638 ++NumSelectColdBB;
639 ORmiss << "Not converted to branch because of cold basic block. ";
640 EmitAndPrintRemark(ORE, ORmiss);
641 return false;
642 }
643
644 // If unpredictable, branch form is less profitable.
645 if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
646 ++NumSelectUnPred;
647 ORmiss << "Not converted to branch because of unpredictable branch. ";
648 EmitAndPrintRemark(ORE, ORmiss);
649 return false;
650 }
651
652 // If highly predictable, branch form is more profitable, unless a
653 // predictable select is inexpensive in the target architecture.
654 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
655 ++NumSelectConvertedHighPred;
656 OR << "Converted to branch because of highly predictable branch. ";
657 EmitAndPrintRemark(ORE, OR);
658 return true;
659 }
660
661 // Look for expensive instructions in the cold operand's (if any) dependence
662 // slice of any of the selects in the group.
663 if (hasExpensiveColdOperand(ASI)) {
664 ++NumSelectConvertedExpColdOperand;
665 OR << "Converted to branch because of expensive cold operand.";
666 EmitAndPrintRemark(ORE, OR);
667 return true;
668 }
669
670 ORmiss << "Not profitable to convert to branch (base heuristic).";
671 EmitAndPrintRemark(ORE, ORmiss);
672 return false;
673 }
674
divideNearest(InstructionCost Numerator,uint64_t Denominator)675 static InstructionCost divideNearest(InstructionCost Numerator,
676 uint64_t Denominator) {
677 return (Numerator + (Denominator / 2)) / Denominator;
678 }
679
hasExpensiveColdOperand(const SmallVector<SelectInst *,2> & ASI)680 bool SelectOptimize::hasExpensiveColdOperand(
681 const SmallVector<SelectInst *, 2> &ASI) {
682 bool ColdOperand = false;
683 uint64_t TrueWeight, FalseWeight, TotalWeight;
684 if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) {
685 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
686 TotalWeight = TrueWeight + FalseWeight;
687 // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
688 ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
689 } else if (PSI->hasProfileSummary()) {
690 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
691 ORmiss << "Profile data available but missing branch-weights metadata for "
692 "select instruction. ";
693 EmitAndPrintRemark(ORE, ORmiss);
694 }
695 if (!ColdOperand)
696 return false;
697 // Check if the cold path's dependence slice is expensive for any of the
698 // selects of the group.
699 for (SelectInst *SI : ASI) {
700 Instruction *ColdI = nullptr;
701 uint64_t HotWeight;
702 if (TrueWeight < FalseWeight) {
703 ColdI = dyn_cast<Instruction>(SI->getTrueValue());
704 HotWeight = FalseWeight;
705 } else {
706 ColdI = dyn_cast<Instruction>(SI->getFalseValue());
707 HotWeight = TrueWeight;
708 }
709 if (ColdI) {
710 std::stack<Instruction *> ColdSlice;
711 getExclBackwardsSlice(ColdI, ColdSlice, SI);
712 InstructionCost SliceCost = 0;
713 while (!ColdSlice.empty()) {
714 SliceCost += TTI->getInstructionCost(ColdSlice.top(),
715 TargetTransformInfo::TCK_Latency);
716 ColdSlice.pop();
717 }
718 // The colder the cold value operand of the select is the more expensive
719 // the cmov becomes for computing the cold value operand every time. Thus,
720 // the colder the cold operand is the more its cost counts.
721 // Get nearest integer cost adjusted for coldness.
722 InstructionCost AdjSliceCost =
723 divideNearest(SliceCost * HotWeight, TotalWeight);
724 if (AdjSliceCost >=
725 ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
726 return true;
727 }
728 }
729 return false;
730 }
731
732 // Check if it is safe to move LoadI next to the SI.
733 // Conservatively assume it is safe only if there is no instruction
734 // modifying memory in-between the load and the select instruction.
isSafeToSinkLoad(Instruction * LoadI,Instruction * SI)735 static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
736 // Assume loads from different basic blocks are unsafe to move.
737 if (LoadI->getParent() != SI->getParent())
738 return false;
739 auto It = LoadI->getIterator();
740 while (&*It != SI) {
741 if (It->mayWriteToMemory())
742 return false;
743 It++;
744 }
745 return true;
746 }
747
// For a given source instruction, collect its backwards dependence slice
// consisting of instructions exclusively computed for the purpose of producing
// the operands of the source instruction. As an approximation
// (sufficiently-accurate in practice), we populate this set with the
// instructions of the backwards dependence slice that only have one-use and
// form an one-use chain that leads to the source instruction.
void SelectOptimize::getExclBackwardsSlice(Instruction *I,
                                           std::stack<Instruction *> &Slice,
                                           Instruction *SI, bool ForSinking) {
  // Breadth-first worklist walk over the operands of I. Visited guards
  // against revisiting (and against use-def cycles through PHIs).
  SmallPtrSet<Instruction *, 2> Visited;
  std::queue<Instruction *> Worklist;
  Worklist.push(I);
  while (!Worklist.empty()) {
    Instruction *II = Worklist.front();
    Worklist.pop();

    // Avoid cycles.
    if (!Visited.insert(II).second)
      continue;

    // Multi-use instructions are not exclusive to this slice.
    if (!II->hasOneUse())
      continue;

    // Cannot soundly sink instructions with side-effects.
    // Terminator or phi instructions cannot be sunk.
    // Avoid sinking other select instructions (should be handled separately).
    if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
                       isa<SelectInst>(II) || isa<PHINode>(II)))
      continue;

    // Avoid sinking loads in order not to skip state-modifying instructions,
    // that may alias with the loaded address.
    // Only allow sinking of loads within the same basic block that are
    // conservatively proven to be safe.
    if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
      continue;

    // Avoid considering instructions with less frequency than the source
    // instruction (i.e., avoid colder code regions of the dependence slice).
    if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
      continue;

    // Eligible one-use instruction added to the dependence slice.
    Slice.push(II);

    // Explore all the operands of the current instruction to expand the slice.
    for (unsigned k = 0; k < II->getNumOperands(); ++k)
      if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
        Worklist.push(OpI);
  }
}
799
isSelectHighlyPredictable(const SelectInst * SI)800 bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
801 uint64_t TrueWeight, FalseWeight;
802 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
803 uint64_t Max = std::max(TrueWeight, FalseWeight);
804 uint64_t Sum = TrueWeight + FalseWeight;
805 if (Sum != 0) {
806 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
807 if (Probability > TTI->getPredictableBranchThreshold())
808 return true;
809 }
810 }
811 return false;
812 }
813
checkLoopHeuristics(const Loop * L,const CostInfo LoopCost[2])814 bool SelectOptimize::checkLoopHeuristics(const Loop *L,
815 const CostInfo LoopCost[2]) {
816 // Loop-level checks to determine if a non-predicated version (with branches)
817 // of the loop is more profitable than its predicated version.
818
819 if (DisableLoopLevelHeuristics)
820 return true;
821
822 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
823 L->getHeader()->getFirstNonPHI());
824
825 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
826 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
827 ORmissL << "No select conversion in the loop due to no reduction of loop's "
828 "critical path. ";
829 EmitAndPrintRemark(ORE, ORmissL);
830 return false;
831 }
832
833 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
834 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
835
836 // Profitably converting to branches need to reduce the loop's critical path
837 // by at least some threshold (absolute gain of GainCycleThreshold cycles and
838 // relative gain of 12.5%).
839 if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
840 Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
841 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
842 ORmissL << "No select conversion in the loop due to small reduction of "
843 "loop's critical path. Gain="
844 << Gain[1].toString()
845 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
846 EmitAndPrintRemark(ORE, ORmissL);
847 return false;
848 }
849
850 // If the loop's critical path involves loop-carried dependences, the gradient
851 // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
852 // This check ensures that the latency reduction for the loop's critical path
853 // keeps decreasing with sufficient rate beyond the two analyzed loop
854 // iterations.
855 if (Gain[1] > Gain[0]) {
856 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
857 (LoopCost[1].PredCost - LoopCost[0].PredCost);
858 if (GradientGain < Scaled64::get(GainGradientThreshold)) {
859 ORmissL << "No select conversion in the loop due to small gradient gain. "
860 "GradientGain="
861 << GradientGain.toString() << "%. ";
862 EmitAndPrintRemark(ORE, ORmissL);
863 return false;
864 }
865 }
866 // If the gain decreases it is not profitable to convert.
867 else if (Gain[1] < Gain[0]) {
868 ORmissL
869 << "No select conversion in the loop due to negative gradient gain. ";
870 EmitAndPrintRemark(ORE, ORmissL);
871 return false;
872 }
873
874 // Non-predicated version of the loop is more profitable than its
875 // predicated version.
876 return true;
877 }
878
// Computes instruction and loop-critical-path costs for both the predicated
// and non-predicated version of the given loop.
// Returns false if unable to compute these costs due to invalid cost of loop
// instruction(s).
bool SelectOptimize::computeLoopCosts(
    const Loop *L, const SelectGroups &SIGroups,
    DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
  LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
                    << L->getHeader()->getName() << "\n");
  // Flat set of all selects in SIGroups, used below to recognize selects
  // whose non-predicated cost should be modeled as a branch.
  const auto &SIset = getSIset(SIGroups);
  // Compute instruction and loop-critical-path costs across two iterations for
  // both predicated and non-predicated version.
  const unsigned Iterations = 2;
  for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
    // Cost of the loop's critical path.
    CostInfo &MaxCost = LoopCost[Iter];
    for (BasicBlock *BB : L->getBlocks()) {
      for (const Instruction &I : *BB) {
        if (I.isDebugOrPseudoInst())
          continue;
        // Compute the predicated and non-predicated cost of the instruction.
        Scaled64 IPredCost = Scaled64::getZero(),
                 INonPredCost = Scaled64::getZero();

        // Assume infinite resources that allow to fully exploit the available
        // instruction-level parallelism.
        // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
        for (const Use &U : I.operands()) {
          auto UI = dyn_cast<Instruction>(U.get());
          if (!UI)
            continue;
          // Operands not yet in the map (defined outside the loop, or later
          // in the first iteration) contribute zero to the max.
          if (InstCostMap.count(UI)) {
            IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
            INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
          }
        }
        auto ILatency = computeInstCost(&I);
        if (!ILatency) {
          // An invalid target latency invalidates the whole loop analysis:
          // report it and give up on this loop.
          OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
          ORmissL << "Invalid instruction cost preventing analysis and "
                     "optimization of the inner-most loop containing this "
                     "instruction. ";
          EmitAndPrintRemark(ORE, ORmissL);
          return false;
        }
        IPredCost += Scaled64::get(*ILatency);
        INonPredCost += Scaled64::get(*ILatency);

        // For a select that can be converted to branch,
        // compute its cost as a branch (non-predicated cost).
        //
        // BranchCost = PredictedPathCost + MispredictCost
        // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
        // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
        if (SIset.contains(&I)) {
          auto SI = cast<SelectInst>(&I);

          // Non-predicated costs of the value operands, if already computed.
          Scaled64 TrueOpCost = Scaled64::getZero(),
                   FalseOpCost = Scaled64::getZero();
          if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
            if (InstCostMap.count(TI))
              TrueOpCost = InstCostMap[TI].NonPredCost;
          if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
            if (InstCostMap.count(FI))
              FalseOpCost = InstCostMap[FI].NonPredCost;
          Scaled64 PredictedPathCost =
              getPredictedPathCost(TrueOpCost, FalseOpCost, SI);

          // Condition cost models delayed misprediction detection when the
          // condition ends a long dependence chain.
          Scaled64 CondCost = Scaled64::getZero();
          if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
            if (InstCostMap.count(CI))
              CondCost = InstCostMap[CI].NonPredCost;
          Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);

          INonPredCost = PredictedPathCost + MispredictCost;
        }
        LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
                          << INonPredCost << " for " << I << "\n");

        // Record this instruction's costs for use by its consumers (and by
        // the next analyzed iteration), and track the critical-path maxima.
        InstCostMap[&I] = {IPredCost, INonPredCost};
        MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
        MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
      }
    }
    LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
                      << " MaxCost = " << MaxCost.PredCost << " "
                      << MaxCost.NonPredCost << "\n");
  }
  return true;
}
969
970 SmallPtrSet<const Instruction *, 2>
getSIset(const SelectGroups & SIGroups)971 SelectOptimize::getSIset(const SelectGroups &SIGroups) {
972 SmallPtrSet<const Instruction *, 2> SIset;
973 for (const SelectGroup &ASI : SIGroups)
974 for (const SelectInst *SI : ASI)
975 SIset.insert(SI);
976 return SIset;
977 }
978
computeInstCost(const Instruction * I)979 std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
980 InstructionCost ICost =
981 TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
982 if (auto OC = ICost.getValue())
983 return std::optional<uint64_t>(*OC);
984 return std::nullopt;
985 }
986
987 ScaledNumber<uint64_t>
getMispredictionCost(const SelectInst * SI,const Scaled64 CondCost)988 SelectOptimize::getMispredictionCost(const SelectInst *SI,
989 const Scaled64 CondCost) {
990 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
991
992 // Account for the default misprediction rate when using a branch
993 // (conservatively set to 25% by default).
994 uint64_t MispredictRate = MispredictDefaultRate;
995 // If the select condition is obviously predictable, then the misprediction
996 // rate is zero.
997 if (isSelectHighlyPredictable(SI))
998 MispredictRate = 0;
999
1000 // CondCost is included to account for cases where the computation of the
1001 // condition is part of a long dependence chain (potentially loop-carried)
1002 // that would delay detection of a misprediction and increase its cost.
1003 Scaled64 MispredictCost =
1004 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1005 Scaled64::get(MispredictRate);
1006 MispredictCost /= Scaled64::get(100);
1007
1008 return MispredictCost;
1009 }
1010
1011 // Returns the cost of a branch when the prediction is correct.
1012 // TrueCost * TrueProbability + FalseCost * FalseProbability.
1013 ScaledNumber<uint64_t>
getPredictedPathCost(Scaled64 TrueCost,Scaled64 FalseCost,const SelectInst * SI)1014 SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1015 const SelectInst *SI) {
1016 Scaled64 PredPathCost;
1017 uint64_t TrueWeight, FalseWeight;
1018 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
1019 uint64_t SumWeight = TrueWeight + FalseWeight;
1020 if (SumWeight != 0) {
1021 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1022 FalseCost * Scaled64::get(FalseWeight);
1023 PredPathCost /= Scaled64::get(SumWeight);
1024 return PredPathCost;
1025 }
1026 }
1027 // Without branch weight metadata, we assume 75% for the one path and 25% for
1028 // the other, and pick the result with the biggest cost.
1029 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1030 FalseCost * Scaled64::get(3) + TrueCost);
1031 PredPathCost /= Scaled64::get(4);
1032 return PredPathCost;
1033 }
1034
isSelectKindSupported(SelectInst * SI)1035 bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
1036 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
1037 if (VectorCond)
1038 return false;
1039 TargetLowering::SelectSupportKind SelectKind;
1040 if (SI->getType()->isVectorTy())
1041 SelectKind = TargetLowering::ScalarCondVectorVal;
1042 else
1043 SelectKind = TargetLowering::ScalarValSelect;
1044 return TLI->isSelectSupported(SelectKind);
1045 }
1046