xref: /aosp_15_r20/external/swiftshader/third_party/llvm-16.0/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
1 //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a set of utility VPlan to VPlan transformations.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "VPlanTransforms.h"
15 #include "VPlanCFG.h"
16 #include "llvm/ADT/PostOrderIterator.h"
17 #include "llvm/ADT/SetVector.h"
18 #include "llvm/Analysis/IVDescriptors.h"
19 #include "llvm/Analysis/VectorUtils.h"
20 #include "llvm/IR/Intrinsics.h"
21 
22 using namespace llvm;
23 
VPInstructionsToVPRecipes(Loop * OrigLoop,VPlanPtr & Plan,function_ref<const InductionDescriptor * (PHINode *)> GetIntOrFpInductionDescriptor,SmallPtrSetImpl<Instruction * > & DeadInstructions,ScalarEvolution & SE,const TargetLibraryInfo & TLI)24 void VPlanTransforms::VPInstructionsToVPRecipes(
25     Loop *OrigLoop, VPlanPtr &Plan,
26     function_ref<const InductionDescriptor *(PHINode *)>
27         GetIntOrFpInductionDescriptor,
28     SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE,
29     const TargetLibraryInfo &TLI) {
30 
31   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
32       Plan->getEntry());
33   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
34     VPRecipeBase *Term = VPBB->getTerminator();
35     auto EndIter = Term ? Term->getIterator() : VPBB->end();
36     // Introduce each ingredient into VPlan.
37     for (VPRecipeBase &Ingredient :
38          make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
39 
40       VPValue *VPV = Ingredient.getVPSingleValue();
41       Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
42       if (DeadInstructions.count(Inst)) {
43         VPValue DummyValue;
44         VPV->replaceAllUsesWith(&DummyValue);
45         Ingredient.eraseFromParent();
46         continue;
47       }
48 
49       VPRecipeBase *NewRecipe = nullptr;
50       if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
51         auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
52         if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
53           VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
54           VPValue *Step =
55               vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
56           NewRecipe =
57               new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true);
58         } else {
59           Plan->addVPValue(Phi, VPPhi);
60           continue;
61         }
62       } else {
63         assert(isa<VPInstruction>(&Ingredient) &&
64                "only VPInstructions expected here");
65         assert(!isa<PHINode>(Inst) && "phis should be handled above");
66         // Create VPWidenMemoryInstructionRecipe for loads and stores.
67         if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
68           NewRecipe = new VPWidenMemoryInstructionRecipe(
69               *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
70               nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
71         } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
72           NewRecipe = new VPWidenMemoryInstructionRecipe(
73               *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
74               Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/,
75               false /*Consecutive*/, false /*Reverse*/);
76         } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
77           NewRecipe = new VPWidenGEPRecipe(
78               GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop);
79         } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
80           NewRecipe =
81               new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()),
82                                     getVectorIntrinsicIDForCall(CI, &TLI));
83         } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
84           bool InvariantCond =
85               SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop);
86           NewRecipe = new VPWidenSelectRecipe(
87               *SI, Plan->mapToVPValues(SI->operands()), InvariantCond);
88         } else {
89           NewRecipe =
90               new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));
91         }
92       }
93 
94       NewRecipe->insertBefore(&Ingredient);
95       if (NewRecipe->getNumDefinedValues() == 1)
96         VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
97       else
98         assert(NewRecipe->getNumDefinedValues() == 0 &&
99                "Only recpies with zero or one defined values expected");
100       Ingredient.eraseFromParent();
101       Plan->removeVPValueFor(Inst);
102       for (auto *Def : NewRecipe->definedValues()) {
103         Plan->addVPValue(Inst, Def);
104       }
105     }
106   }
107 }
108 
sinkScalarOperands(VPlan & Plan)109 bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
110   auto Iter = vp_depth_first_deep(Plan.getEntry());
111   bool Changed = false;
112   // First, collect the operands of all recipes in replicate blocks as seeds for
113   // sinking.
114   SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList;
115   for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
116     VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
117     if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
118       continue;
119     VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
120     if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
121       continue;
122     for (auto &Recipe : *VPBB) {
123       for (VPValue *Op : Recipe.operands())
124         if (auto *Def = Op->getDefiningRecipe())
125           WorkList.insert(std::make_pair(VPBB, Def));
126     }
127   }
128 
129   bool ScalarVFOnly = Plan.hasScalarVFOnly();
130   // Try to sink each replicate or scalar IV steps recipe in the worklist.
131   for (unsigned I = 0; I != WorkList.size(); ++I) {
132     VPBasicBlock *SinkTo;
133     VPRecipeBase *SinkCandidate;
134     std::tie(SinkTo, SinkCandidate) = WorkList[I];
135     if (SinkCandidate->getParent() == SinkTo ||
136         SinkCandidate->mayHaveSideEffects() ||
137         SinkCandidate->mayReadOrWriteMemory())
138       continue;
139     if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
140       if (!ScalarVFOnly && RepR->isUniform())
141         continue;
142     } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
143       continue;
144 
145     bool NeedsDuplicating = false;
146     // All recipe users of the sink candidate must be in the same block SinkTo
147     // or all users outside of SinkTo must be uniform-after-vectorization (
148     // i.e., only first lane is used) . In the latter case, we need to duplicate
149     // SinkCandidate.
150     auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
151                             SinkCandidate](VPUser *U) {
152       auto *UI = dyn_cast<VPRecipeBase>(U);
153       if (!UI)
154         return false;
155       if (UI->getParent() == SinkTo)
156         return true;
157       NeedsDuplicating =
158           UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue());
159       // We only know how to duplicate VPRecipeRecipes for now.
160       return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
161     };
162     if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser))
163       continue;
164 
165     if (NeedsDuplicating) {
166       if (ScalarVFOnly)
167         continue;
168       Instruction *I = cast<Instruction>(
169           cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue());
170       auto *Clone =
171           new VPReplicateRecipe(I, SinkCandidate->operands(), true, false);
172       // TODO: add ".cloned" suffix to name of Clone's VPValue.
173 
174       Clone->insertBefore(SinkCandidate);
175       for (auto *U : to_vector(SinkCandidate->getVPSingleValue()->users())) {
176         auto *UI = cast<VPRecipeBase>(U);
177         if (UI->getParent() == SinkTo)
178           continue;
179 
180         for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) {
181           if (UI->getOperand(Idx) != SinkCandidate->getVPSingleValue())
182             continue;
183           UI->setOperand(Idx, Clone);
184         }
185       }
186     }
187     SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
188     for (VPValue *Op : SinkCandidate->operands())
189       if (auto *Def = Op->getDefiningRecipe())
190         WorkList.insert(std::make_pair(SinkTo, Def));
191     Changed = true;
192   }
193   return Changed;
194 }
195 
196 /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
197 /// the mask.
getPredicatedMask(VPRegionBlock * R)198 VPValue *getPredicatedMask(VPRegionBlock *R) {
199   auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
200   if (!EntryBB || EntryBB->size() != 1 ||
201       !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
202     return nullptr;
203 
204   return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
205 }
206 
207 /// If \p R is a triangle region, return the 'then' block of the triangle.
getPredicatedThenBlock(VPRegionBlock * R)208 static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
209   auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
210   if (EntryBB->getNumSuccessors() != 2)
211     return nullptr;
212 
213   auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
214   auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
215   if (!Succ0 || !Succ1)
216     return nullptr;
217 
218   if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
219     return nullptr;
220   if (Succ0->getSingleSuccessor() == Succ1)
221     return Succ0;
222   if (Succ1->getSingleSuccessor() == Succ0)
223     return Succ1;
224   return nullptr;
225 }
226 
mergeReplicateRegionsIntoSuccessors(VPlan & Plan)227 bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
228   SetVector<VPRegionBlock *> DeletedRegions;
229 
230   // Collect replicate regions followed by an empty block, followed by another
231   // replicate region with matching masks to process front. This is to avoid
232   // iterator invalidation issues while merging regions.
233   SmallVector<VPRegionBlock *, 8> WorkList;
234   for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
235            vp_depth_first_deep(Plan.getEntry()))) {
236     if (!Region1->isReplicator())
237       continue;
238     auto *MiddleBasicBlock =
239         dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
240     if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
241       continue;
242 
243     auto *Region2 =
244         dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
245     if (!Region2 || !Region2->isReplicator())
246       continue;
247 
248     VPValue *Mask1 = getPredicatedMask(Region1);
249     VPValue *Mask2 = getPredicatedMask(Region2);
250     if (!Mask1 || Mask1 != Mask2)
251       continue;
252 
253     assert(Mask1 && Mask2 && "both region must have conditions");
254     WorkList.push_back(Region1);
255   }
256 
257   // Move recipes from Region1 to its successor region, if both are triangles.
258   for (VPRegionBlock *Region1 : WorkList) {
259     if (DeletedRegions.contains(Region1))
260       continue;
261     auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
262     auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
263 
264     VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
265     VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
266     if (!Then1 || !Then2)
267       continue;
268 
269     // Note: No fusion-preventing memory dependencies are expected in either
270     // region. Such dependencies should be rejected during earlier dependence
271     // checks, which guarantee accesses can be re-ordered for vectorization.
272     //
273     // Move recipes to the successor region.
274     for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
275       ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());
276 
277     auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
278     auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());
279 
280     // Move VPPredInstPHIRecipes from the merge block to the successor region's
281     // merge block. Update all users inside the successor region to use the
282     // original values.
283     for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
284       VPValue *PredInst1 =
285           cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
286       VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
287       for (VPUser *U : to_vector(Phi1ToMoveV->users())) {
288         auto *UI = dyn_cast<VPRecipeBase>(U);
289         if (!UI || UI->getParent() != Then2)
290           continue;
291         for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
292           if (Phi1ToMoveV != U->getOperand(I))
293             continue;
294           U->setOperand(I, PredInst1);
295         }
296       }
297 
298       Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
299     }
300 
301     // Finally, remove the first region.
302     for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
303       VPBlockUtils::disconnectBlocks(Pred, Region1);
304       VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
305     }
306     VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
307     DeletedRegions.insert(Region1);
308   }
309 
310   for (VPRegionBlock *ToDelete : DeletedRegions)
311     delete ToDelete;
312   return !DeletedRegions.empty();
313 }
314 
mergeBlocksIntoPredecessors(VPlan & Plan)315 bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) {
316   SmallVector<VPBasicBlock *> WorkList;
317   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
318            vp_depth_first_deep(Plan.getEntry()))) {
319     auto *PredVPBB =
320         dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
321     if (PredVPBB && PredVPBB->getNumSuccessors() == 1)
322       WorkList.push_back(VPBB);
323   }
324 
325   for (VPBasicBlock *VPBB : WorkList) {
326     VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
327     for (VPRecipeBase &R : make_early_inc_range(*VPBB))
328       R.moveBefore(*PredVPBB, PredVPBB->end());
329     VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
330     auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent());
331     if (ParentRegion && ParentRegion->getExiting() == VPBB)
332       ParentRegion->setExiting(PredVPBB);
333     for (auto *Succ : to_vector(VPBB->successors())) {
334       VPBlockUtils::disconnectBlocks(VPBB, Succ);
335       VPBlockUtils::connectBlocks(PredVPBB, Succ);
336     }
337     delete VPBB;
338   }
339   return !WorkList.empty();
340 }
341 
removeRedundantInductionCasts(VPlan & Plan)342 void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) {
343   for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
344     auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
345     if (!IV || IV->getTruncInst())
346       continue;
347 
348     // A sequence of IR Casts has potentially been recorded for IV, which
349     // *must be bypassed* when the IV is vectorized, because the vectorized IV
350     // will produce the desired casted value. This sequence forms a def-use
351     // chain and is provided in reverse order, ending with the cast that uses
352     // the IV phi. Search for the recipe of the last cast in the chain and
353     // replace it with the original IV. Note that only the final cast is
354     // expected to have users outside the cast-chain and the dead casts left
355     // over will be cleaned up later.
356     auto &Casts = IV->getInductionDescriptor().getCastInsts();
357     VPValue *FindMyCast = IV;
358     for (Instruction *IRCast : reverse(Casts)) {
359       VPRecipeBase *FoundUserCast = nullptr;
360       for (auto *U : FindMyCast->users()) {
361         auto *UserCast = cast<VPRecipeBase>(U);
362         if (UserCast->getNumDefinedValues() == 1 &&
363             UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) {
364           FoundUserCast = UserCast;
365           break;
366         }
367       }
368       FindMyCast = FoundUserCast->getVPSingleValue();
369     }
370     FindMyCast->replaceAllUsesWith(IV);
371   }
372 }
373 
removeRedundantCanonicalIVs(VPlan & Plan)374 void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
375   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
376   VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
377   for (VPUser *U : CanonicalIV->users()) {
378     WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
379     if (WidenNewIV)
380       break;
381   }
382 
383   if (!WidenNewIV)
384     return;
385 
386   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
387   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
388     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
389 
390     if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
391         WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
392       continue;
393 
394     // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
395     // everything WidenNewIV's users need. That is, WidenOriginalIV will
396     // generate a vector phi or all users of WidenNewIV demand the first lane
397     // only.
398     if (WidenOriginalIV->needsVectorIV() ||
399         vputils::onlyFirstLaneUsed(WidenNewIV)) {
400       WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
401       WidenNewIV->eraseFromParent();
402       return;
403     }
404   }
405 }
406 
removeDeadRecipes(VPlan & Plan)407 void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
408   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
409       Plan.getEntry());
410 
411   for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
412     // The recipes in the block are processed in reverse order, to catch chains
413     // of dead recipes.
414     for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
415       if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) {
416             return V->getNumUsers() > 0;
417           }))
418         continue;
419       R.eraseFromParent();
420     }
421   }
422 }
423 
optimizeInductions(VPlan & Plan,ScalarEvolution & SE)424 void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
425   SmallVector<VPRecipeBase *> ToRemove;
426   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
427   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
428   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
429     auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
430     if (!WideIV)
431       continue;
432     if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
433           return U->usesScalars(WideIV);
434         }))
435       continue;
436 
437     auto IP = HeaderVPBB->getFirstNonPhi();
438     VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
439     Type *ResultTy = WideIV->getPHINode()->getType();
440     if (Instruction *TruncI = WideIV->getTruncInst())
441       ResultTy = TruncI->getType();
442     const InductionDescriptor &ID = WideIV->getInductionDescriptor();
443     VPValue *Step =
444         vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE);
445     VPValue *BaseIV = CanonicalIV;
446     if (!CanonicalIV->isCanonical(ID, ResultTy)) {
447       BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV,
448                                      Step, ResultTy);
449       HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
450     }
451 
452     VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
453     HeaderVPBB->insert(Steps, IP);
454 
455     // Update scalar users of IV to use Step instead. Use SetVector to ensure
456     // the list of users doesn't contain duplicates.
457     SetVector<VPUser *> Users(WideIV->user_begin(), WideIV->user_end());
458     for (VPUser *U : Users) {
459       if (HasOnlyVectorVFs && !U->usesScalars(WideIV))
460         continue;
461       for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) {
462         if (U->getOperand(I) != WideIV)
463           continue;
464         U->setOperand(I, Steps);
465       }
466     }
467   }
468 }
469 
removeRedundantExpandSCEVRecipes(VPlan & Plan)470 void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) {
471   DenseMap<const SCEV *, VPValue *> SCEV2VPV;
472 
473   for (VPRecipeBase &R :
474        make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) {
475     auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
476     if (!ExpR)
477       continue;
478 
479     auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR});
480     if (I.second)
481       continue;
482     ExpR->replaceAllUsesWith(I.first->second);
483     ExpR->eraseFromParent();
484   }
485 }
486 
canSimplifyBranchOnCond(VPInstruction * Term)487 static bool canSimplifyBranchOnCond(VPInstruction *Term) {
488   VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0));
489   if (!Not || Not->getOpcode() != VPInstruction::Not)
490     return false;
491 
492   VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0));
493   return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask;
494 }
495 
optimizeForVFAndUF(VPlan & Plan,ElementCount BestVF,unsigned BestUF,PredicatedScalarEvolution & PSE)496 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
497                                          unsigned BestUF,
498                                          PredicatedScalarEvolution &PSE) {
499   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
500   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
501   VPBasicBlock *ExitingVPBB =
502       Plan.getVectorLoopRegion()->getExitingBasicBlock();
503   auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
504   // Try to simplify the branch condition if TC <= VF * UF when preparing to
505   // execute the plan for the main vector loop. We only do this if the
506   // terminator is:
507   //  1. BranchOnCount, or
508   //  2. BranchOnCond where the input is Not(ActiveLaneMask).
509   if (!Term || (Term->getOpcode() != VPInstruction::BranchOnCount &&
510                 (Term->getOpcode() != VPInstruction::BranchOnCond ||
511                  !canSimplifyBranchOnCond(Term))))
512     return;
513 
514   Type *IdxTy =
515       Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
516   const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
517   ScalarEvolution &SE = *PSE.getSE();
518   const SCEV *C =
519       SE.getConstant(TripCount->getType(), BestVF.getKnownMinValue() * BestUF);
520   if (TripCount->isZero() ||
521       !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
522     return;
523 
524   LLVMContext &Ctx = SE.getContext();
525   auto *BOC =
526       new VPInstruction(VPInstruction::BranchOnCond,
527                         {Plan.getOrAddExternalDef(ConstantInt::getTrue(Ctx))});
528   Term->eraseFromParent();
529   ExitingVPBB->appendRecipe(BOC);
530   Plan.setVF(BestVF);
531   Plan.setUF(BestUF);
532   // TODO: Further simplifications are possible
533   //      1. Replace inductions with constants.
534   //      2. Replace vector loop region with VPBasicBlock.
535 }
536