1 //===- Construction of pass pipelines -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass
13 /// pipelines.
14 ///
15 //===----------------------------------------------------------------------===//
16
17 #include "llvm/Analysis/AliasAnalysis.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/CGSCCPassManager.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/Analysis/InlineAdvisor.h"
22 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
23 #include "llvm/Analysis/ProfileSummaryInfo.h"
24 #include "llvm/Analysis/ScopedNoAliasAA.h"
25 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Passes/OptimizationLevel.h"
28 #include "llvm/Passes/PassBuilder.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/PGOOptions.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
34 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
35 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
36 #include "llvm/Transforms/Coroutines/CoroEarly.h"
37 #include "llvm/Transforms/Coroutines/CoroElide.h"
38 #include "llvm/Transforms/Coroutines/CoroSplit.h"
39 #include "llvm/Transforms/IPO/AlwaysInliner.h"
40 #include "llvm/Transforms/IPO/Annotation2Metadata.h"
41 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
42 #include "llvm/Transforms/IPO/Attributor.h"
43 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
44 #include "llvm/Transforms/IPO/ConstantMerge.h"
45 #include "llvm/Transforms/IPO/CrossDSOCFI.h"
46 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
47 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
48 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
49 #include "llvm/Transforms/IPO/FunctionAttrs.h"
50 #include "llvm/Transforms/IPO/GlobalDCE.h"
51 #include "llvm/Transforms/IPO/GlobalOpt.h"
52 #include "llvm/Transforms/IPO/GlobalSplit.h"
53 #include "llvm/Transforms/IPO/HotColdSplitting.h"
54 #include "llvm/Transforms/IPO/IROutliner.h"
55 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
56 #include "llvm/Transforms/IPO/Inliner.h"
57 #include "llvm/Transforms/IPO/LowerTypeTests.h"
58 #include "llvm/Transforms/IPO/MergeFunctions.h"
59 #include "llvm/Transforms/IPO/ModuleInliner.h"
60 #include "llvm/Transforms/IPO/OpenMPOpt.h"
61 #include "llvm/Transforms/IPO/PartialInlining.h"
62 #include "llvm/Transforms/IPO/SCCP.h"
63 #include "llvm/Transforms/IPO/SampleProfile.h"
64 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
65 #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
66 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
67 #include "llvm/Transforms/InstCombine/InstCombine.h"
68 #include "llvm/Transforms/Instrumentation/CGProfile.h"
69 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
70 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
71 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
72 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
73 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
74 #include "llvm/Transforms/Scalar/ADCE.h"
75 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
76 #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
77 #include "llvm/Transforms/Scalar/BDCE.h"
78 #include "llvm/Transforms/Scalar/CallSiteSplitting.h"
79 #include "llvm/Transforms/Scalar/ConstraintElimination.h"
80 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
81 #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
82 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
83 #include "llvm/Transforms/Scalar/DivRemPairs.h"
84 #include "llvm/Transforms/Scalar/EarlyCSE.h"
85 #include "llvm/Transforms/Scalar/Float2Int.h"
86 #include "llvm/Transforms/Scalar/GVN.h"
87 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
88 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
89 #include "llvm/Transforms/Scalar/JumpThreading.h"
90 #include "llvm/Transforms/Scalar/LICM.h"
91 #include "llvm/Transforms/Scalar/LoopDeletion.h"
92 #include "llvm/Transforms/Scalar/LoopDistribute.h"
93 #include "llvm/Transforms/Scalar/LoopFlatten.h"
94 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
95 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
96 #include "llvm/Transforms/Scalar/LoopInterchange.h"
97 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
98 #include "llvm/Transforms/Scalar/LoopPassManager.h"
99 #include "llvm/Transforms/Scalar/LoopRotation.h"
100 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
101 #include "llvm/Transforms/Scalar/LoopSink.h"
102 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
103 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
104 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
105 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
106 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
107 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
108 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
109 #include "llvm/Transforms/Scalar/NewGVN.h"
110 #include "llvm/Transforms/Scalar/Reassociate.h"
111 #include "llvm/Transforms/Scalar/SCCP.h"
112 #include "llvm/Transforms/Scalar/SROA.h"
113 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
114 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
115 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
116 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
117 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
118 #include "llvm/Transforms/Utils/AddDiscriminators.h"
119 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
120 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
121 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
122 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
123 #include "llvm/Transforms/Utils/Mem2Reg.h"
124 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
125 #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
126 #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
127 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
128 #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
129 #include "llvm/Transforms/Vectorize/VectorCombine.h"
130
131 using namespace llvm;
132
133 static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
134 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
135 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
136 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
137 "Heuristics-based inliner version"),
138 clEnumValN(InliningAdvisorMode::Development, "development",
139 "Use development mode (runtime-loadable model)"),
140 clEnumValN(InliningAdvisorMode::Release, "release",
141 "Use release mode (AOT-compiled model)")));
142
143 static cl::opt<bool> EnableSyntheticCounts(
144 "enable-npm-synthetic-counts", cl::Hidden,
145 cl::desc("Run synthetic function entry count generation "
146 "pass"));
147
148 /// Flag to enable inline deferral during PGO.
149 static cl::opt<bool>
150 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
151 cl::Hidden,
152 cl::desc("Enable inline deferral during PGO"));
153
154 static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::Hidden,
155 cl::desc("Enable memory profiler"));
156
157 static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
158 cl::init(false), cl::Hidden,
159 cl::desc("Enable module inliner"));
160
161 static cl::opt<bool> PerformMandatoryInliningsFirst(
162 "mandatory-inlining-first", cl::init(true), cl::Hidden,
163 cl::desc("Perform mandatory inlinings module-wide, before performing "
164 "inlining"));
165
166 static cl::opt<bool> EnableO3NonTrivialUnswitching(
167 "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
168 cl::desc("Enable non-trivial loop unswitching for -O3"));
169
170 static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
171 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
172 cl::desc("Eagerly invalidate more analyses in default pipelines"));
173
174 static cl::opt<bool> EnableNoRerunSimplificationPipeline(
175 "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden,
176 cl::desc(
177 "Prevent running the simplification pipeline on a function more "
178 "than once in the case that SCC mutations cause a function to be "
179 "visited multiple times as long as the function has not been changed"));
180
181 static cl::opt<bool> EnableMergeFunctions(
182 "enable-merge-functions", cl::init(false), cl::Hidden,
183 cl::desc("Enable function merging as part of the optimization pipeline"));
184
185 static cl::opt<bool> EnablePostPGOLoopRotation(
186 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
187 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
188
189 static cl::opt<bool> EnableGlobalAnalyses(
190 "enable-global-analyses", cl::init(true), cl::Hidden,
191 cl::desc("Enable inter-procedural analyses"));
192
193 static cl::opt<bool>
194 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
195 cl::desc("Run Partial inlinining pass"));
196
197 static cl::opt<bool> ExtraVectorizerPasses(
198 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
199 cl::desc("Run cleanup optimization passes after vectorization"));
200
201 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
202 cl::desc("Run the NewGVN pass"));
203
204 static cl::opt<bool> EnableLoopInterchange(
205 "enable-loopinterchange", cl::init(false), cl::Hidden,
206 cl::desc("Enable the experimental LoopInterchange Pass"));
207
208 static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
209 cl::init(false), cl::Hidden,
210 cl::desc("Enable Unroll And Jam Pass"));
211
212 static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
213 cl::Hidden,
214 cl::desc("Enable the LoopFlatten Pass"));
215
216 static cl::opt<bool>
217 EnableDFAJumpThreading("enable-dfa-jump-thread",
218 cl::desc("Enable DFA jump threading"),
219 cl::init(false), cl::Hidden);
220
221 static cl::opt<bool>
222 EnableHotColdSplit("hot-cold-split",
223 cl::desc("Enable hot-cold splitting pass"));
224
225 static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
226 cl::Hidden,
227 cl::desc("Enable ir outliner pass"));
228
229 static cl::opt<bool>
230 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
231 cl::desc("Disable pre-instrumentation inliner"));
232
233 static cl::opt<int> PreInlineThreshold(
234 "preinline-threshold", cl::Hidden, cl::init(75),
235 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
236 "(default = 75)"));
237
238 static cl::opt<bool>
239 EnableGVNHoist("enable-gvn-hoist",
240 cl::desc("Enable the GVN hoisting pass (default = off)"));
241
242 static cl::opt<bool>
243 EnableGVNSink("enable-gvn-sink",
244 cl::desc("Enable the GVN sinking pass (default = off)"));
245
246 // This option is used in simplifying testing SampleFDO optimizations for
247 // profile loading.
248 static cl::opt<bool>
249 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
250 cl::desc("Enable control height reduction optimization (CHR)"));
251
252 static cl::opt<bool> FlattenedProfileUsed(
253 "flattened-profile-used", cl::init(false), cl::Hidden,
254 cl::desc("Indicate the sample profile being used is flattened, i.e., "
255 "no inline hierachy exists in the profile"));
256
257 static cl::opt<bool> EnableOrderFileInstrumentation(
258 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
259 cl::desc("Enable order file instrumentation (default = off)"));
260
261 static cl::opt<bool>
262 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
263 cl::desc("Enable lowering of the matrix intrinsics"));
264
265 static cl::opt<bool> EnableConstraintElimination(
266 "enable-constraint-elimination", cl::init(false), cl::Hidden,
267 cl::desc(
268 "Enable pass to eliminate conditions based on linear constraints"));
269
270 static cl::opt<AttributorRunOption> AttributorRun(
271 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
272 cl::desc("Enable the attributor inter-procedural deduction pass"),
273 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
274 "enable all attributor runs"),
275 clEnumValN(AttributorRunOption::MODULE, "module",
276 "enable module-wide attributor runs"),
277 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
278 "enable call graph SCC attributor runs"),
279 clEnumValN(AttributorRunOption::NONE, "none",
280 "disable attributor runs")));
281
PipelineTuningOptions()282 PipelineTuningOptions::PipelineTuningOptions() {
283 LoopInterleaving = true;
284 LoopVectorization = true;
285 SLPVectorization = false;
286 LoopUnrolling = true;
287 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
288 LicmMssaOptCap = SetLicmMssaOptCap;
289 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
290 CallGraphProfile = true;
291 MergeFunctions = EnableMergeFunctions;
292 InlinerThreshold = -1;
293 EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
294 }
295
296 namespace llvm {
297 extern cl::opt<unsigned> MaxDevirtIterations;
298 extern cl::opt<bool> EnableKnowledgeRetention;
299 } // namespace llvm
300
invokePeepholeEPCallbacks(FunctionPassManager & FPM,OptimizationLevel Level)301 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
302 OptimizationLevel Level) {
303 for (auto &C : PeepholeEPCallbacks)
304 C(FPM, Level);
305 }
306
307 // Helper to add AnnotationRemarksPass.
addAnnotationRemarksPass(ModulePassManager & MPM)308 static void addAnnotationRemarksPass(ModulePassManager &MPM) {
309 MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
310 }
311
312 // Helper to check if the current compilation phase is preparing for LTO
isLTOPreLink(ThinOrFullLTOPhase Phase)313 static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
314 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
315 Phase == ThinOrFullLTOPhase::FullLTOPreLink;
316 }
317
318 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
319 FunctionPassManager
buildO1FunctionSimplificationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)320 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
321 ThinOrFullLTOPhase Phase) {
322
323 FunctionPassManager FPM;
324
325 // Form SSA out of local memory accesses after breaking apart aggregates into
326 // scalars.
327 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
328
329 // Catch trivial redundancies
330 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
331
332 // Hoisting of scalars and load expressions.
333 FPM.addPass(
334 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
335 FPM.addPass(InstCombinePass());
336
337 FPM.addPass(LibCallsShrinkWrapPass());
338
339 invokePeepholeEPCallbacks(FPM, Level);
340
341 FPM.addPass(
342 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
343
344 // Form canonically associated expression trees, and simplify the trees using
345 // basic mathematical properties. For example, this will form (nearly)
346 // minimal multiplication trees.
347 FPM.addPass(ReassociatePass());
348
349 // Add the primary loop simplification pipeline.
350 // FIXME: Currently this is split into two loop pass pipelines because we run
351 // some function passes in between them. These can and should be removed
352 // and/or replaced by scheduling the loop pass equivalents in the correct
353 // positions. But those equivalent passes aren't powerful enough yet.
354 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
355 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
356 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
357 // `LoopInstSimplify`.
358 LoopPassManager LPM1, LPM2;
359
360 // Simplify the loop body. We do this initially to clean up after other loop
361 // passes run, either when iterating on a loop or on inner loops with
362 // implications on the outer loop.
363 LPM1.addPass(LoopInstSimplifyPass());
364 LPM1.addPass(LoopSimplifyCFGPass());
365
366 // Try to remove as much code from the loop header as possible,
367 // to reduce amount of IR that will have to be duplicated. However,
368 // do not perform speculative hoisting the first time as LICM
369 // will destroy metadata that may not need to be destroyed if run
370 // after loop rotation.
371 // TODO: Investigate promotion cap for O1.
372 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
373 /*AllowSpeculation=*/false));
374
375 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
376 isLTOPreLink(Phase)));
377 // TODO: Investigate promotion cap for O1.
378 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
379 /*AllowSpeculation=*/true));
380 LPM1.addPass(SimpleLoopUnswitchPass());
381 if (EnableLoopFlatten)
382 LPM1.addPass(LoopFlattenPass());
383
384 LPM2.addPass(LoopIdiomRecognizePass());
385 LPM2.addPass(IndVarSimplifyPass());
386
387 for (auto &C : LateLoopOptimizationsEPCallbacks)
388 C(LPM2, Level);
389
390 LPM2.addPass(LoopDeletionPass());
391
392 if (EnableLoopInterchange)
393 LPM2.addPass(LoopInterchangePass());
394
395 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
396 // because it changes IR to makes profile annotation in back compile
397 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
398 // attributes so we need to make sure and allow the full unroll pass to pay
399 // attention to it.
400 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
401 PGOOpt->Action != PGOOptions::SampleUse)
402 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
403 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
404 PTO.ForgetAllSCEVInLoopUnroll));
405
406 for (auto &C : LoopOptimizerEndEPCallbacks)
407 C(LPM2, Level);
408
409 // We provide the opt remark emitter pass for LICM to use. We only need to do
410 // this once as it is immutable.
411 FPM.addPass(
412 RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
413 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
414 /*UseMemorySSA=*/true,
415 /*UseBlockFrequencyInfo=*/true));
416 FPM.addPass(
417 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
418 FPM.addPass(InstCombinePass());
419 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
420 // *All* loop passes must preserve it, in order to be able to use it.
421 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
422 /*UseMemorySSA=*/false,
423 /*UseBlockFrequencyInfo=*/false));
424
425 // Delete small array after loop unroll.
426 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
427
428 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
429 FPM.addPass(MemCpyOptPass());
430
431 // Sparse conditional constant propagation.
432 // FIXME: It isn't clear why we do this *after* loop passes rather than
433 // before...
434 FPM.addPass(SCCPPass());
435
436 // Delete dead bit computations (instcombine runs after to fold away the dead
437 // computations, and then ADCE will run later to exploit any new DCE
438 // opportunities that creates).
439 FPM.addPass(BDCEPass());
440
441 // Run instcombine after redundancy and dead bit elimination to exploit
442 // opportunities opened up by them.
443 FPM.addPass(InstCombinePass());
444 invokePeepholeEPCallbacks(FPM, Level);
445
446 FPM.addPass(CoroElidePass());
447
448 for (auto &C : ScalarOptimizerLateEPCallbacks)
449 C(FPM, Level);
450
451 // Finally, do an expensive DCE pass to catch all the dead code exposed by
452 // the simplifications and basic cleanup after all the simplifications.
453 // TODO: Investigate if this is too expensive.
454 FPM.addPass(ADCEPass());
455 FPM.addPass(
456 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
457 FPM.addPass(InstCombinePass());
458 invokePeepholeEPCallbacks(FPM, Level);
459
460 return FPM;
461 }
462
463 FunctionPassManager
buildFunctionSimplificationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)464 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
465 ThinOrFullLTOPhase Phase) {
466 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
467
468 // The O1 pipeline has a separate pipeline creation function to simplify
469 // construction readability.
470 if (Level.getSpeedupLevel() == 1)
471 return buildO1FunctionSimplificationPipeline(Level, Phase);
472
473 FunctionPassManager FPM;
474
475 // Form SSA out of local memory accesses after breaking apart aggregates into
476 // scalars.
477 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
478
479 // Catch trivial redundancies
480 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
481 if (EnableKnowledgeRetention)
482 FPM.addPass(AssumeSimplifyPass());
483
484 // Hoisting of scalars and load expressions.
485 if (EnableGVNHoist)
486 FPM.addPass(GVNHoistPass());
487
488 // Global value numbering based sinking.
489 if (EnableGVNSink) {
490 FPM.addPass(GVNSinkPass());
491 FPM.addPass(
492 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
493 }
494
495 // Speculative execution if the target has divergent branches; otherwise nop.
496 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
497
498 // Optimize based on known information about branches, and cleanup afterward.
499 FPM.addPass(JumpThreadingPass());
500 FPM.addPass(CorrelatedValuePropagationPass());
501
502 FPM.addPass(
503 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
504 FPM.addPass(InstCombinePass());
505 if (Level == OptimizationLevel::O3)
506 FPM.addPass(AggressiveInstCombinePass());
507
508 if (EnableConstraintElimination)
509 FPM.addPass(ConstraintEliminationPass());
510
511 if (!Level.isOptimizingForSize())
512 FPM.addPass(LibCallsShrinkWrapPass());
513
514 invokePeepholeEPCallbacks(FPM, Level);
515
516 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
517 // using the size value profile. Don't perform this when optimizing for size.
518 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
519 !Level.isOptimizingForSize())
520 FPM.addPass(PGOMemOPSizeOpt());
521
522 FPM.addPass(TailCallElimPass());
523 FPM.addPass(
524 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
525
526 // Form canonically associated expression trees, and simplify the trees using
527 // basic mathematical properties. For example, this will form (nearly)
528 // minimal multiplication trees.
529 FPM.addPass(ReassociatePass());
530
531 // Add the primary loop simplification pipeline.
532 // FIXME: Currently this is split into two loop pass pipelines because we run
533 // some function passes in between them. These can and should be removed
534 // and/or replaced by scheduling the loop pass equivalents in the correct
535 // positions. But those equivalent passes aren't powerful enough yet.
536 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
537 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
538 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
539 // `LoopInstSimplify`.
540 LoopPassManager LPM1, LPM2;
541
542 // Simplify the loop body. We do this initially to clean up after other loop
543 // passes run, either when iterating on a loop or on inner loops with
544 // implications on the outer loop.
545 LPM1.addPass(LoopInstSimplifyPass());
546 LPM1.addPass(LoopSimplifyCFGPass());
547
548 // Try to remove as much code from the loop header as possible,
549 // to reduce amount of IR that will have to be duplicated. However,
550 // do not perform speculative hoisting the first time as LICM
551 // will destroy metadata that may not need to be destroyed if run
552 // after loop rotation.
553 // TODO: Investigate promotion cap for O1.
554 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
555 /*AllowSpeculation=*/false));
556
557 // Disable header duplication in loop rotation at -Oz.
558 LPM1.addPass(
559 LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
560 // TODO: Investigate promotion cap for O1.
561 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
562 /*AllowSpeculation=*/true));
563 LPM1.addPass(
564 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
565 EnableO3NonTrivialUnswitching));
566 if (EnableLoopFlatten)
567 LPM1.addPass(LoopFlattenPass());
568
569 LPM2.addPass(LoopIdiomRecognizePass());
570 LPM2.addPass(IndVarSimplifyPass());
571
572 for (auto &C : LateLoopOptimizationsEPCallbacks)
573 C(LPM2, Level);
574
575 LPM2.addPass(LoopDeletionPass());
576
577 if (EnableLoopInterchange)
578 LPM2.addPass(LoopInterchangePass());
579
580 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
581 // because it changes IR to makes profile annotation in back compile
582 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
583 // attributes so we need to make sure and allow the full unroll pass to pay
584 // attention to it.
585 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
586 PGOOpt->Action != PGOOptions::SampleUse)
587 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
588 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
589 PTO.ForgetAllSCEVInLoopUnroll));
590
591 for (auto &C : LoopOptimizerEndEPCallbacks)
592 C(LPM2, Level);
593
594 // We provide the opt remark emitter pass for LICM to use. We only need to do
595 // this once as it is immutable.
596 FPM.addPass(
597 RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
598 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
599 /*UseMemorySSA=*/true,
600 /*UseBlockFrequencyInfo=*/true));
601 FPM.addPass(
602 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
603 FPM.addPass(InstCombinePass());
604 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
605 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
606 // *All* loop passes must preserve it, in order to be able to use it.
607 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
608 /*UseMemorySSA=*/false,
609 /*UseBlockFrequencyInfo=*/false));
610
611 // Delete small array after loop unroll.
612 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
613
614 // Try vectorization/scalarization transforms that are both improvements
615 // themselves and can allow further folds with GVN and InstCombine.
616 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
617
618 // Eliminate redundancies.
619 FPM.addPass(MergedLoadStoreMotionPass());
620 if (RunNewGVN)
621 FPM.addPass(NewGVNPass());
622 else
623 FPM.addPass(GVNPass());
624
625 // Sparse conditional constant propagation.
626 // FIXME: It isn't clear why we do this *after* loop passes rather than
627 // before...
628 FPM.addPass(SCCPPass());
629
630 // Delete dead bit computations (instcombine runs after to fold away the dead
631 // computations, and then ADCE will run later to exploit any new DCE
632 // opportunities that creates).
633 FPM.addPass(BDCEPass());
634
635 // Run instcombine after redundancy and dead bit elimination to exploit
636 // opportunities opened up by them.
637 FPM.addPass(InstCombinePass());
638 invokePeepholeEPCallbacks(FPM, Level);
639
640 // Re-consider control flow based optimizations after redundancy elimination,
641 // redo DCE, etc.
642 if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
643 FPM.addPass(DFAJumpThreadingPass());
644
645 FPM.addPass(JumpThreadingPass());
646 FPM.addPass(CorrelatedValuePropagationPass());
647
648 // Finally, do an expensive DCE pass to catch all the dead code exposed by
649 // the simplifications and basic cleanup after all the simplifications.
650 // TODO: Investigate if this is too expensive.
651 FPM.addPass(ADCEPass());
652
653 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
654 FPM.addPass(MemCpyOptPass());
655
656 FPM.addPass(DSEPass());
657 FPM.addPass(createFunctionToLoopPassAdaptor(
658 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
659 /*AllowSpeculation=*/true),
660 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
661
662 FPM.addPass(CoroElidePass());
663
664 for (auto &C : ScalarOptimizerLateEPCallbacks)
665 C(FPM, Level);
666
667 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
668 .convertSwitchRangeToICmp(true)
669 .hoistCommonInsts(true)
670 .sinkCommonInsts(true)));
671 FPM.addPass(InstCombinePass());
672 invokePeepholeEPCallbacks(FPM, Level);
673
674 // Don't add CHR pass for CSIRInstr build in PostLink as the profile
675 // is still the same as the PreLink compilation.
676 if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
677 ((PGOOpt->Action == PGOOptions::IRUse &&
678 (Phase != ThinOrFullLTOPhase::ThinLTOPostLink ||
679 PGOOpt->CSAction != PGOOptions::CSIRInstr)) ||
680 PGOOpt->Action == PGOOptions::SampleUse))
681 FPM.addPass(ControlHeightReductionPass());
682
683 return FPM;
684 }
685
addRequiredLTOPreLinkPasses(ModulePassManager & MPM)686 void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
687 MPM.addPass(CanonicalizeAliasesPass());
688 MPM.addPass(NameAnonGlobalPass());
689 }
690
addPGOInstrPasses(ModulePassManager & MPM,OptimizationLevel Level,bool RunProfileGen,bool IsCS,std::string ProfileFile,std::string ProfileRemappingFile,ThinOrFullLTOPhase LTOPhase)691 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
692 OptimizationLevel Level, bool RunProfileGen,
693 bool IsCS, std::string ProfileFile,
694 std::string ProfileRemappingFile,
695 ThinOrFullLTOPhase LTOPhase) {
696 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
697 if (!IsCS && !DisablePreInliner) {
698 InlineParams IP;
699
700 IP.DefaultThreshold = PreInlineThreshold;
701
702 // FIXME: The hint threshold has the same value used by the regular inliner
703 // when not optimzing for size. This should probably be lowered after
704 // performance testing.
705 // FIXME: this comment is cargo culted from the old pass manager, revisit).
706 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
707 ModuleInlinerWrapperPass MIWP(
708 IP, /* MandatoryFirst */ true,
709 InlineContext{LTOPhase, InlinePass::EarlyInliner});
710 CGSCCPassManager &CGPipeline = MIWP.getPM();
711
712 FunctionPassManager FPM;
713 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
714 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
715 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
716 true))); // Merge & remove basic blocks.
717 FPM.addPass(InstCombinePass()); // Combine silly sequences.
718 invokePeepholeEPCallbacks(FPM, Level);
719
720 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
721 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
722
723 MPM.addPass(std::move(MIWP));
724
725 // Delete anything that is now dead to make sure that we don't instrument
726 // dead code. Instrumentation can end up keeping dead code around and
727 // dramatically increase code size.
728 MPM.addPass(GlobalDCEPass());
729 }
730
731 if (!RunProfileGen) {
732 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
733 MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
734 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
735 // RequireAnalysisPass for PSI before subsequent non-module passes.
736 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
737 return;
738 }
739
740 // Perform PGO instrumentation.
741 MPM.addPass(PGOInstrumentationGen(IsCS));
742
743 if (EnablePostPGOLoopRotation) {
744 // Disable header duplication in loop rotation at -Oz.
745 MPM.addPass(createModuleToFunctionPassAdaptor(
746 createFunctionToLoopPassAdaptor(
747 LoopRotatePass(Level != OptimizationLevel::Oz),
748 /*UseMemorySSA=*/false,
749 /*UseBlockFrequencyInfo=*/false),
750 PTO.EagerlyInvalidateAnalyses));
751 }
752
753 // Add the profile lowering pass.
754 InstrProfOptions Options;
755 if (!ProfileFile.empty())
756 Options.InstrProfileOutput = ProfileFile;
757 // Do counter promotion at Level greater than O0.
758 Options.DoCounterPromotion = true;
759 Options.UseBFIInPromotion = IsCS;
760 MPM.addPass(InstrProfiling(Options, IsCS));
761 }
762
addPGOInstrPassesForO0(ModulePassManager & MPM,bool RunProfileGen,bool IsCS,std::string ProfileFile,std::string ProfileRemappingFile)763 void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
764 bool RunProfileGen, bool IsCS,
765 std::string ProfileFile,
766 std::string ProfileRemappingFile) {
767 if (!RunProfileGen) {
768 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
769 MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
770 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
771 // RequireAnalysisPass for PSI before subsequent non-module passes.
772 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
773 return;
774 }
775
776 // Perform PGO instrumentation.
777 MPM.addPass(PGOInstrumentationGen(IsCS));
778 // Add the profile lowering pass.
779 InstrProfOptions Options;
780 if (!ProfileFile.empty())
781 Options.InstrProfileOutput = ProfileFile;
782 // Do not do counter promotion at O0.
783 Options.DoCounterPromotion = false;
784 Options.UseBFIInPromotion = IsCS;
785 MPM.addPass(InstrProfiling(Options, IsCS));
786 }
787
getInlineParamsFromOptLevel(OptimizationLevel Level)788 static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
789 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
790 }
791
792 ModuleInlinerWrapperPass
buildInlinerPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)793 PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
794 ThinOrFullLTOPhase Phase) {
795 InlineParams IP;
796 if (PTO.InlinerThreshold == -1)
797 IP = getInlineParamsFromOptLevel(Level);
798 else
799 IP = getInlineParams(PTO.InlinerThreshold);
800 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
801 // disable hot callsite inline (as much as possible [1]) because it makes
802 // profile annotation in the backend inaccurate.
803 //
804 // [1] Note the cost of a function could be below zero due to erased
805 // prologue / epilogue.
806 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
807 PGOOpt->Action == PGOOptions::SampleUse)
808 IP.HotCallSiteThreshold = 0;
809
810 if (PGOOpt)
811 IP.EnableDeferral = EnablePGOInlineDeferral;
812
813 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
814 InlineContext{Phase, InlinePass::CGSCCInliner},
815 UseInlineAdvisor, MaxDevirtIterations);
816
817 // Require the GlobalsAA analysis for the module so we can query it within
818 // the CGSCC pipeline.
819 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
820 // Invalidate AAManager so it can be recreated and pick up the newly available
821 // GlobalsAA.
822 MIWP.addModulePass(
823 createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
824
825 // Require the ProfileSummaryAnalysis for the module so we can query it within
826 // the inliner pass.
827 MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
828
829 // Now begin the main postorder CGSCC pipeline.
830 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
831 // manager and trying to emulate its precise behavior. Much of this doesn't
832 // make a lot of sense and we should revisit the core CGSCC structure.
833 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
834
835 // Note: historically, the PruneEH pass was run first to deduce nounwind and
836 // generally clean up exception handling overhead. It isn't clear this is
837 // valuable as the inliner doesn't currently care whether it is inlining an
838 // invoke or a call.
839
840 if (AttributorRun & AttributorRunOption::CGSCC)
841 MainCGPipeline.addPass(AttributorCGSCCPass());
842
843 // Now deduce any function attributes based in the current code.
844 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
845
846 // When at O3 add argument promotion to the pass pipeline.
847 // FIXME: It isn't at all clear why this should be limited to O3.
848 if (Level == OptimizationLevel::O3)
849 MainCGPipeline.addPass(ArgumentPromotionPass());
850
851 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
852 // there are no OpenMP runtime calls present in the module.
853 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
854 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
855
856 for (auto &C : CGSCCOptimizerLateEPCallbacks)
857 C(MainCGPipeline, Level);
858
859 // Lastly, add the core function simplification pipeline nested inside the
860 // CGSCC walk.
861 MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
862 buildFunctionSimplificationPipeline(Level, Phase),
863 PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline));
864
865 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
866
867 if (EnableNoRerunSimplificationPipeline)
868 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
869 InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
870
871 return MIWP;
872 }
873
874 ModulePassManager
buildModuleInlinerPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)875 PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
876 ThinOrFullLTOPhase Phase) {
877 ModulePassManager MPM;
878
879 InlineParams IP = getInlineParamsFromOptLevel(Level);
880 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
881 // disable hot callsite inline (as much as possible [1]) because it makes
882 // profile annotation in the backend inaccurate.
883 //
884 // [1] Note the cost of a function could be below zero due to erased
885 // prologue / epilogue.
886 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
887 PGOOpt->Action == PGOOptions::SampleUse)
888 IP.HotCallSiteThreshold = 0;
889
890 if (PGOOpt)
891 IP.EnableDeferral = EnablePGOInlineDeferral;
892
893 // The inline deferral logic is used to avoid losing some
894 // inlining chance in future. It is helpful in SCC inliner, in which
895 // inlining is processed in bottom-up order.
896 // While in module inliner, the inlining order is a priority-based order
897 // by default. The inline deferral is unnecessary there. So we disable the
898 // inline deferral logic in module inliner.
899 IP.EnableDeferral = false;
900
901 MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
902
903 MPM.addPass(createModuleToFunctionPassAdaptor(
904 buildFunctionSimplificationPipeline(Level, Phase),
905 PTO.EagerlyInvalidateAnalyses));
906
907 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
908 CoroSplitPass(Level != OptimizationLevel::O0)));
909
910 return MPM;
911 }
912
913 ModulePassManager
buildModuleSimplificationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)914 PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
915 ThinOrFullLTOPhase Phase) {
916 ModulePassManager MPM;
917
918 // Place pseudo probe instrumentation as the first pass of the pipeline to
919 // minimize the impact of optimization changes.
920 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
921 Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
922 MPM.addPass(SampleProfileProbePass(TM));
923
924 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
925
926 // In ThinLTO mode, when flattened profile is used, all the available
927 // profile information will be annotated in PreLink phase so there is
928 // no need to load the profile again in PostLink.
929 bool LoadSampleProfile =
930 HasSampleProfile &&
931 !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
932
933 // During the ThinLTO backend phase we perform early indirect call promotion
934 // here, before globalopt. Otherwise imported available_externally functions
935 // look unreferenced and are removed. If we are going to load the sample
936 // profile then defer until later.
937 // TODO: See if we can move later and consolidate with the location where
938 // we perform ICP when we are loading a sample profile.
939 // TODO: We pass HasSampleProfile (whether there was a sample profile file
940 // passed to the compile) to the SamplePGO flag of ICP. This is used to
941 // determine whether the new direct calls are annotated with prof metadata.
942 // Ideally this should be determined from whether the IR is annotated with
943 // sample profile, and not whether the a sample profile was provided on the
944 // command line. E.g. for flattened profiles where we will not be reloading
945 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
946 // provide the sample profile file.
947 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
948 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
949
950 // Do basic inference of function attributes from known properties of system
951 // libraries and other oracles.
952 MPM.addPass(InferFunctionAttrsPass());
953 MPM.addPass(CoroEarlyPass());
954
955 // Create an early function pass manager to cleanup the output of the
956 // frontend.
957 FunctionPassManager EarlyFPM;
958 // Lower llvm.expect to metadata before attempting transforms.
959 // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
960 EarlyFPM.addPass(LowerExpectIntrinsicPass());
961 EarlyFPM.addPass(SimplifyCFGPass());
962 EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
963 EarlyFPM.addPass(EarlyCSEPass());
964 if (Level == OptimizationLevel::O3)
965 EarlyFPM.addPass(CallSiteSplittingPass());
966
967 // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
968 // to convert bitcast to direct calls so that they can be inlined during the
969 // profile annotation prepration step.
970 // More details about SamplePGO design can be found in:
971 // https://research.google.com/pubs/pub45290.html
972 // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
973 if (LoadSampleProfile)
974 EarlyFPM.addPass(InstCombinePass());
975 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM),
976 PTO.EagerlyInvalidateAnalyses));
977
978 if (LoadSampleProfile) {
979 // Annotate sample profile right after early FPM to ensure freshness of
980 // the debug info.
981 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
982 PGOOpt->ProfileRemappingFile, Phase));
983 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
984 // RequireAnalysisPass for PSI before subsequent non-module passes.
985 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
986 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
987 // for the profile annotation to be accurate in the LTO backend.
988 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
989 Phase != ThinOrFullLTOPhase::FullLTOPreLink)
990 // We perform early indirect call promotion here, before globalopt.
991 // This is important for the ThinLTO backend phase because otherwise
992 // imported available_externally functions look unreferenced and are
993 // removed.
994 MPM.addPass(
995 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
996 }
997
998 // Try to perform OpenMP specific optimizations on the module. This is a
999 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1000 if (Level != OptimizationLevel::O0)
1001 MPM.addPass(OpenMPOptPass());
1002
1003 if (AttributorRun & AttributorRunOption::MODULE)
1004 MPM.addPass(AttributorPass());
1005
1006 // Lower type metadata and the type.test intrinsic in the ThinLTO
1007 // post link pipeline after ICP. This is to enable usage of the type
1008 // tests in ICP sequences.
1009 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1010 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1011
1012 for (auto &C : PipelineEarlySimplificationEPCallbacks)
1013 C(MPM, Level);
1014
1015 // Interprocedural constant propagation now that basic cleanup has occurred
1016 // and prior to optimizing globals.
1017 // FIXME: This position in the pipeline hasn't been carefully considered in
1018 // years, it should be re-analyzed.
1019 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1020 Level != OptimizationLevel::Os &&
1021 Level != OptimizationLevel::Oz)));
1022
1023 // Attach metadata to indirect call sites indicating the set of functions
1024 // they may target at run-time. This should follow IPSCCP.
1025 MPM.addPass(CalledValuePropagationPass());
1026
1027 // Optimize globals to try and fold them into constants.
1028 MPM.addPass(GlobalOptPass());
1029
1030 // Promote any localized globals to SSA registers.
1031 // FIXME: Should this instead by a run of SROA?
1032 // FIXME: We should probably run instcombine and simplifycfg afterward to
1033 // delete control flows that are dead once globals have been folded to
1034 // constants.
1035 MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
1036
1037 // Create a small function pass pipeline to cleanup after all the global
1038 // optimizations.
1039 FunctionPassManager GlobalCleanupPM;
1040 GlobalCleanupPM.addPass(InstCombinePass());
1041 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1042
1043 GlobalCleanupPM.addPass(
1044 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1045 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1046 PTO.EagerlyInvalidateAnalyses));
1047
1048 // Add all the requested passes for instrumentation PGO, if requested.
1049 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1050 (PGOOpt->Action == PGOOptions::IRInstr ||
1051 PGOOpt->Action == PGOOptions::IRUse)) {
1052 addPGOInstrPasses(MPM, Level,
1053 /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
1054 /* IsCS */ false, PGOOpt->ProfileFile,
1055 PGOOpt->ProfileRemappingFile, Phase);
1056 MPM.addPass(PGOIndirectCallPromotion(false, false));
1057 }
1058 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1059 PGOOpt->CSAction == PGOOptions::CSIRInstr)
1060 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1061
1062 // Synthesize function entry counts for non-PGO compilation.
1063 if (EnableSyntheticCounts && !PGOOpt)
1064 MPM.addPass(SyntheticCountsPropagation());
1065
1066 if (EnableModuleInliner)
1067 MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
1068 else
1069 MPM.addPass(buildInlinerPipeline(Level, Phase));
1070
1071 // Remove any dead arguments exposed by cleanups, constant folding globals,
1072 // and argument promotion.
1073 MPM.addPass(DeadArgumentEliminationPass());
1074
1075 MPM.addPass(CoroCleanupPass());
1076
1077 if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1078 MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
1079 MPM.addPass(ModuleMemProfilerPass());
1080 }
1081
1082 return MPM;
1083 }
1084
1085 /// TODO: Should LTO cause any differences to this set of passes?
addVectorPasses(OptimizationLevel Level,FunctionPassManager & FPM,bool IsFullLTO)1086 void PassBuilder::addVectorPasses(OptimizationLevel Level,
1087 FunctionPassManager &FPM, bool IsFullLTO) {
1088 FPM.addPass(LoopVectorizePass(
1089 LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1090
1091 if (IsFullLTO) {
1092 // The vectorizer may have significantly shortened a loop body; unroll
1093 // again. Unroll small loops to hide loop backedge latency and saturate any
1094 // parallel execution resources of an out-of-order processor. We also then
1095 // need to clean up redundancies and loop invariant code.
1096 // FIXME: It would be really good to use a loop-integrated instruction
1097 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1098 // across the loop nests.
1099 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1100 if (EnableUnrollAndJam && PTO.LoopUnrolling)
1101 FPM.addPass(createFunctionToLoopPassAdaptor(
1102 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1103 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1104 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1105 PTO.ForgetAllSCEVInLoopUnroll)));
1106 FPM.addPass(WarnMissedTransformationsPass());
1107 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1108 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1109 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1110 // NOTE: we are very late in the pipeline, and we don't have any LICM
1111 // or SimplifyCFG passes scheduled after us, that would cleanup
1112 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1113 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1114 }
1115
1116 if (!IsFullLTO) {
1117 // Eliminate loads by forwarding stores from the previous iteration to loads
1118 // of the current iteration.
1119 FPM.addPass(LoopLoadEliminationPass());
1120 }
1121 // Cleanup after the loop optimization passes.
1122 FPM.addPass(InstCombinePass());
1123
1124 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1125 ExtraVectorPassManager ExtraPasses;
1126 // At higher optimization levels, try to clean up any runtime overlap and
1127 // alignment checks inserted by the vectorizer. We want to track correlated
1128 // runtime checks for two inner loops in the same outer loop, fold any
1129 // common computations, hoist loop-invariant aspects out of any outer loop,
1130 // and unswitch the runtime checks if possible. Once hoisted, we may have
1131 // dead (or speculatable) control flows or more combining opportunities.
1132 ExtraPasses.addPass(EarlyCSEPass());
1133 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1134 ExtraPasses.addPass(InstCombinePass());
1135 LoopPassManager LPM;
1136 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1137 /*AllowSpeculation=*/true));
1138 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1139 OptimizationLevel::O3));
1140 ExtraPasses.addPass(
1141 RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
1142 ExtraPasses.addPass(
1143 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1144 /*UseBlockFrequencyInfo=*/true));
1145 ExtraPasses.addPass(
1146 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1147 ExtraPasses.addPass(InstCombinePass());
1148 FPM.addPass(std::move(ExtraPasses));
1149 }
1150
1151 // Now that we've formed fast to execute loop structures, we do further
1152 // optimizations. These are run afterward as they might block doing complex
1153 // analyses and transforms such as what are needed for loop vectorization.
1154
1155 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1156 // GVN, loop transforms, and others have already run, so it's now better to
1157 // convert to more optimized IR using more aggressive simplify CFG options.
1158 // The extra sinking transform can create larger basic blocks, so do this
1159 // before SLP vectorization.
1160 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1161 .forwardSwitchCondToPhi(true)
1162 .convertSwitchRangeToICmp(true)
1163 .convertSwitchToLookupTable(true)
1164 .needCanonicalLoops(false)
1165 .hoistCommonInsts(true)
1166 .sinkCommonInsts(true)));
1167
1168 if (IsFullLTO) {
1169 FPM.addPass(SCCPPass());
1170 FPM.addPass(InstCombinePass());
1171 FPM.addPass(BDCEPass());
1172 }
1173
1174 // Optimize parallel scalar instruction chains into SIMD instructions.
1175 if (PTO.SLPVectorization) {
1176 FPM.addPass(SLPVectorizerPass());
1177 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1178 FPM.addPass(EarlyCSEPass());
1179 }
1180 }
1181 // Enhance/cleanup vector code.
1182 FPM.addPass(VectorCombinePass());
1183
1184 if (!IsFullLTO) {
1185 FPM.addPass(InstCombinePass());
1186 // Unroll small loops to hide loop backedge latency and saturate any
1187 // parallel execution resources of an out-of-order processor. We also then
1188 // need to clean up redundancies and loop invariant code.
1189 // FIXME: It would be really good to use a loop-integrated instruction
1190 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1191 // across the loop nests.
1192 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1193 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1194 FPM.addPass(createFunctionToLoopPassAdaptor(
1195 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1196 }
1197 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1198 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1199 PTO.ForgetAllSCEVInLoopUnroll)));
1200 FPM.addPass(WarnMissedTransformationsPass());
1201 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1202 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1203 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1204 // NOTE: we are very late in the pipeline, and we don't have any LICM
1205 // or SimplifyCFG passes scheduled after us, that would cleanup
1206 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1207 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1208 FPM.addPass(InstCombinePass());
1209 FPM.addPass(
1210 RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
1211 FPM.addPass(createFunctionToLoopPassAdaptor(
1212 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1213 /*AllowSpeculation=*/true),
1214 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1215 }
1216
1217 // Now that we've vectorized and unrolled loops, we may have more refined
1218 // alignment information, try to re-derive it here.
1219 FPM.addPass(AlignmentFromAssumptionsPass());
1220
1221 if (IsFullLTO)
1222 FPM.addPass(InstCombinePass());
1223 }
1224
1225 ModulePassManager
buildModuleOptimizationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase LTOPhase)1226 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1227 ThinOrFullLTOPhase LTOPhase) {
1228 const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
1229 LTOPhase == ThinOrFullLTOPhase::FullLTOPreLink);
1230 ModulePassManager MPM;
1231
1232 // Optimize globals now that the module is fully simplified.
1233 MPM.addPass(GlobalOptPass());
1234 MPM.addPass(GlobalDCEPass());
1235
1236 // Run partial inlining pass to partially inline functions that have
1237 // large bodies.
1238 if (RunPartialInlining)
1239 MPM.addPass(PartialInlinerPass());
1240
1241 // Remove avail extern fns and globals definitions since we aren't compiling
1242 // an object file for later LTO. For LTO we want to preserve these so they
1243 // are eligible for inlining at link-time. Note if they are unreferenced they
1244 // will be removed by GlobalDCE later, so this only impacts referenced
1245 // available externally globals. Eventually they will be suppressed during
1246 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1247 // may make globals referenced by available external functions dead and saves
1248 // running remaining passes on the eliminated functions. These should be
1249 // preserved during prelinking for link-time inlining decisions.
1250 if (!LTOPreLink)
1251 MPM.addPass(EliminateAvailableExternallyPass());
1252
1253 if (EnableOrderFileInstrumentation)
1254 MPM.addPass(InstrOrderFilePass());
1255
1256 // Do RPO function attribute inference across the module to forward-propagate
1257 // attributes where applicable.
1258 // FIXME: Is this really an optimization rather than a canonicalization?
1259 MPM.addPass(ReversePostOrderFunctionAttrsPass());
1260
1261 // Do a post inline PGO instrumentation and use pass. This is a context
1262 // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1263 // cross-module inline has not been done yet. The context sensitive
1264 // instrumentation is after all the inlines are done.
1265 if (!LTOPreLink && PGOOpt) {
1266 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1267 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1268 /* IsCS */ true, PGOOpt->CSProfileGenFile,
1269 PGOOpt->ProfileRemappingFile, LTOPhase);
1270 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1271 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1272 /* IsCS */ true, PGOOpt->ProfileFile,
1273 PGOOpt->ProfileRemappingFile, LTOPhase);
1274 }
1275
1276 // Re-compute GlobalsAA here prior to function passes. This is particularly
1277 // useful as the above will have inlined, DCE'ed, and function-attr
1278 // propagated everything. We should at this point have a reasonably minimal
1279 // and richly annotated call graph. By computing aliasing and mod/ref
1280 // information for all local globals here, the late loop passes and notably
1281 // the vectorizer will be able to use them to help recognize vectorizable
1282 // memory operations.
1283 MPM.addPass(RecomputeGlobalsAAPass());
1284
1285 for (auto &C : OptimizerEarlyEPCallbacks)
1286 C(MPM, Level);
1287
1288 FunctionPassManager OptimizePM;
1289 OptimizePM.addPass(Float2IntPass());
1290 OptimizePM.addPass(LowerConstantIntrinsicsPass());
1291
1292 if (EnableMatrix) {
1293 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1294 OptimizePM.addPass(EarlyCSEPass());
1295 }
1296
1297 // FIXME: We need to run some loop optimizations to re-rotate loops after
1298 // simplifycfg and others undo their rotation.
1299
1300 // Optimize the loop execution. These passes operate on entire loop nests
1301 // rather than on each loop in an inside-out manner, and so they are actually
1302 // function passes.
1303
1304 for (auto &C : VectorizerStartEPCallbacks)
1305 C(OptimizePM, Level);
1306
1307 LoopPassManager LPM;
1308 // First rotate loops that may have been un-rotated by prior passes.
1309 // Disable header duplication at -Oz.
1310 LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1311 // Some loops may have become dead by now. Try to delete them.
1312 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1313 // this may need to be revisited once we run GVN before loop deletion
1314 // in the simplification pipeline.
1315 LPM.addPass(LoopDeletionPass());
1316 OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1317 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1318
1319 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1320 // into separate loop that would otherwise inhibit vectorization. This is
1321 // currently only performed for loops marked with the metadata
1322 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1323 OptimizePM.addPass(LoopDistributePass());
1324
1325 // Populates the VFABI attribute with the scalar-to-vector mappings
1326 // from the TargetLibraryInfo.
1327 OptimizePM.addPass(InjectTLIMappings());
1328
1329 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1330
1331 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1332 // canonicalization pass that enables other optimizations. As a result,
1333 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1334 // result too early.
1335 OptimizePM.addPass(LoopSinkPass());
1336
1337 // And finally clean up LCSSA form before generating code.
1338 OptimizePM.addPass(InstSimplifyPass());
1339
1340 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1341 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1342 // flattening of blocks.
1343 OptimizePM.addPass(DivRemPairsPass());
1344
1345 // Try to annotate calls that were created during optimization.
1346 OptimizePM.addPass(TailCallElimPass());
1347
1348 // LoopSink (and other loop passes since the last simplifyCFG) might have
1349 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1350 OptimizePM.addPass(
1351 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1352
1353 // Add the core optimizing pipeline.
1354 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1355 PTO.EagerlyInvalidateAnalyses));
1356
1357 for (auto &C : OptimizerLastEPCallbacks)
1358 C(MPM, Level);
1359
1360 // Split out cold code. Splitting is done late to avoid hiding context from
1361 // other optimizations and inadvertently regressing performance. The tradeoff
1362 // is that this has a higher code size cost than splitting early.
1363 if (EnableHotColdSplit && !LTOPreLink)
1364 MPM.addPass(HotColdSplittingPass());
1365
1366 // Search the code for similar regions of code. If enough similar regions can
1367 // be found where extracting the regions into their own function will decrease
1368 // the size of the program, we extract the regions, a deduplicate the
1369 // structurally similar regions.
1370 if (EnableIROutliner)
1371 MPM.addPass(IROutlinerPass());
1372
1373 // Merge functions if requested.
1374 if (PTO.MergeFunctions)
1375 MPM.addPass(MergeFunctionsPass());
1376
1377 // Now we need to do some global optimization transforms.
1378 // FIXME: It would seem like these should come first in the optimization
1379 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1380 // ordering here.
1381 MPM.addPass(GlobalDCEPass());
1382 MPM.addPass(ConstantMergePass());
1383
1384 if (PTO.CallGraphProfile && !LTOPreLink)
1385 MPM.addPass(CGProfilePass());
1386
1387 // TODO: Relative look table converter pass caused an issue when full lto is
1388 // enabled. See https://reviews.llvm.org/D94355 for more details.
1389 // Until the issue fixed, disable this pass during pre-linking phase.
1390 if (!LTOPreLink)
1391 MPM.addPass(RelLookupTableConverterPass());
1392
1393 return MPM;
1394 }
1395
1396 ModulePassManager
buildPerModuleDefaultPipeline(OptimizationLevel Level,bool LTOPreLink)1397 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1398 bool LTOPreLink) {
1399 assert(Level != OptimizationLevel::O0 &&
1400 "Must request optimizations for the default pipeline!");
1401
1402 ModulePassManager MPM;
1403
1404 // Convert @llvm.global.annotations to !annotation metadata.
1405 MPM.addPass(Annotation2MetadataPass());
1406
1407 // Force any function attributes we want the rest of the pipeline to observe.
1408 MPM.addPass(ForceFunctionAttrsPass());
1409
1410 // Apply module pipeline start EP callback.
1411 for (auto &C : PipelineStartEPCallbacks)
1412 C(MPM, Level);
1413
1414 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1415 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1416
1417 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1418 ? ThinOrFullLTOPhase::FullLTOPreLink
1419 : ThinOrFullLTOPhase::None;
1420 // Add the core simplification pipeline.
1421 MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));
1422
1423 // Now add the optimization pipeline.
1424 MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));
1425
1426 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1427 PGOOpt->Action == PGOOptions::SampleUse)
1428 MPM.addPass(PseudoProbeUpdatePass());
1429
1430 // Emit annotation remarks.
1431 addAnnotationRemarksPass(MPM);
1432
1433 if (LTOPreLink)
1434 addRequiredLTOPreLinkPasses(MPM);
1435
1436 return MPM;
1437 }
1438
1439 ModulePassManager
buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)1440 PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1441 assert(Level != OptimizationLevel::O0 &&
1442 "Must request optimizations for the default pipeline!");
1443
1444 ModulePassManager MPM;
1445
1446 // Convert @llvm.global.annotations to !annotation metadata.
1447 MPM.addPass(Annotation2MetadataPass());
1448
1449 // Force any function attributes we want the rest of the pipeline to observe.
1450 MPM.addPass(ForceFunctionAttrsPass());
1451
1452 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1453 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1454
1455 // Apply module pipeline start EP callback.
1456 for (auto &C : PipelineStartEPCallbacks)
1457 C(MPM, Level);
1458
1459 // If we are planning to perform ThinLTO later, we don't bloat the code with
1460 // unrolling/vectorization/... now. Just simplify the module as much as we
1461 // can.
1462 MPM.addPass(buildModuleSimplificationPipeline(
1463 Level, ThinOrFullLTOPhase::ThinLTOPreLink));
1464
1465 // Run partial inlining pass to partially inline functions that have
1466 // large bodies.
1467 // FIXME: It isn't clear whether this is really the right place to run this
1468 // in ThinLTO. Because there is another canonicalization and simplification
1469 // phase that will run after the thin link, running this here ends up with
1470 // less information than will be available later and it may grow functions in
1471 // ways that aren't beneficial.
1472 if (RunPartialInlining)
1473 MPM.addPass(PartialInlinerPass());
1474
1475 // Reduce the size of the IR as much as possible.
1476 MPM.addPass(GlobalOptPass());
1477
1478 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1479 PGOOpt->Action == PGOOptions::SampleUse)
1480 MPM.addPass(PseudoProbeUpdatePass());
1481
1482 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1483 // optimization is going to be done in PostLink stage, but clang can't add
1484 // callbacks there in case of in-process ThinLTO called by linker.
1485 for (auto &C : OptimizerEarlyEPCallbacks)
1486 C(MPM, Level);
1487 for (auto &C : OptimizerLastEPCallbacks)
1488 C(MPM, Level);
1489
1490 // Emit annotation remarks.
1491 addAnnotationRemarksPass(MPM);
1492
1493 addRequiredLTOPreLinkPasses(MPM);
1494
1495 return MPM;
1496 }
1497
buildThinLTODefaultPipeline(OptimizationLevel Level,const ModuleSummaryIndex * ImportSummary)1498 ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1499 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1500 ModulePassManager MPM;
1501
1502 // Convert @llvm.global.annotations to !annotation metadata.
1503 MPM.addPass(Annotation2MetadataPass());
1504
1505 if (ImportSummary) {
1506 // These passes import type identifier resolutions for whole-program
1507 // devirtualization and CFI. They must run early because other passes may
1508 // disturb the specific instruction patterns that these passes look for,
1509 // creating dependencies on resolutions that may not appear in the summary.
1510 //
1511 // For example, GVN may transform the pattern assume(type.test) appearing in
1512 // two basic blocks into assume(phi(type.test, type.test)), which would
1513 // transform a dependency on a WPD resolution into a dependency on a type
1514 // identifier resolution for CFI.
1515 //
1516 // Also, WPD has access to more precise information than ICP and can
1517 // devirtualize more effectively, so it should operate on the IR first.
1518 //
1519 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1520 // metadata and intrinsics.
1521 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1522 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1523 }
1524
1525 if (Level == OptimizationLevel::O0) {
1526 // Run a second time to clean up any type tests left behind by WPD for use
1527 // in ICP.
1528 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1529 // Drop available_externally and unreferenced globals. This is necessary
1530 // with ThinLTO in order to avoid leaving undefined references to dead
1531 // globals in the object file.
1532 MPM.addPass(EliminateAvailableExternallyPass());
1533 MPM.addPass(GlobalDCEPass());
1534 return MPM;
1535 }
1536
1537 // Force any function attributes we want the rest of the pipeline to observe.
1538 MPM.addPass(ForceFunctionAttrsPass());
1539
1540 // Add the core simplification pipeline.
1541 MPM.addPass(buildModuleSimplificationPipeline(
1542 Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1543
1544 // Now add the optimization pipeline.
1545 MPM.addPass(buildModuleOptimizationPipeline(
1546 Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1547
1548 // Emit annotation remarks.
1549 addAnnotationRemarksPass(MPM);
1550
1551 return MPM;
1552 }
1553
1554 ModulePassManager
buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)1555 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1556 assert(Level != OptimizationLevel::O0 &&
1557 "Must request optimizations for the default pipeline!");
1558 // FIXME: We should use a customized pre-link pipeline!
1559 return buildPerModuleDefaultPipeline(Level,
1560 /* LTOPreLink */ true);
1561 }
1562
/// Assemble the full-LTO post-link pipeline for \p Level.
///
/// \p ExportSummary is forwarded to the whole-program devirtualization and
/// type test lowering passes.
///
/// The function has three exits, each of which runs the
/// FullLinkTimeOptimizationLastEPCallbacks and adds the annotation remarks
/// pass before returning:
///   - at O0, right after the passes needed to lower type metadata and
///     intrinsics;
///   - at O1, after attribute inference, global splitting and whole-program
///     devirtualization;
///   - otherwise, after the complete pipeline below.
///
/// The order of the addPass calls is the order in which the passes run, so
/// statements in this function must not be reordered casually.
ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                                     ModuleSummaryIndex *ExportSummary) {
  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Apply the full-LTO early EP callbacks.
  for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
    C(MPM, Level);

  // Create a function that performs CFI checks for cross-DSO calls with targets
  // in the current module.
  MPM.addPass(CrossDSOCFIPass());

  if (Level == OptimizationLevel::O0) {
    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP.
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

    // Apply the full-LTO last EP callbacks before leaving at O0.
    for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
      C(MPM, Level);

    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);

    return MPM;
  }

  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
    // Load sample profile before running the LTO optimization pipeline.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile,
                                        ThinOrFullLTOPhase::FullLTOPostLink));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  }

  // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
  MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));

  // Remove unused virtual tables to improve the quality of code generated by
  // whole-program devirtualization and bitset lowering.
  MPM.addPass(GlobalDCEPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  // Do basic inference of function attributes from known properties of system
  // libraries and other oracles.
  MPM.addPass(InferFunctionAttrsPass());

  // The passes in this group are only added above speedup level 1.
  if (Level.getSpeedupLevel() > 1) {
    MPM.addPass(createModuleToFunctionPassAdaptor(
        CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));

    // Indirect call promotion. This should promote all the targets that are
    // left by the earlier promotion pass that promotes intra-module targets.
    // This two-step promotion is to save the compile time. For LTO, it should
    // produce the same result as if we only do promotion here.
    MPM.addPass(PGOIndirectCallPromotion(
        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));

    // Propagate constants at call sites into the functions they call.  This
    // opens opportunities for globalopt (and inlining) by substituting function
    // pointers passed as arguments to direct uses of functions.
    // AllowFuncSpec is false when optimizing for size (Os/Oz).
    MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
                                         Level != OptimizationLevel::Os &&
                                         Level != OptimizationLevel::Oz)));

    // Attach metadata to indirect call sites indicating the set of functions
    // they may target at run-time. This should follow IPSCCP.
    MPM.addPass(CalledValuePropagationPass());
  }

  // Now deduce any function attributes based on the current code.
  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Do RPO function attribute inference across the module to forward-propagate
  // attributes where applicable.
  // FIXME: Is this really an optimization rather than a canonicalization?
  MPM.addPass(ReversePostOrderFunctionAttrsPass());

  // Use in-range annotations on GEP indices to split globals where beneficial.
  MPM.addPass(GlobalSplitPass());

  // Run whole program optimization of virtual call when the list of callees
  // is fixed.
  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));

  // Stop here at -O1.
  if (Level == OptimizationLevel::O1) {
    // The LowerTypeTestsPass needs to run to lower type metadata and the
    // type.test intrinsics. The pass does nothing if CFI is disabled.
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP (which is performed earlier than this in the regular LTO
    // pipeline).
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

    // Apply the full-LTO last EP callbacks before leaving at O1.
    for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
      C(MPM, Level);

    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);

    return MPM;
  }

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Promote any localized globals to SSA registers.
  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));

  // Linking modules together can lead to duplicate global constants; only
  // keep one copy of each constant.
  MPM.addPass(ConstantMergePass());

  // Reduce the code after globalopt and ipsccp.  Both can open up significant
  // simplification opportunities, and both can propagate functions through
  // function pointers.  When this happens, we often have to resolve varargs
  // calls, etc, so let instcombine do this.
  FunctionPassManager PeepholeFPM;
  PeepholeFPM.addPass(InstCombinePass());
  // AggressiveInstCombine is only added at O3.
  if (Level == OptimizationLevel::O3)
    PeepholeFPM.addPass(AggressiveInstCombinePass());
  invokePeepholeEPCallbacks(PeepholeFPM, Level);

  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Note: historically, the PruneEH pass was run first to deduce nounwind and
  // generally clean up exception handling overhead. It isn't clear this is
  // valuable as the inliner doesn't currently care whether it is inlining an
  // invoke or a call.
  // Run the inliner now.
  MPM.addPass(ModuleInlinerWrapperPass(
      getInlineParamsFromOptLevel(Level),
      /* MandatoryFirst */ true,
      InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
                    InlinePass::CGSCCInliner}));

  // Optimize globals again after we ran the inliner.
  MPM.addPass(GlobalOptPass());

  // Run the OpenMPOpt pass again after global optimizations.
  MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));

  // Garbage collect dead functions.
  MPM.addPass(GlobalDCEPass());

  // If we didn't decide to inline a function, check to see if we can
  // transform it to pass arguments by value instead of by reference.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));

  // Remove unused arguments from functions.
  MPM.addPass(DeadArgumentEliminationPass());

  FunctionPassManager FPM;
  // The IPO Passes may leave cruft around. Clean up after them.
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  if (EnableConstraintElimination)
    FPM.addPass(ConstraintEliminationPass());

  FPM.addPass(JumpThreadingPass());

  // Do a post inline PGO instrumentation and use pass. This is a context
  // sensitive PGO pass.
  // Note that addPGOInstrPasses is handed MPM, not FPM, and FPM has not been
  // added to MPM yet: whatever it adds to MPM is therefore placed ahead of the
  // whole FPM function pipeline (including the FPM passes added above).
  if (PGOOpt) {
    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
                        PGOOpt->ProfileRemappingFile,
                        ThinOrFullLTOPhase::FullLTOPostLink);
    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
                        /* IsCS */ true, PGOOpt->ProfileFile,
                        PGOOpt->ProfileRemappingFile,
                        ThinOrFullLTOPhase::FullLTOPostLink);
  }

  // Break up allocas
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // LTO provides additional opportunities for tailcall elimination due to
  // link-time inlining, and visibility of nocapture attribute.
  FPM.addPass(TailCallElimPass());

  // Run a few AA driver optimizations here and now to cleanup the code.
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
                                                PTO.EagerlyInvalidateAnalyses));

  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Require the GlobalsAA analysis for the module so we can query it within
  // MainFPM.
  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
  // Invalidate AAManager so it can be recreated and pick up the newly available
  // GlobalsAA.
  MPM.addPass(
      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));

  FunctionPassManager MainFPM;
  MainFPM.addPass(createFunctionToLoopPassAdaptor(
      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));

  // Pick the GVN implementation based on the RunNewGVN flag.
  if (RunNewGVN)
    MainFPM.addPass(NewGVNPass());
  else
    MainFPM.addPass(GVNPass());

  // Remove dead memcpy()'s.
  MainFPM.addPass(MemCpyOptPass());

  // Nuke dead stores.
  MainFPM.addPass(DSEPass());
  MainFPM.addPass(MergedLoadStoreMotionPass());

  LoopPassManager LPM;
  // Loop flattening is only added when enabled and above speedup level 1.
  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
    LPM.addPass(LoopFlattenPass());
  LPM.addPass(IndVarSimplifyPass());
  LPM.addPass(LoopDeletionPass());
  // FIXME: Add loop interchange.

  // Unroll small loops and perform peeling.
  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                 PTO.ForgetAllSCEVInLoopUnroll));
  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
  // *All* loop passes must preserve it, in order to be able to use it.
  MainFPM.addPass(createFunctionToLoopPassAdaptor(
      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));

  MainFPM.addPass(LoopDistributePass());

  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);

  // Run the OpenMPOpt CGSCC pass again late.
  // This is added to MPM while MainFPM is still being assembled, so it is
  // placed in MPM ahead of the MainFPM function pipeline added below.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
      OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));

  invokePeepholeEPCallbacks(MainFPM, Level);
  MainFPM.addPass(JumpThreadingPass());
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Lower type metadata and the type.test intrinsic. This pass supports
  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
  // to be run at link time if CFI is enabled. This pass does nothing if
  // CFI is disabled.
  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
  // Run a second time to clean up any type tests left behind by WPD for use
  // in ICP (which is performed earlier than this in the regular LTO pipeline).
  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

  // Enable splitting late in the FullLTO post-link pipeline.
  if (EnableHotColdSplit)
    MPM.addPass(HotColdSplittingPass());

  // Add late LTO optimization passes.
  // Delete basic blocks, which optimization passes may have killed.
  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(
      SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
          true))));

  // Drop bodies of available externally objects to improve GlobalDCE.
  MPM.addPass(EliminateAvailableExternallyPass());

  // Now that we have optimized the program, discard unreachable functions.
  MPM.addPass(GlobalDCEPass());

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (PTO.CallGraphProfile)
    MPM.addPass(CGProfilePass());

  // Apply the full-LTO last EP callbacks at the end of the full pipeline.
  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
    C(MPM, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  return MPM;
}
1861
buildO0DefaultPipeline(OptimizationLevel Level,bool LTOPreLink)1862 ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
1863 bool LTOPreLink) {
1864 assert(Level == OptimizationLevel::O0 &&
1865 "buildO0DefaultPipeline should only be used with O0");
1866
1867 ModulePassManager MPM;
1868
1869 // Perform pseudo probe instrumentation in O0 mode. This is for the
1870 // consistency between different build modes. For example, a LTO build can be
1871 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1872 // the postlink will require pseudo probe instrumentation in the prelink.
1873 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1874 MPM.addPass(SampleProfileProbePass(TM));
1875
1876 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1877 PGOOpt->Action == PGOOptions::IRUse))
1878 addPGOInstrPassesForO0(
1879 MPM,
1880 /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
1881 /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1882
1883 for (auto &C : PipelineStartEPCallbacks)
1884 C(MPM, Level);
1885
1886 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1887 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1888
1889 for (auto &C : PipelineEarlySimplificationEPCallbacks)
1890 C(MPM, Level);
1891
1892 // Build a minimal pipeline based on the semantics required by LLVM,
1893 // which is just that always inlining occurs. Further, disable generating
1894 // lifetime intrinsics to avoid enabling further optimizations during
1895 // code generation.
1896 MPM.addPass(AlwaysInlinerPass(
1897 /*InsertLifetimeIntrinsics=*/false));
1898
1899 if (PTO.MergeFunctions)
1900 MPM.addPass(MergeFunctionsPass());
1901
1902 if (EnableMatrix)
1903 MPM.addPass(
1904 createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
1905
1906 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
1907 CGSCCPassManager CGPM;
1908 for (auto &C : CGSCCOptimizerLateEPCallbacks)
1909 C(CGPM, Level);
1910 if (!CGPM.isEmpty())
1911 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1912 }
1913 if (!LateLoopOptimizationsEPCallbacks.empty()) {
1914 LoopPassManager LPM;
1915 for (auto &C : LateLoopOptimizationsEPCallbacks)
1916 C(LPM, Level);
1917 if (!LPM.isEmpty()) {
1918 MPM.addPass(createModuleToFunctionPassAdaptor(
1919 createFunctionToLoopPassAdaptor(std::move(LPM))));
1920 }
1921 }
1922 if (!LoopOptimizerEndEPCallbacks.empty()) {
1923 LoopPassManager LPM;
1924 for (auto &C : LoopOptimizerEndEPCallbacks)
1925 C(LPM, Level);
1926 if (!LPM.isEmpty()) {
1927 MPM.addPass(createModuleToFunctionPassAdaptor(
1928 createFunctionToLoopPassAdaptor(std::move(LPM))));
1929 }
1930 }
1931 if (!ScalarOptimizerLateEPCallbacks.empty()) {
1932 FunctionPassManager FPM;
1933 for (auto &C : ScalarOptimizerLateEPCallbacks)
1934 C(FPM, Level);
1935 if (!FPM.isEmpty())
1936 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
1937 }
1938
1939 for (auto &C : OptimizerEarlyEPCallbacks)
1940 C(MPM, Level);
1941
1942 if (!VectorizerStartEPCallbacks.empty()) {
1943 FunctionPassManager FPM;
1944 for (auto &C : VectorizerStartEPCallbacks)
1945 C(FPM, Level);
1946 if (!FPM.isEmpty())
1947 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
1948 }
1949
1950 ModulePassManager CoroPM;
1951 CoroPM.addPass(CoroEarlyPass());
1952 CGSCCPassManager CGPM;
1953 CGPM.addPass(CoroSplitPass());
1954 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1955 CoroPM.addPass(CoroCleanupPass());
1956 CoroPM.addPass(GlobalDCEPass());
1957 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
1958
1959 for (auto &C : OptimizerLastEPCallbacks)
1960 C(MPM, Level);
1961
1962 if (LTOPreLink)
1963 addRequiredLTOPreLinkPasses(MPM);
1964
1965 MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
1966
1967 return MPM;
1968 }
1969
buildDefaultAAPipeline()1970 AAManager PassBuilder::buildDefaultAAPipeline() {
1971 AAManager AA;
1972
1973 // The order in which these are registered determines their priority when
1974 // being queried.
1975
1976 // First we register the basic alias analysis that provides the majority of
1977 // per-function local AA logic. This is a stateless, on-demand local set of
1978 // AA techniques.
1979 AA.registerFunctionAnalysis<BasicAA>();
1980
1981 // Next we query fast, specialized alias analyses that wrap IR-embedded
1982 // information about aliasing.
1983 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
1984 AA.registerFunctionAnalysis<TypeBasedAA>();
1985
1986 // Add support for querying global aliasing information when available.
1987 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
1988 // analysis, all that the `AAManager` can do is query for any *cached*
1989 // results from `GlobalsAA` through a readonly proxy.
1990 if (EnableGlobalAnalyses)
1991 AA.registerModuleAnalysis<GlobalsAA>();
1992
1993 // Add target-specific alias analyses.
1994 if (TM)
1995 TM->registerDefaultAliasAnalyses(AA);
1996
1997 return AA;
1998 }
1999