xref: /aosp_15_r20/art/compiler/optimizing/loop_analysis.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1*795d594fSAndroid Build Coastguard Worker /*
2*795d594fSAndroid Build Coastguard Worker  * Copyright (C) 2018 The Android Open Source Project
3*795d594fSAndroid Build Coastguard Worker  *
4*795d594fSAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*795d594fSAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*795d594fSAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*795d594fSAndroid Build Coastguard Worker  *
8*795d594fSAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*795d594fSAndroid Build Coastguard Worker  *
10*795d594fSAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*795d594fSAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*795d594fSAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*795d594fSAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*795d594fSAndroid Build Coastguard Worker  * limitations under the License.
15*795d594fSAndroid Build Coastguard Worker  */
16*795d594fSAndroid Build Coastguard Worker 
17*795d594fSAndroid Build Coastguard Worker #include "loop_analysis.h"
18*795d594fSAndroid Build Coastguard Worker 
19*795d594fSAndroid Build Coastguard Worker #include "base/bit_vector-inl.h"
20*795d594fSAndroid Build Coastguard Worker #include "code_generator.h"
21*795d594fSAndroid Build Coastguard Worker #include "induction_var_range.h"
22*795d594fSAndroid Build Coastguard Worker 
23*795d594fSAndroid Build Coastguard Worker namespace art HIDDEN {
24*795d594fSAndroid Build Coastguard Worker 
CalculateLoopBasicProperties(HLoopInformation * loop_info,LoopAnalysisInfo * analysis_results,int64_t trip_count)25*795d594fSAndroid Build Coastguard Worker void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
26*795d594fSAndroid Build Coastguard Worker                                                 LoopAnalysisInfo* analysis_results,
27*795d594fSAndroid Build Coastguard Worker                                                 int64_t trip_count) {
28*795d594fSAndroid Build Coastguard Worker   analysis_results->trip_count_ = trip_count;
29*795d594fSAndroid Build Coastguard Worker 
30*795d594fSAndroid Build Coastguard Worker   for (HBlocksInLoopIterator block_it(*loop_info);
31*795d594fSAndroid Build Coastguard Worker        !block_it.Done();
32*795d594fSAndroid Build Coastguard Worker        block_it.Advance()) {
33*795d594fSAndroid Build Coastguard Worker     HBasicBlock* block = block_it.Current();
34*795d594fSAndroid Build Coastguard Worker 
35*795d594fSAndroid Build Coastguard Worker     // Check whether one of the successor is loop exit.
36*795d594fSAndroid Build Coastguard Worker     for (HBasicBlock* successor : block->GetSuccessors()) {
37*795d594fSAndroid Build Coastguard Worker       if (!loop_info->Contains(*successor)) {
38*795d594fSAndroid Build Coastguard Worker         analysis_results->exits_num_++;
39*795d594fSAndroid Build Coastguard Worker 
40*795d594fSAndroid Build Coastguard Worker         // We track number of invariant loop exits which correspond to HIf instruction and
41*795d594fSAndroid Build Coastguard Worker         // can be eliminated by loop peeling; other control flow instruction are ignored and will
42*795d594fSAndroid Build Coastguard Worker         // not cause loop peeling to happen as they either cannot be inside a loop, or by
43*795d594fSAndroid Build Coastguard Worker         // definition cannot be loop exits (unconditional instructions), or are not beneficial for
44*795d594fSAndroid Build Coastguard Worker         // the optimization.
45*795d594fSAndroid Build Coastguard Worker         HIf* hif = block->GetLastInstruction()->AsIfOrNull();
46*795d594fSAndroid Build Coastguard Worker         if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
47*795d594fSAndroid Build Coastguard Worker           analysis_results->invariant_exits_num_++;
48*795d594fSAndroid Build Coastguard Worker         }
49*795d594fSAndroid Build Coastguard Worker       }
50*795d594fSAndroid Build Coastguard Worker     }
51*795d594fSAndroid Build Coastguard Worker 
52*795d594fSAndroid Build Coastguard Worker     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
53*795d594fSAndroid Build Coastguard Worker       HInstruction* instruction = it.Current();
54*795d594fSAndroid Build Coastguard Worker       if (it.Current()->GetType() == DataType::Type::kInt64) {
55*795d594fSAndroid Build Coastguard Worker         analysis_results->has_long_type_instructions_ = true;
56*795d594fSAndroid Build Coastguard Worker       }
57*795d594fSAndroid Build Coastguard Worker       if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) {
58*795d594fSAndroid Build Coastguard Worker         analysis_results->has_instructions_preventing_scalar_peeling_ = true;
59*795d594fSAndroid Build Coastguard Worker         analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
60*795d594fSAndroid Build Coastguard Worker       }
61*795d594fSAndroid Build Coastguard Worker       analysis_results->instr_num_++;
62*795d594fSAndroid Build Coastguard Worker     }
63*795d594fSAndroid Build Coastguard Worker     analysis_results->bb_num_++;
64*795d594fSAndroid Build Coastguard Worker   }
65*795d594fSAndroid Build Coastguard Worker }
66*795d594fSAndroid Build Coastguard Worker 
GetLoopTripCount(HLoopInformation * loop_info,const InductionVarRange * induction_range)67*795d594fSAndroid Build Coastguard Worker int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info,
68*795d594fSAndroid Build Coastguard Worker                                        const InductionVarRange* induction_range) {
69*795d594fSAndroid Build Coastguard Worker   int64_t trip_count;
70*795d594fSAndroid Build Coastguard Worker   if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) {
71*795d594fSAndroid Build Coastguard Worker     trip_count = LoopAnalysisInfo::kUnknownTripCount;
72*795d594fSAndroid Build Coastguard Worker   }
73*795d594fSAndroid Build Coastguard Worker   return trip_count;
74*795d594fSAndroid Build Coastguard Worker }
75*795d594fSAndroid Build Coastguard Worker 
76*795d594fSAndroid Build Coastguard Worker // Default implementation of loop helper; used for all targets unless a custom implementation
77*795d594fSAndroid Build Coastguard Worker // is provided. Enables scalar loop peeling and unrolling with the most conservative heuristics.
78*795d594fSAndroid Build Coastguard Worker class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
79*795d594fSAndroid Build Coastguard Worker  public:
ArchDefaultLoopHelper(const CodeGenerator & codegen)80*795d594fSAndroid Build Coastguard Worker   explicit ArchDefaultLoopHelper(const CodeGenerator& codegen) : ArchNoOptsLoopHelper(codegen) {}
81*795d594fSAndroid Build Coastguard Worker   // Scalar loop unrolling parameters and heuristics.
82*795d594fSAndroid Build Coastguard Worker   //
83*795d594fSAndroid Build Coastguard Worker   // Maximum possible unrolling factor.
84*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kScalarMaxUnrollFactor = 2;
85*795d594fSAndroid Build Coastguard Worker   // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
86*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kScalarHeuristicMaxBodySizeInstr = 17;
87*795d594fSAndroid Build Coastguard Worker   // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
88*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
89*795d594fSAndroid Build Coastguard Worker   // Maximum number of instructions to be created as a result of full unrolling.
90*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kScalarHeuristicFullyUnrolledMaxInstrThreshold = 35;
91*795d594fSAndroid Build Coastguard Worker 
IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo * analysis_info) const92*795d594fSAndroid Build Coastguard Worker   bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const override {
93*795d594fSAndroid Build Coastguard Worker     return analysis_info->HasLongTypeInstructions() ||
94*795d594fSAndroid Build Coastguard Worker            IsLoopTooBig(analysis_info,
95*795d594fSAndroid Build Coastguard Worker                         kScalarHeuristicMaxBodySizeInstr,
96*795d594fSAndroid Build Coastguard Worker                         kScalarHeuristicMaxBodySizeBlocks);
97*795d594fSAndroid Build Coastguard Worker   }
98*795d594fSAndroid Build Coastguard Worker 
GetScalarUnrollingFactor(const LoopAnalysisInfo * analysis_info) const99*795d594fSAndroid Build Coastguard Worker   uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const override {
100*795d594fSAndroid Build Coastguard Worker     int64_t trip_count = analysis_info->GetTripCount();
101*795d594fSAndroid Build Coastguard Worker     // Unroll only loops with known trip count.
102*795d594fSAndroid Build Coastguard Worker     if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
103*795d594fSAndroid Build Coastguard Worker       return LoopAnalysisInfo::kNoUnrollingFactor;
104*795d594fSAndroid Build Coastguard Worker     }
105*795d594fSAndroid Build Coastguard Worker     uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
106*795d594fSAndroid Build Coastguard Worker     if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
107*795d594fSAndroid Build Coastguard Worker       return LoopAnalysisInfo::kNoUnrollingFactor;
108*795d594fSAndroid Build Coastguard Worker     }
109*795d594fSAndroid Build Coastguard Worker 
110*795d594fSAndroid Build Coastguard Worker     return desired_unrolling_factor;
111*795d594fSAndroid Build Coastguard Worker   }
112*795d594fSAndroid Build Coastguard Worker 
IsLoopPeelingEnabled() const113*795d594fSAndroid Build Coastguard Worker   bool IsLoopPeelingEnabled() const override { return true; }
114*795d594fSAndroid Build Coastguard Worker 
IsFullUnrollingBeneficial(LoopAnalysisInfo * analysis_info) const115*795d594fSAndroid Build Coastguard Worker   bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info) const override {
116*795d594fSAndroid Build Coastguard Worker     int64_t trip_count = analysis_info->GetTripCount();
117*795d594fSAndroid Build Coastguard Worker     // We assume that trip count is known.
118*795d594fSAndroid Build Coastguard Worker     DCHECK_NE(trip_count, LoopAnalysisInfo::kUnknownTripCount);
119*795d594fSAndroid Build Coastguard Worker     size_t instr_num = analysis_info->GetNumberOfInstructions();
120*795d594fSAndroid Build Coastguard Worker     return (trip_count * instr_num < kScalarHeuristicFullyUnrolledMaxInstrThreshold);
121*795d594fSAndroid Build Coastguard Worker   }
122*795d594fSAndroid Build Coastguard Worker 
123*795d594fSAndroid Build Coastguard Worker  protected:
IsLoopTooBig(LoopAnalysisInfo * loop_analysis_info,size_t instr_threshold,size_t bb_threshold) const124*795d594fSAndroid Build Coastguard Worker   bool IsLoopTooBig(LoopAnalysisInfo* loop_analysis_info,
125*795d594fSAndroid Build Coastguard Worker                     size_t instr_threshold,
126*795d594fSAndroid Build Coastguard Worker                     size_t bb_threshold) const {
127*795d594fSAndroid Build Coastguard Worker     size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
128*795d594fSAndroid Build Coastguard Worker     size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
129*795d594fSAndroid Build Coastguard Worker     return (instr_num >= instr_threshold || bb_num >= bb_threshold);
130*795d594fSAndroid Build Coastguard Worker   }
131*795d594fSAndroid Build Coastguard Worker };
132*795d594fSAndroid Build Coastguard Worker 
133*795d594fSAndroid Build Coastguard Worker // Custom implementation of loop helper for arm64 target. Enables heuristics for scalar loop
134*795d594fSAndroid Build Coastguard Worker // peeling and unrolling and supports SIMD loop unrolling.
135*795d594fSAndroid Build Coastguard Worker class Arm64LoopHelper : public ArchDefaultLoopHelper {
136*795d594fSAndroid Build Coastguard Worker  public:
Arm64LoopHelper(const CodeGenerator & codegen)137*795d594fSAndroid Build Coastguard Worker   explicit Arm64LoopHelper(const CodeGenerator& codegen) : ArchDefaultLoopHelper(codegen) {}
138*795d594fSAndroid Build Coastguard Worker   // SIMD loop unrolling parameters and heuristics.
139*795d594fSAndroid Build Coastguard Worker   //
140*795d594fSAndroid Build Coastguard Worker   // Maximum possible unrolling factor.
141*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
142*795d594fSAndroid Build Coastguard Worker   // Loop's maximum instruction count. Loops with higher count will not be unrolled.
143*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
144*795d594fSAndroid Build Coastguard Worker 
145*795d594fSAndroid Build Coastguard Worker   // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
146*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
147*795d594fSAndroid Build Coastguard Worker   // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
148*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
149*795d594fSAndroid Build Coastguard Worker 
IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo * loop_analysis_info) const150*795d594fSAndroid Build Coastguard Worker   bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const override {
151*795d594fSAndroid Build Coastguard Worker     return IsLoopTooBig(loop_analysis_info,
152*795d594fSAndroid Build Coastguard Worker                         kArm64ScalarHeuristicMaxBodySizeInstr,
153*795d594fSAndroid Build Coastguard Worker                         kArm64ScalarHeuristicMaxBodySizeBlocks);
154*795d594fSAndroid Build Coastguard Worker   }
155*795d594fSAndroid Build Coastguard Worker 
GetSIMDUnrollingFactor(HBasicBlock * block,int64_t trip_count,uint32_t max_peel,uint32_t vector_length) const156*795d594fSAndroid Build Coastguard Worker   uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
157*795d594fSAndroid Build Coastguard Worker                                   int64_t trip_count,
158*795d594fSAndroid Build Coastguard Worker                                   uint32_t max_peel,
159*795d594fSAndroid Build Coastguard Worker                                   uint32_t vector_length) const override {
160*795d594fSAndroid Build Coastguard Worker     // Don't unroll with insufficient iterations.
161*795d594fSAndroid Build Coastguard Worker     // TODO: Unroll loops with unknown trip count.
162*795d594fSAndroid Build Coastguard Worker     DCHECK_NE(vector_length, 0u);
163*795d594fSAndroid Build Coastguard Worker     // TODO: Unroll loops in predicated vectorization mode.
164*795d594fSAndroid Build Coastguard Worker     if (codegen_.SupportsPredicatedSIMD()) {
165*795d594fSAndroid Build Coastguard Worker       return LoopAnalysisInfo::kNoUnrollingFactor;
166*795d594fSAndroid Build Coastguard Worker     }
167*795d594fSAndroid Build Coastguard Worker     if (trip_count < (2 * vector_length + max_peel)) {
168*795d594fSAndroid Build Coastguard Worker       return LoopAnalysisInfo::kNoUnrollingFactor;
169*795d594fSAndroid Build Coastguard Worker     }
170*795d594fSAndroid Build Coastguard Worker     // Don't unroll for large loop body size.
171*795d594fSAndroid Build Coastguard Worker     uint32_t instruction_count = block->GetInstructions().CountSize();
172*795d594fSAndroid Build Coastguard Worker     if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
173*795d594fSAndroid Build Coastguard Worker       return LoopAnalysisInfo::kNoUnrollingFactor;
174*795d594fSAndroid Build Coastguard Worker     }
175*795d594fSAndroid Build Coastguard Worker     // Find a beneficial unroll factor with the following restrictions:
176*795d594fSAndroid Build Coastguard Worker     //  - At least one iteration of the transformed loop should be executed.
177*795d594fSAndroid Build Coastguard Worker     //  - The loop body shouldn't be "too big" (heuristic).
178*795d594fSAndroid Build Coastguard Worker 
179*795d594fSAndroid Build Coastguard Worker     uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count;
180*795d594fSAndroid Build Coastguard Worker     uint32_t uf2 = (trip_count - max_peel) / vector_length;
181*795d594fSAndroid Build Coastguard Worker     uint32_t unroll_factor =
182*795d594fSAndroid Build Coastguard Worker         TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor}));
183*795d594fSAndroid Build Coastguard Worker     DCHECK_GE(unroll_factor, 1u);
184*795d594fSAndroid Build Coastguard Worker     return unroll_factor;
185*795d594fSAndroid Build Coastguard Worker   }
186*795d594fSAndroid Build Coastguard Worker };
187*795d594fSAndroid Build Coastguard Worker 
188*795d594fSAndroid Build Coastguard Worker // Custom implementation of loop helper for X86_64 target. Enables heuristics for scalar loop
189*795d594fSAndroid Build Coastguard Worker // peeling and unrolling and supports SIMD loop unrolling.
190*795d594fSAndroid Build Coastguard Worker class X86_64LoopHelper : public ArchDefaultLoopHelper {
191*795d594fSAndroid Build Coastguard Worker   // mapping of machine instruction count for most used IR instructions
192*795d594fSAndroid Build Coastguard Worker   // Few IRs generate different number of instructions based on input and result type.
193*795d594fSAndroid Build Coastguard Worker   // We checked top java apps, benchmarks and used the most generated instruction count.
GetMachineInstructionCount(HInstruction * inst) const194*795d594fSAndroid Build Coastguard Worker   uint32_t GetMachineInstructionCount(HInstruction* inst) const {
195*795d594fSAndroid Build Coastguard Worker     switch (inst->GetKind()) {
196*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kAbs:
197*795d594fSAndroid Build Coastguard Worker         return 3;
198*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kAdd:
199*795d594fSAndroid Build Coastguard Worker         return 1;
200*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kAnd:
201*795d594fSAndroid Build Coastguard Worker         return 1;
202*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kArrayLength:
203*795d594fSAndroid Build Coastguard Worker         return 1;
204*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kArrayGet:
205*795d594fSAndroid Build Coastguard Worker         return 1;
206*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kArraySet:
207*795d594fSAndroid Build Coastguard Worker         return 1;
208*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kBoundsCheck:
209*795d594fSAndroid Build Coastguard Worker         return 2;
210*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kCheckCast:
211*795d594fSAndroid Build Coastguard Worker         return 9;
212*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kDiv:
213*795d594fSAndroid Build Coastguard Worker         return 8;
214*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kDivZeroCheck:
215*795d594fSAndroid Build Coastguard Worker         return 2;
216*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kEqual:
217*795d594fSAndroid Build Coastguard Worker         return 3;
218*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kGreaterThan:
219*795d594fSAndroid Build Coastguard Worker         return 3;
220*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kGreaterThanOrEqual:
221*795d594fSAndroid Build Coastguard Worker         return 3;
222*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kIf:
223*795d594fSAndroid Build Coastguard Worker         return 2;
224*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kInstanceFieldGet:
225*795d594fSAndroid Build Coastguard Worker         return 2;
226*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kInstanceFieldSet:
227*795d594fSAndroid Build Coastguard Worker         return 1;
228*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kLessThan:
229*795d594fSAndroid Build Coastguard Worker         return 3;
230*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kLessThanOrEqual:
231*795d594fSAndroid Build Coastguard Worker         return 3;
232*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kMax:
233*795d594fSAndroid Build Coastguard Worker         return 2;
234*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kMin:
235*795d594fSAndroid Build Coastguard Worker         return 2;
236*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kMul:
237*795d594fSAndroid Build Coastguard Worker         return 1;
238*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kNotEqual:
239*795d594fSAndroid Build Coastguard Worker         return 3;
240*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kOr:
241*795d594fSAndroid Build Coastguard Worker         return 1;
242*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kRem:
243*795d594fSAndroid Build Coastguard Worker         return 11;
244*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kSelect:
245*795d594fSAndroid Build Coastguard Worker         return 2;
246*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kShl:
247*795d594fSAndroid Build Coastguard Worker         return 1;
248*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kShr:
249*795d594fSAndroid Build Coastguard Worker         return 1;
250*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kSub:
251*795d594fSAndroid Build Coastguard Worker         return 1;
252*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kTypeConversion:
253*795d594fSAndroid Build Coastguard Worker         return 1;
254*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kUShr:
255*795d594fSAndroid Build Coastguard Worker         return 1;
256*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecReplicateScalar:
257*795d594fSAndroid Build Coastguard Worker         return 2;
258*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecExtractScalar:
259*795d594fSAndroid Build Coastguard Worker         return 1;
260*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecReduce:
261*795d594fSAndroid Build Coastguard Worker         return 4;
262*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecNeg:
263*795d594fSAndroid Build Coastguard Worker         return 2;
264*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecAbs:
265*795d594fSAndroid Build Coastguard Worker         return 4;
266*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecNot:
267*795d594fSAndroid Build Coastguard Worker         return 3;
268*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecAdd:
269*795d594fSAndroid Build Coastguard Worker         return 1;
270*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecSub:
271*795d594fSAndroid Build Coastguard Worker         return 1;
272*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecMul:
273*795d594fSAndroid Build Coastguard Worker         return 1;
274*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecDiv:
275*795d594fSAndroid Build Coastguard Worker         return 1;
276*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecMax:
277*795d594fSAndroid Build Coastguard Worker         return 1;
278*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecMin:
279*795d594fSAndroid Build Coastguard Worker         return 1;
280*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecOr:
281*795d594fSAndroid Build Coastguard Worker         return 1;
282*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecXor:
283*795d594fSAndroid Build Coastguard Worker         return 1;
284*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecShl:
285*795d594fSAndroid Build Coastguard Worker         return 1;
286*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecShr:
287*795d594fSAndroid Build Coastguard Worker         return 1;
288*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecLoad:
289*795d594fSAndroid Build Coastguard Worker         return 1;
290*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kVecStore:
291*795d594fSAndroid Build Coastguard Worker         return 1;
292*795d594fSAndroid Build Coastguard Worker       case HInstruction::InstructionKind::kXor:
293*795d594fSAndroid Build Coastguard Worker         return 1;
294*795d594fSAndroid Build Coastguard Worker       default:
295*795d594fSAndroid Build Coastguard Worker         return 1;
296*795d594fSAndroid Build Coastguard Worker     }
297*795d594fSAndroid Build Coastguard Worker   }
298*795d594fSAndroid Build Coastguard Worker 
299*795d594fSAndroid Build Coastguard Worker   // Maximum possible unrolling factor.
300*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kX86_64MaxUnrollFactor = 2;  // pow(2,2) = 4
301*795d594fSAndroid Build Coastguard Worker 
302*795d594fSAndroid Build Coastguard Worker   // According to Intel® 64 and IA-32 Architectures Optimization Reference Manual,
303*795d594fSAndroid Build Coastguard Worker   // avoid excessive loop unrolling to ensure LSD (loop stream decoder) is operating efficiently.
304*795d594fSAndroid Build Coastguard Worker   // This variable takes care that unrolled loop instructions should not exceed LSD size.
305*795d594fSAndroid Build Coastguard Worker   // For Intel Atom processors (silvermont & goldmont), LSD size is 28
306*795d594fSAndroid Build Coastguard Worker   // TODO - identify architecture and LSD size at runtime
307*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kX86_64UnrolledMaxBodySizeInstr = 28;
308*795d594fSAndroid Build Coastguard Worker 
309*795d594fSAndroid Build Coastguard Worker   // Loop's maximum basic block count. Loops with higher count will not be partial
310*795d594fSAndroid Build Coastguard Worker   // unrolled (unknown iterations).
311*795d594fSAndroid Build Coastguard Worker   static constexpr uint32_t kX86_64UnknownIterMaxBodySizeBlocks = 2;
312*795d594fSAndroid Build Coastguard Worker 
313*795d594fSAndroid Build Coastguard Worker   uint32_t GetUnrollingFactor(HLoopInformation* loop_info, HBasicBlock* header) const;
314*795d594fSAndroid Build Coastguard Worker 
315*795d594fSAndroid Build Coastguard Worker  public:
X86_64LoopHelper(const CodeGenerator & codegen)316*795d594fSAndroid Build Coastguard Worker   explicit X86_64LoopHelper(const CodeGenerator& codegen) : ArchDefaultLoopHelper(codegen) {}
317*795d594fSAndroid Build Coastguard Worker 
GetSIMDUnrollingFactor(HBasicBlock * block,int64_t trip_count,uint32_t max_peel,uint32_t vector_length) const318*795d594fSAndroid Build Coastguard Worker   uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
319*795d594fSAndroid Build Coastguard Worker                                   int64_t trip_count,
320*795d594fSAndroid Build Coastguard Worker                                   uint32_t max_peel,
321*795d594fSAndroid Build Coastguard Worker                                   uint32_t vector_length) const override {
322*795d594fSAndroid Build Coastguard Worker     DCHECK_NE(vector_length, 0u);
323*795d594fSAndroid Build Coastguard Worker     HLoopInformation* loop_info = block->GetLoopInformation();
324*795d594fSAndroid Build Coastguard Worker     DCHECK(loop_info);
325*795d594fSAndroid Build Coastguard Worker     HBasicBlock* header = loop_info->GetHeader();
326*795d594fSAndroid Build Coastguard Worker     DCHECK(header);
327*795d594fSAndroid Build Coastguard Worker     uint32_t unroll_factor = 0;
328*795d594fSAndroid Build Coastguard Worker 
329*795d594fSAndroid Build Coastguard Worker     if ((trip_count == 0) || (trip_count == LoopAnalysisInfo::kUnknownTripCount)) {
330*795d594fSAndroid Build Coastguard Worker       // Don't unroll for large loop body size.
331*795d594fSAndroid Build Coastguard Worker       unroll_factor = GetUnrollingFactor(loop_info, header);
332*795d594fSAndroid Build Coastguard Worker       if (unroll_factor <= 1) {
333*795d594fSAndroid Build Coastguard Worker         return LoopAnalysisInfo::kNoUnrollingFactor;
334*795d594fSAndroid Build Coastguard Worker       }
335*795d594fSAndroid Build Coastguard Worker     } else {
336*795d594fSAndroid Build Coastguard Worker       // Don't unroll with insufficient iterations.
337*795d594fSAndroid Build Coastguard Worker       if (trip_count < (2 * vector_length + max_peel)) {
338*795d594fSAndroid Build Coastguard Worker         return LoopAnalysisInfo::kNoUnrollingFactor;
339*795d594fSAndroid Build Coastguard Worker       }
340*795d594fSAndroid Build Coastguard Worker 
341*795d594fSAndroid Build Coastguard Worker       // Don't unroll for large loop body size.
342*795d594fSAndroid Build Coastguard Worker       uint32_t unroll_cnt = GetUnrollingFactor(loop_info, header);
343*795d594fSAndroid Build Coastguard Worker       if (unroll_cnt <= 1) {
344*795d594fSAndroid Build Coastguard Worker         return LoopAnalysisInfo::kNoUnrollingFactor;
345*795d594fSAndroid Build Coastguard Worker       }
346*795d594fSAndroid Build Coastguard Worker 
347*795d594fSAndroid Build Coastguard Worker       // Find a beneficial unroll factor with the following restrictions:
348*795d594fSAndroid Build Coastguard Worker       //  - At least one iteration of the transformed loop should be executed.
349*795d594fSAndroid Build Coastguard Worker       //  - The loop body shouldn't be "too big" (heuristic).
350*795d594fSAndroid Build Coastguard Worker       uint32_t uf2 = (trip_count - max_peel) / vector_length;
351*795d594fSAndroid Build Coastguard Worker       unroll_factor = TruncToPowerOfTwo(std::min(uf2, unroll_cnt));
352*795d594fSAndroid Build Coastguard Worker       DCHECK_GE(unroll_factor, 1u);
353*795d594fSAndroid Build Coastguard Worker     }
354*795d594fSAndroid Build Coastguard Worker 
355*795d594fSAndroid Build Coastguard Worker     return unroll_factor;
356*795d594fSAndroid Build Coastguard Worker   }
357*795d594fSAndroid Build Coastguard Worker };
358*795d594fSAndroid Build Coastguard Worker 
GetUnrollingFactor(HLoopInformation * loop_info,HBasicBlock * header) const359*795d594fSAndroid Build Coastguard Worker uint32_t X86_64LoopHelper::GetUnrollingFactor(HLoopInformation* loop_info,
360*795d594fSAndroid Build Coastguard Worker                                               HBasicBlock* header) const {
361*795d594fSAndroid Build Coastguard Worker   uint32_t num_inst = 0, num_inst_header = 0, num_inst_loop_body = 0;
362*795d594fSAndroid Build Coastguard Worker   for (HBlocksInLoopIterator it(*loop_info); !it.Done(); it.Advance()) {
363*795d594fSAndroid Build Coastguard Worker     HBasicBlock* block = it.Current();
364*795d594fSAndroid Build Coastguard Worker     DCHECK(block);
365*795d594fSAndroid Build Coastguard Worker     num_inst = 0;
366*795d594fSAndroid Build Coastguard Worker 
367*795d594fSAndroid Build Coastguard Worker     for (HInstructionIterator it1(block->GetInstructions()); !it1.Done(); it1.Advance()) {
368*795d594fSAndroid Build Coastguard Worker       HInstruction* inst = it1.Current();
369*795d594fSAndroid Build Coastguard Worker       DCHECK(inst);
370*795d594fSAndroid Build Coastguard Worker 
371*795d594fSAndroid Build Coastguard Worker       // SuspendCheck inside loop is handled with Goto.
372*795d594fSAndroid Build Coastguard Worker       // Ignoring SuspendCheck & Goto as partially unrolled loop body will have only one Goto.
373*795d594fSAndroid Build Coastguard Worker       // Instruction count for Goto is being handled during unroll factor calculation below.
374*795d594fSAndroid Build Coastguard Worker       if (inst->IsSuspendCheck() || inst->IsGoto()) {
375*795d594fSAndroid Build Coastguard Worker         continue;
376*795d594fSAndroid Build Coastguard Worker       }
377*795d594fSAndroid Build Coastguard Worker 
378*795d594fSAndroid Build Coastguard Worker       num_inst += GetMachineInstructionCount(inst);
379*795d594fSAndroid Build Coastguard Worker     }
380*795d594fSAndroid Build Coastguard Worker 
381*795d594fSAndroid Build Coastguard Worker     if (block == header) {
382*795d594fSAndroid Build Coastguard Worker       num_inst_header = num_inst;
383*795d594fSAndroid Build Coastguard Worker     } else {
384*795d594fSAndroid Build Coastguard Worker       num_inst_loop_body += num_inst;
385*795d594fSAndroid Build Coastguard Worker     }
386*795d594fSAndroid Build Coastguard Worker   }
387*795d594fSAndroid Build Coastguard Worker 
388*795d594fSAndroid Build Coastguard Worker   // Calculate actual unroll factor.
389*795d594fSAndroid Build Coastguard Worker   uint32_t unrolling_factor = kX86_64MaxUnrollFactor;
390*795d594fSAndroid Build Coastguard Worker   uint32_t unrolling_inst = kX86_64UnrolledMaxBodySizeInstr;
391*795d594fSAndroid Build Coastguard Worker   // "-3" for one Goto instruction.
392*795d594fSAndroid Build Coastguard Worker   uint32_t desired_size = unrolling_inst - num_inst_header - 3;
393*795d594fSAndroid Build Coastguard Worker   if (desired_size < (2 * num_inst_loop_body)) {
394*795d594fSAndroid Build Coastguard Worker     return 1;
395*795d594fSAndroid Build Coastguard Worker   }
396*795d594fSAndroid Build Coastguard Worker 
397*795d594fSAndroid Build Coastguard Worker   while (unrolling_factor > 0) {
398*795d594fSAndroid Build Coastguard Worker     if ((desired_size >> unrolling_factor) >= num_inst_loop_body) {
399*795d594fSAndroid Build Coastguard Worker       break;
400*795d594fSAndroid Build Coastguard Worker     }
401*795d594fSAndroid Build Coastguard Worker     unrolling_factor--;
402*795d594fSAndroid Build Coastguard Worker   }
403*795d594fSAndroid Build Coastguard Worker 
404*795d594fSAndroid Build Coastguard Worker   return (1 << unrolling_factor);
405*795d594fSAndroid Build Coastguard Worker }
406*795d594fSAndroid Build Coastguard Worker 
Create(const CodeGenerator & codegen,ArenaAllocator * allocator)407*795d594fSAndroid Build Coastguard Worker ArchNoOptsLoopHelper* ArchNoOptsLoopHelper::Create(const CodeGenerator& codegen,
408*795d594fSAndroid Build Coastguard Worker                                                    ArenaAllocator* allocator) {
409*795d594fSAndroid Build Coastguard Worker   InstructionSet isa = codegen.GetInstructionSet();
410*795d594fSAndroid Build Coastguard Worker   switch (isa) {
411*795d594fSAndroid Build Coastguard Worker     case InstructionSet::kArm64: {
412*795d594fSAndroid Build Coastguard Worker       return new (allocator) Arm64LoopHelper(codegen);
413*795d594fSAndroid Build Coastguard Worker     }
414*795d594fSAndroid Build Coastguard Worker     case InstructionSet::kX86_64: {
415*795d594fSAndroid Build Coastguard Worker       return new (allocator) X86_64LoopHelper(codegen);
416*795d594fSAndroid Build Coastguard Worker     }
417*795d594fSAndroid Build Coastguard Worker     default: {
418*795d594fSAndroid Build Coastguard Worker       return new (allocator) ArchDefaultLoopHelper(codegen);
419*795d594fSAndroid Build Coastguard Worker     }
420*795d594fSAndroid Build Coastguard Worker   }
421*795d594fSAndroid Build Coastguard Worker }
422*795d594fSAndroid Build Coastguard Worker 
423*795d594fSAndroid Build Coastguard Worker }  // namespace art
424