1*795d594fSAndroid Build Coastguard Worker /*
2*795d594fSAndroid Build Coastguard Worker * Copyright (C) 2018 The Android Open Source Project
3*795d594fSAndroid Build Coastguard Worker *
4*795d594fSAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*795d594fSAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*795d594fSAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*795d594fSAndroid Build Coastguard Worker *
8*795d594fSAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*795d594fSAndroid Build Coastguard Worker *
10*795d594fSAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*795d594fSAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*795d594fSAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*795d594fSAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*795d594fSAndroid Build Coastguard Worker * limitations under the License.
15*795d594fSAndroid Build Coastguard Worker */
16*795d594fSAndroid Build Coastguard Worker
17*795d594fSAndroid Build Coastguard Worker #include "loop_analysis.h"
18*795d594fSAndroid Build Coastguard Worker
19*795d594fSAndroid Build Coastguard Worker #include "base/bit_vector-inl.h"
20*795d594fSAndroid Build Coastguard Worker #include "code_generator.h"
21*795d594fSAndroid Build Coastguard Worker #include "induction_var_range.h"
22*795d594fSAndroid Build Coastguard Worker
23*795d594fSAndroid Build Coastguard Worker namespace art HIDDEN {
24*795d594fSAndroid Build Coastguard Worker
CalculateLoopBasicProperties(HLoopInformation * loop_info,LoopAnalysisInfo * analysis_results,int64_t trip_count)25*795d594fSAndroid Build Coastguard Worker void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
26*795d594fSAndroid Build Coastguard Worker LoopAnalysisInfo* analysis_results,
27*795d594fSAndroid Build Coastguard Worker int64_t trip_count) {
28*795d594fSAndroid Build Coastguard Worker analysis_results->trip_count_ = trip_count;
29*795d594fSAndroid Build Coastguard Worker
30*795d594fSAndroid Build Coastguard Worker for (HBlocksInLoopIterator block_it(*loop_info);
31*795d594fSAndroid Build Coastguard Worker !block_it.Done();
32*795d594fSAndroid Build Coastguard Worker block_it.Advance()) {
33*795d594fSAndroid Build Coastguard Worker HBasicBlock* block = block_it.Current();
34*795d594fSAndroid Build Coastguard Worker
35*795d594fSAndroid Build Coastguard Worker // Check whether one of the successor is loop exit.
36*795d594fSAndroid Build Coastguard Worker for (HBasicBlock* successor : block->GetSuccessors()) {
37*795d594fSAndroid Build Coastguard Worker if (!loop_info->Contains(*successor)) {
38*795d594fSAndroid Build Coastguard Worker analysis_results->exits_num_++;
39*795d594fSAndroid Build Coastguard Worker
40*795d594fSAndroid Build Coastguard Worker // We track number of invariant loop exits which correspond to HIf instruction and
41*795d594fSAndroid Build Coastguard Worker // can be eliminated by loop peeling; other control flow instruction are ignored and will
42*795d594fSAndroid Build Coastguard Worker // not cause loop peeling to happen as they either cannot be inside a loop, or by
43*795d594fSAndroid Build Coastguard Worker // definition cannot be loop exits (unconditional instructions), or are not beneficial for
44*795d594fSAndroid Build Coastguard Worker // the optimization.
45*795d594fSAndroid Build Coastguard Worker HIf* hif = block->GetLastInstruction()->AsIfOrNull();
46*795d594fSAndroid Build Coastguard Worker if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
47*795d594fSAndroid Build Coastguard Worker analysis_results->invariant_exits_num_++;
48*795d594fSAndroid Build Coastguard Worker }
49*795d594fSAndroid Build Coastguard Worker }
50*795d594fSAndroid Build Coastguard Worker }
51*795d594fSAndroid Build Coastguard Worker
52*795d594fSAndroid Build Coastguard Worker for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
53*795d594fSAndroid Build Coastguard Worker HInstruction* instruction = it.Current();
54*795d594fSAndroid Build Coastguard Worker if (it.Current()->GetType() == DataType::Type::kInt64) {
55*795d594fSAndroid Build Coastguard Worker analysis_results->has_long_type_instructions_ = true;
56*795d594fSAndroid Build Coastguard Worker }
57*795d594fSAndroid Build Coastguard Worker if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) {
58*795d594fSAndroid Build Coastguard Worker analysis_results->has_instructions_preventing_scalar_peeling_ = true;
59*795d594fSAndroid Build Coastguard Worker analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
60*795d594fSAndroid Build Coastguard Worker }
61*795d594fSAndroid Build Coastguard Worker analysis_results->instr_num_++;
62*795d594fSAndroid Build Coastguard Worker }
63*795d594fSAndroid Build Coastguard Worker analysis_results->bb_num_++;
64*795d594fSAndroid Build Coastguard Worker }
65*795d594fSAndroid Build Coastguard Worker }
66*795d594fSAndroid Build Coastguard Worker
GetLoopTripCount(HLoopInformation * loop_info,const InductionVarRange * induction_range)67*795d594fSAndroid Build Coastguard Worker int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info,
68*795d594fSAndroid Build Coastguard Worker const InductionVarRange* induction_range) {
69*795d594fSAndroid Build Coastguard Worker int64_t trip_count;
70*795d594fSAndroid Build Coastguard Worker if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) {
71*795d594fSAndroid Build Coastguard Worker trip_count = LoopAnalysisInfo::kUnknownTripCount;
72*795d594fSAndroid Build Coastguard Worker }
73*795d594fSAndroid Build Coastguard Worker return trip_count;
74*795d594fSAndroid Build Coastguard Worker }
75*795d594fSAndroid Build Coastguard Worker
76*795d594fSAndroid Build Coastguard Worker // Default implementation of loop helper; used for all targets unless a custom implementation
77*795d594fSAndroid Build Coastguard Worker // is provided. Enables scalar loop peeling and unrolling with the most conservative heuristics.
78*795d594fSAndroid Build Coastguard Worker class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
79*795d594fSAndroid Build Coastguard Worker public:
ArchDefaultLoopHelper(const CodeGenerator & codegen)80*795d594fSAndroid Build Coastguard Worker explicit ArchDefaultLoopHelper(const CodeGenerator& codegen) : ArchNoOptsLoopHelper(codegen) {}
81*795d594fSAndroid Build Coastguard Worker // Scalar loop unrolling parameters and heuristics.
82*795d594fSAndroid Build Coastguard Worker //
83*795d594fSAndroid Build Coastguard Worker // Maximum possible unrolling factor.
84*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kScalarMaxUnrollFactor = 2;
85*795d594fSAndroid Build Coastguard Worker // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
86*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kScalarHeuristicMaxBodySizeInstr = 17;
87*795d594fSAndroid Build Coastguard Worker // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
88*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
89*795d594fSAndroid Build Coastguard Worker // Maximum number of instructions to be created as a result of full unrolling.
90*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kScalarHeuristicFullyUnrolledMaxInstrThreshold = 35;
91*795d594fSAndroid Build Coastguard Worker
IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo * analysis_info) const92*795d594fSAndroid Build Coastguard Worker bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const override {
93*795d594fSAndroid Build Coastguard Worker return analysis_info->HasLongTypeInstructions() ||
94*795d594fSAndroid Build Coastguard Worker IsLoopTooBig(analysis_info,
95*795d594fSAndroid Build Coastguard Worker kScalarHeuristicMaxBodySizeInstr,
96*795d594fSAndroid Build Coastguard Worker kScalarHeuristicMaxBodySizeBlocks);
97*795d594fSAndroid Build Coastguard Worker }
98*795d594fSAndroid Build Coastguard Worker
GetScalarUnrollingFactor(const LoopAnalysisInfo * analysis_info) const99*795d594fSAndroid Build Coastguard Worker uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const override {
100*795d594fSAndroid Build Coastguard Worker int64_t trip_count = analysis_info->GetTripCount();
101*795d594fSAndroid Build Coastguard Worker // Unroll only loops with known trip count.
102*795d594fSAndroid Build Coastguard Worker if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
103*795d594fSAndroid Build Coastguard Worker return LoopAnalysisInfo::kNoUnrollingFactor;
104*795d594fSAndroid Build Coastguard Worker }
105*795d594fSAndroid Build Coastguard Worker uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
106*795d594fSAndroid Build Coastguard Worker if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
107*795d594fSAndroid Build Coastguard Worker return LoopAnalysisInfo::kNoUnrollingFactor;
108*795d594fSAndroid Build Coastguard Worker }
109*795d594fSAndroid Build Coastguard Worker
110*795d594fSAndroid Build Coastguard Worker return desired_unrolling_factor;
111*795d594fSAndroid Build Coastguard Worker }
112*795d594fSAndroid Build Coastguard Worker
IsLoopPeelingEnabled() const113*795d594fSAndroid Build Coastguard Worker bool IsLoopPeelingEnabled() const override { return true; }
114*795d594fSAndroid Build Coastguard Worker
IsFullUnrollingBeneficial(LoopAnalysisInfo * analysis_info) const115*795d594fSAndroid Build Coastguard Worker bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info) const override {
116*795d594fSAndroid Build Coastguard Worker int64_t trip_count = analysis_info->GetTripCount();
117*795d594fSAndroid Build Coastguard Worker // We assume that trip count is known.
118*795d594fSAndroid Build Coastguard Worker DCHECK_NE(trip_count, LoopAnalysisInfo::kUnknownTripCount);
119*795d594fSAndroid Build Coastguard Worker size_t instr_num = analysis_info->GetNumberOfInstructions();
120*795d594fSAndroid Build Coastguard Worker return (trip_count * instr_num < kScalarHeuristicFullyUnrolledMaxInstrThreshold);
121*795d594fSAndroid Build Coastguard Worker }
122*795d594fSAndroid Build Coastguard Worker
123*795d594fSAndroid Build Coastguard Worker protected:
IsLoopTooBig(LoopAnalysisInfo * loop_analysis_info,size_t instr_threshold,size_t bb_threshold) const124*795d594fSAndroid Build Coastguard Worker bool IsLoopTooBig(LoopAnalysisInfo* loop_analysis_info,
125*795d594fSAndroid Build Coastguard Worker size_t instr_threshold,
126*795d594fSAndroid Build Coastguard Worker size_t bb_threshold) const {
127*795d594fSAndroid Build Coastguard Worker size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
128*795d594fSAndroid Build Coastguard Worker size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
129*795d594fSAndroid Build Coastguard Worker return (instr_num >= instr_threshold || bb_num >= bb_threshold);
130*795d594fSAndroid Build Coastguard Worker }
131*795d594fSAndroid Build Coastguard Worker };
132*795d594fSAndroid Build Coastguard Worker
133*795d594fSAndroid Build Coastguard Worker // Custom implementation of loop helper for arm64 target. Enables heuristics for scalar loop
134*795d594fSAndroid Build Coastguard Worker // peeling and unrolling and supports SIMD loop unrolling.
135*795d594fSAndroid Build Coastguard Worker class Arm64LoopHelper : public ArchDefaultLoopHelper {
136*795d594fSAndroid Build Coastguard Worker public:
Arm64LoopHelper(const CodeGenerator & codegen)137*795d594fSAndroid Build Coastguard Worker explicit Arm64LoopHelper(const CodeGenerator& codegen) : ArchDefaultLoopHelper(codegen) {}
138*795d594fSAndroid Build Coastguard Worker // SIMD loop unrolling parameters and heuristics.
139*795d594fSAndroid Build Coastguard Worker //
140*795d594fSAndroid Build Coastguard Worker // Maximum possible unrolling factor.
141*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
142*795d594fSAndroid Build Coastguard Worker // Loop's maximum instruction count. Loops with higher count will not be unrolled.
143*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
144*795d594fSAndroid Build Coastguard Worker
145*795d594fSAndroid Build Coastguard Worker // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
146*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
147*795d594fSAndroid Build Coastguard Worker // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
148*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
149*795d594fSAndroid Build Coastguard Worker
IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo * loop_analysis_info) const150*795d594fSAndroid Build Coastguard Worker bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const override {
151*795d594fSAndroid Build Coastguard Worker return IsLoopTooBig(loop_analysis_info,
152*795d594fSAndroid Build Coastguard Worker kArm64ScalarHeuristicMaxBodySizeInstr,
153*795d594fSAndroid Build Coastguard Worker kArm64ScalarHeuristicMaxBodySizeBlocks);
154*795d594fSAndroid Build Coastguard Worker }
155*795d594fSAndroid Build Coastguard Worker
GetSIMDUnrollingFactor(HBasicBlock * block,int64_t trip_count,uint32_t max_peel,uint32_t vector_length) const156*795d594fSAndroid Build Coastguard Worker uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
157*795d594fSAndroid Build Coastguard Worker int64_t trip_count,
158*795d594fSAndroid Build Coastguard Worker uint32_t max_peel,
159*795d594fSAndroid Build Coastguard Worker uint32_t vector_length) const override {
160*795d594fSAndroid Build Coastguard Worker // Don't unroll with insufficient iterations.
161*795d594fSAndroid Build Coastguard Worker // TODO: Unroll loops with unknown trip count.
162*795d594fSAndroid Build Coastguard Worker DCHECK_NE(vector_length, 0u);
163*795d594fSAndroid Build Coastguard Worker // TODO: Unroll loops in predicated vectorization mode.
164*795d594fSAndroid Build Coastguard Worker if (codegen_.SupportsPredicatedSIMD()) {
165*795d594fSAndroid Build Coastguard Worker return LoopAnalysisInfo::kNoUnrollingFactor;
166*795d594fSAndroid Build Coastguard Worker }
167*795d594fSAndroid Build Coastguard Worker if (trip_count < (2 * vector_length + max_peel)) {
168*795d594fSAndroid Build Coastguard Worker return LoopAnalysisInfo::kNoUnrollingFactor;
169*795d594fSAndroid Build Coastguard Worker }
170*795d594fSAndroid Build Coastguard Worker // Don't unroll for large loop body size.
171*795d594fSAndroid Build Coastguard Worker uint32_t instruction_count = block->GetInstructions().CountSize();
172*795d594fSAndroid Build Coastguard Worker if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
173*795d594fSAndroid Build Coastguard Worker return LoopAnalysisInfo::kNoUnrollingFactor;
174*795d594fSAndroid Build Coastguard Worker }
175*795d594fSAndroid Build Coastguard Worker // Find a beneficial unroll factor with the following restrictions:
176*795d594fSAndroid Build Coastguard Worker // - At least one iteration of the transformed loop should be executed.
177*795d594fSAndroid Build Coastguard Worker // - The loop body shouldn't be "too big" (heuristic).
178*795d594fSAndroid Build Coastguard Worker
179*795d594fSAndroid Build Coastguard Worker uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count;
180*795d594fSAndroid Build Coastguard Worker uint32_t uf2 = (trip_count - max_peel) / vector_length;
181*795d594fSAndroid Build Coastguard Worker uint32_t unroll_factor =
182*795d594fSAndroid Build Coastguard Worker TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor}));
183*795d594fSAndroid Build Coastguard Worker DCHECK_GE(unroll_factor, 1u);
184*795d594fSAndroid Build Coastguard Worker return unroll_factor;
185*795d594fSAndroid Build Coastguard Worker }
186*795d594fSAndroid Build Coastguard Worker };
187*795d594fSAndroid Build Coastguard Worker
188*795d594fSAndroid Build Coastguard Worker // Custom implementation of loop helper for X86_64 target. Enables heuristics for scalar loop
189*795d594fSAndroid Build Coastguard Worker // peeling and unrolling and supports SIMD loop unrolling.
190*795d594fSAndroid Build Coastguard Worker class X86_64LoopHelper : public ArchDefaultLoopHelper {
191*795d594fSAndroid Build Coastguard Worker // mapping of machine instruction count for most used IR instructions
192*795d594fSAndroid Build Coastguard Worker // Few IRs generate different number of instructions based on input and result type.
193*795d594fSAndroid Build Coastguard Worker // We checked top java apps, benchmarks and used the most generated instruction count.
GetMachineInstructionCount(HInstruction * inst) const194*795d594fSAndroid Build Coastguard Worker uint32_t GetMachineInstructionCount(HInstruction* inst) const {
195*795d594fSAndroid Build Coastguard Worker switch (inst->GetKind()) {
196*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kAbs:
197*795d594fSAndroid Build Coastguard Worker return 3;
198*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kAdd:
199*795d594fSAndroid Build Coastguard Worker return 1;
200*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kAnd:
201*795d594fSAndroid Build Coastguard Worker return 1;
202*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kArrayLength:
203*795d594fSAndroid Build Coastguard Worker return 1;
204*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kArrayGet:
205*795d594fSAndroid Build Coastguard Worker return 1;
206*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kArraySet:
207*795d594fSAndroid Build Coastguard Worker return 1;
208*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kBoundsCheck:
209*795d594fSAndroid Build Coastguard Worker return 2;
210*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kCheckCast:
211*795d594fSAndroid Build Coastguard Worker return 9;
212*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kDiv:
213*795d594fSAndroid Build Coastguard Worker return 8;
214*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kDivZeroCheck:
215*795d594fSAndroid Build Coastguard Worker return 2;
216*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kEqual:
217*795d594fSAndroid Build Coastguard Worker return 3;
218*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kGreaterThan:
219*795d594fSAndroid Build Coastguard Worker return 3;
220*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kGreaterThanOrEqual:
221*795d594fSAndroid Build Coastguard Worker return 3;
222*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kIf:
223*795d594fSAndroid Build Coastguard Worker return 2;
224*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kInstanceFieldGet:
225*795d594fSAndroid Build Coastguard Worker return 2;
226*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kInstanceFieldSet:
227*795d594fSAndroid Build Coastguard Worker return 1;
228*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kLessThan:
229*795d594fSAndroid Build Coastguard Worker return 3;
230*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kLessThanOrEqual:
231*795d594fSAndroid Build Coastguard Worker return 3;
232*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kMax:
233*795d594fSAndroid Build Coastguard Worker return 2;
234*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kMin:
235*795d594fSAndroid Build Coastguard Worker return 2;
236*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kMul:
237*795d594fSAndroid Build Coastguard Worker return 1;
238*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kNotEqual:
239*795d594fSAndroid Build Coastguard Worker return 3;
240*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kOr:
241*795d594fSAndroid Build Coastguard Worker return 1;
242*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kRem:
243*795d594fSAndroid Build Coastguard Worker return 11;
244*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kSelect:
245*795d594fSAndroid Build Coastguard Worker return 2;
246*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kShl:
247*795d594fSAndroid Build Coastguard Worker return 1;
248*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kShr:
249*795d594fSAndroid Build Coastguard Worker return 1;
250*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kSub:
251*795d594fSAndroid Build Coastguard Worker return 1;
252*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kTypeConversion:
253*795d594fSAndroid Build Coastguard Worker return 1;
254*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kUShr:
255*795d594fSAndroid Build Coastguard Worker return 1;
256*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecReplicateScalar:
257*795d594fSAndroid Build Coastguard Worker return 2;
258*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecExtractScalar:
259*795d594fSAndroid Build Coastguard Worker return 1;
260*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecReduce:
261*795d594fSAndroid Build Coastguard Worker return 4;
262*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecNeg:
263*795d594fSAndroid Build Coastguard Worker return 2;
264*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecAbs:
265*795d594fSAndroid Build Coastguard Worker return 4;
266*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecNot:
267*795d594fSAndroid Build Coastguard Worker return 3;
268*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecAdd:
269*795d594fSAndroid Build Coastguard Worker return 1;
270*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecSub:
271*795d594fSAndroid Build Coastguard Worker return 1;
272*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecMul:
273*795d594fSAndroid Build Coastguard Worker return 1;
274*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecDiv:
275*795d594fSAndroid Build Coastguard Worker return 1;
276*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecMax:
277*795d594fSAndroid Build Coastguard Worker return 1;
278*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecMin:
279*795d594fSAndroid Build Coastguard Worker return 1;
280*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecOr:
281*795d594fSAndroid Build Coastguard Worker return 1;
282*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecXor:
283*795d594fSAndroid Build Coastguard Worker return 1;
284*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecShl:
285*795d594fSAndroid Build Coastguard Worker return 1;
286*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecShr:
287*795d594fSAndroid Build Coastguard Worker return 1;
288*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecLoad:
289*795d594fSAndroid Build Coastguard Worker return 1;
290*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kVecStore:
291*795d594fSAndroid Build Coastguard Worker return 1;
292*795d594fSAndroid Build Coastguard Worker case HInstruction::InstructionKind::kXor:
293*795d594fSAndroid Build Coastguard Worker return 1;
294*795d594fSAndroid Build Coastguard Worker default:
295*795d594fSAndroid Build Coastguard Worker return 1;
296*795d594fSAndroid Build Coastguard Worker }
297*795d594fSAndroid Build Coastguard Worker }
298*795d594fSAndroid Build Coastguard Worker
299*795d594fSAndroid Build Coastguard Worker // Maximum possible unrolling factor.
300*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kX86_64MaxUnrollFactor = 2; // pow(2,2) = 4
301*795d594fSAndroid Build Coastguard Worker
302*795d594fSAndroid Build Coastguard Worker // According to Intel® 64 and IA-32 Architectures Optimization Reference Manual,
303*795d594fSAndroid Build Coastguard Worker // avoid excessive loop unrolling to ensure LSD (loop stream decoder) is operating efficiently.
304*795d594fSAndroid Build Coastguard Worker // This variable takes care that unrolled loop instructions should not exceed LSD size.
305*795d594fSAndroid Build Coastguard Worker // For Intel Atom processors (silvermont & goldmont), LSD size is 28
306*795d594fSAndroid Build Coastguard Worker // TODO - identify architecture and LSD size at runtime
307*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kX86_64UnrolledMaxBodySizeInstr = 28;
308*795d594fSAndroid Build Coastguard Worker
309*795d594fSAndroid Build Coastguard Worker // Loop's maximum basic block count. Loops with higher count will not be partial
310*795d594fSAndroid Build Coastguard Worker // unrolled (unknown iterations).
311*795d594fSAndroid Build Coastguard Worker static constexpr uint32_t kX86_64UnknownIterMaxBodySizeBlocks = 2;
312*795d594fSAndroid Build Coastguard Worker
313*795d594fSAndroid Build Coastguard Worker uint32_t GetUnrollingFactor(HLoopInformation* loop_info, HBasicBlock* header) const;
314*795d594fSAndroid Build Coastguard Worker
315*795d594fSAndroid Build Coastguard Worker public:
X86_64LoopHelper(const CodeGenerator & codegen)316*795d594fSAndroid Build Coastguard Worker explicit X86_64LoopHelper(const CodeGenerator& codegen) : ArchDefaultLoopHelper(codegen) {}
317*795d594fSAndroid Build Coastguard Worker
GetSIMDUnrollingFactor(HBasicBlock * block,int64_t trip_count,uint32_t max_peel,uint32_t vector_length) const318*795d594fSAndroid Build Coastguard Worker uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
319*795d594fSAndroid Build Coastguard Worker int64_t trip_count,
320*795d594fSAndroid Build Coastguard Worker uint32_t max_peel,
321*795d594fSAndroid Build Coastguard Worker uint32_t vector_length) const override {
322*795d594fSAndroid Build Coastguard Worker DCHECK_NE(vector_length, 0u);
323*795d594fSAndroid Build Coastguard Worker HLoopInformation* loop_info = block->GetLoopInformation();
324*795d594fSAndroid Build Coastguard Worker DCHECK(loop_info);
325*795d594fSAndroid Build Coastguard Worker HBasicBlock* header = loop_info->GetHeader();
326*795d594fSAndroid Build Coastguard Worker DCHECK(header);
327*795d594fSAndroid Build Coastguard Worker uint32_t unroll_factor = 0;
328*795d594fSAndroid Build Coastguard Worker
329*795d594fSAndroid Build Coastguard Worker if ((trip_count == 0) || (trip_count == LoopAnalysisInfo::kUnknownTripCount)) {
330*795d594fSAndroid Build Coastguard Worker // Don't unroll for large loop body size.
331*795d594fSAndroid Build Coastguard Worker unroll_factor = GetUnrollingFactor(loop_info, header);
332*795d594fSAndroid Build Coastguard Worker if (unroll_factor <= 1) {
333*795d594fSAndroid Build Coastguard Worker return LoopAnalysisInfo::kNoUnrollingFactor;
334*795d594fSAndroid Build Coastguard Worker }
335*795d594fSAndroid Build Coastguard Worker } else {
336*795d594fSAndroid Build Coastguard Worker // Don't unroll with insufficient iterations.
337*795d594fSAndroid Build Coastguard Worker if (trip_count < (2 * vector_length + max_peel)) {
338*795d594fSAndroid Build Coastguard Worker return LoopAnalysisInfo::kNoUnrollingFactor;
339*795d594fSAndroid Build Coastguard Worker }
340*795d594fSAndroid Build Coastguard Worker
341*795d594fSAndroid Build Coastguard Worker // Don't unroll for large loop body size.
342*795d594fSAndroid Build Coastguard Worker uint32_t unroll_cnt = GetUnrollingFactor(loop_info, header);
343*795d594fSAndroid Build Coastguard Worker if (unroll_cnt <= 1) {
344*795d594fSAndroid Build Coastguard Worker return LoopAnalysisInfo::kNoUnrollingFactor;
345*795d594fSAndroid Build Coastguard Worker }
346*795d594fSAndroid Build Coastguard Worker
347*795d594fSAndroid Build Coastguard Worker // Find a beneficial unroll factor with the following restrictions:
348*795d594fSAndroid Build Coastguard Worker // - At least one iteration of the transformed loop should be executed.
349*795d594fSAndroid Build Coastguard Worker // - The loop body shouldn't be "too big" (heuristic).
350*795d594fSAndroid Build Coastguard Worker uint32_t uf2 = (trip_count - max_peel) / vector_length;
351*795d594fSAndroid Build Coastguard Worker unroll_factor = TruncToPowerOfTwo(std::min(uf2, unroll_cnt));
352*795d594fSAndroid Build Coastguard Worker DCHECK_GE(unroll_factor, 1u);
353*795d594fSAndroid Build Coastguard Worker }
354*795d594fSAndroid Build Coastguard Worker
355*795d594fSAndroid Build Coastguard Worker return unroll_factor;
356*795d594fSAndroid Build Coastguard Worker }
357*795d594fSAndroid Build Coastguard Worker };
358*795d594fSAndroid Build Coastguard Worker
GetUnrollingFactor(HLoopInformation * loop_info,HBasicBlock * header) const359*795d594fSAndroid Build Coastguard Worker uint32_t X86_64LoopHelper::GetUnrollingFactor(HLoopInformation* loop_info,
360*795d594fSAndroid Build Coastguard Worker HBasicBlock* header) const {
361*795d594fSAndroid Build Coastguard Worker uint32_t num_inst = 0, num_inst_header = 0, num_inst_loop_body = 0;
362*795d594fSAndroid Build Coastguard Worker for (HBlocksInLoopIterator it(*loop_info); !it.Done(); it.Advance()) {
363*795d594fSAndroid Build Coastguard Worker HBasicBlock* block = it.Current();
364*795d594fSAndroid Build Coastguard Worker DCHECK(block);
365*795d594fSAndroid Build Coastguard Worker num_inst = 0;
366*795d594fSAndroid Build Coastguard Worker
367*795d594fSAndroid Build Coastguard Worker for (HInstructionIterator it1(block->GetInstructions()); !it1.Done(); it1.Advance()) {
368*795d594fSAndroid Build Coastguard Worker HInstruction* inst = it1.Current();
369*795d594fSAndroid Build Coastguard Worker DCHECK(inst);
370*795d594fSAndroid Build Coastguard Worker
371*795d594fSAndroid Build Coastguard Worker // SuspendCheck inside loop is handled with Goto.
372*795d594fSAndroid Build Coastguard Worker // Ignoring SuspendCheck & Goto as partially unrolled loop body will have only one Goto.
373*795d594fSAndroid Build Coastguard Worker // Instruction count for Goto is being handled during unroll factor calculation below.
374*795d594fSAndroid Build Coastguard Worker if (inst->IsSuspendCheck() || inst->IsGoto()) {
375*795d594fSAndroid Build Coastguard Worker continue;
376*795d594fSAndroid Build Coastguard Worker }
377*795d594fSAndroid Build Coastguard Worker
378*795d594fSAndroid Build Coastguard Worker num_inst += GetMachineInstructionCount(inst);
379*795d594fSAndroid Build Coastguard Worker }
380*795d594fSAndroid Build Coastguard Worker
381*795d594fSAndroid Build Coastguard Worker if (block == header) {
382*795d594fSAndroid Build Coastguard Worker num_inst_header = num_inst;
383*795d594fSAndroid Build Coastguard Worker } else {
384*795d594fSAndroid Build Coastguard Worker num_inst_loop_body += num_inst;
385*795d594fSAndroid Build Coastguard Worker }
386*795d594fSAndroid Build Coastguard Worker }
387*795d594fSAndroid Build Coastguard Worker
388*795d594fSAndroid Build Coastguard Worker // Calculate actual unroll factor.
389*795d594fSAndroid Build Coastguard Worker uint32_t unrolling_factor = kX86_64MaxUnrollFactor;
390*795d594fSAndroid Build Coastguard Worker uint32_t unrolling_inst = kX86_64UnrolledMaxBodySizeInstr;
391*795d594fSAndroid Build Coastguard Worker // "-3" for one Goto instruction.
392*795d594fSAndroid Build Coastguard Worker uint32_t desired_size = unrolling_inst - num_inst_header - 3;
393*795d594fSAndroid Build Coastguard Worker if (desired_size < (2 * num_inst_loop_body)) {
394*795d594fSAndroid Build Coastguard Worker return 1;
395*795d594fSAndroid Build Coastguard Worker }
396*795d594fSAndroid Build Coastguard Worker
397*795d594fSAndroid Build Coastguard Worker while (unrolling_factor > 0) {
398*795d594fSAndroid Build Coastguard Worker if ((desired_size >> unrolling_factor) >= num_inst_loop_body) {
399*795d594fSAndroid Build Coastguard Worker break;
400*795d594fSAndroid Build Coastguard Worker }
401*795d594fSAndroid Build Coastguard Worker unrolling_factor--;
402*795d594fSAndroid Build Coastguard Worker }
403*795d594fSAndroid Build Coastguard Worker
404*795d594fSAndroid Build Coastguard Worker return (1 << unrolling_factor);
405*795d594fSAndroid Build Coastguard Worker }
406*795d594fSAndroid Build Coastguard Worker
Create(const CodeGenerator & codegen,ArenaAllocator * allocator)407*795d594fSAndroid Build Coastguard Worker ArchNoOptsLoopHelper* ArchNoOptsLoopHelper::Create(const CodeGenerator& codegen,
408*795d594fSAndroid Build Coastguard Worker ArenaAllocator* allocator) {
409*795d594fSAndroid Build Coastguard Worker InstructionSet isa = codegen.GetInstructionSet();
410*795d594fSAndroid Build Coastguard Worker switch (isa) {
411*795d594fSAndroid Build Coastguard Worker case InstructionSet::kArm64: {
412*795d594fSAndroid Build Coastguard Worker return new (allocator) Arm64LoopHelper(codegen);
413*795d594fSAndroid Build Coastguard Worker }
414*795d594fSAndroid Build Coastguard Worker case InstructionSet::kX86_64: {
415*795d594fSAndroid Build Coastguard Worker return new (allocator) X86_64LoopHelper(codegen);
416*795d594fSAndroid Build Coastguard Worker }
417*795d594fSAndroid Build Coastguard Worker default: {
418*795d594fSAndroid Build Coastguard Worker return new (allocator) ArchDefaultLoopHelper(codegen);
419*795d594fSAndroid Build Coastguard Worker }
420*795d594fSAndroid Build Coastguard Worker }
421*795d594fSAndroid Build Coastguard Worker }
422*795d594fSAndroid Build Coastguard Worker
423*795d594fSAndroid Build Coastguard Worker } // namespace art
424