1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "android-base/logging.h"
18 #include "base/macros.h"
19 #include "code_generator.h"
20 #include "driver/compiler_options.h"
21 #include "loop_optimization.h"
22 #include "optimizing/data_type.h"
23 #include "optimizing/nodes.h"
24 #include "optimizing_unit_test.h"
25
26 namespace art HIDDEN {
27
28 // Base class for loop optimization tests.
29 class LoopOptimizationTestBase : public OptimizingUnitTest {
30 protected:
SetUp()31 void SetUp() override {
32 OptimizingUnitTest::SetUp();
33
34 BuildGraph();
35 iva_ = new (GetAllocator()) HInductionVarAnalysis(graph_);
36 if (compiler_options_ == nullptr) {
37 compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default");
38 DCHECK(compiler_options_ != nullptr);
39 }
40 codegen_ = CodeGenerator::Create(graph_, *compiler_options_);
41 DCHECK(codegen_.get() != nullptr);
42 loop_opt_ = new (GetAllocator()) HLoopOptimization(
43 graph_, *codegen_.get(), iva_, /* stats= */ nullptr);
44 }
45
TearDown()46 void TearDown() override {
47 codegen_.reset();
48 compiler_options_.reset();
49 graph_ = nullptr;
50 ResetPoolAndAllocator();
51 OptimizingUnitTest::TearDown();
52 }
53
54 virtual void BuildGraph() = 0;
55
56 // Run loop optimization and optionally check the graph.
PerformAnalysis(bool run_checker)57 void PerformAnalysis(bool run_checker) {
58 graph_->BuildDominatorTree();
59
60 // Check the graph is valid before loop optimization.
61 std::ostringstream oss;
62 if (run_checker) {
63 ASSERT_TRUE(CheckGraph(oss)) << oss.str();
64 }
65
66 iva_->Run();
67 loop_opt_->Run();
68
69 // Check the graph is valid after loop optimization.
70 if (run_checker) {
71 ASSERT_TRUE(CheckGraph(oss)) << oss.str();
72 }
73 }
74
75 // General building fields.
76 std::unique_ptr<CompilerOptions> compiler_options_;
77 std::unique_ptr<CodeGenerator> codegen_;
78 HInductionVarAnalysis* iva_;
79 HLoopOptimization* loop_opt_;
80
81 HBasicBlock* return_block_;
82
83 HInstruction* parameter_;
84 };
85
86 /**
87 * Fixture class for the loop optimization tests. These unit tests mostly focus
88 * on constructing the loop hierarchy. Checker tests are also used to test
89 * specific optimizations.
90 */
91 class LoopOptimizationTest : public LoopOptimizationTestBase {
92 protected:
~LoopOptimizationTest()93 virtual ~LoopOptimizationTest() {}
94
95 /** Constructs bare minimum graph. */
BuildGraph()96 void BuildGraph() override {
97 return_block_ = InitEntryMainExitGraph();
98 graph_->SetNumberOfVRegs(1);
99 parameter_ = MakeParam(DataType::Type::kInt32);
100 }
101
102 /** Adds a loop nest at given position before successor. */
AddLoop(HBasicBlock * position,HBasicBlock * successor)103 HBasicBlock* AddLoop(HBasicBlock* position, HBasicBlock* successor) {
104 HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_);
105 HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_);
106 graph_->AddBlock(header);
107 graph_->AddBlock(body);
108 // Control flow.
109 position->ReplaceSuccessor(successor, header);
110 header->AddSuccessor(body);
111 header->AddSuccessor(successor);
112 MakeIf(header, parameter_);
113 body->AddSuccessor(header);
114 MakeGoto(body);
115 return header;
116 }
117
118 /** Constructs string representation of computed loop hierarchy. */
LoopStructure()119 std::string LoopStructure() {
120 return LoopStructureRecurse(loop_opt_->top_loop_);
121 }
122
123 // Helper method
LoopStructureRecurse(HLoopOptimization::LoopNode * node)124 std::string LoopStructureRecurse(HLoopOptimization::LoopNode* node) {
125 std::string s;
126 for ( ; node != nullptr; node = node->next) {
127 s.append("[");
128 s.append(LoopStructureRecurse(node->inner));
129 s.append("]");
130 }
131 return s;
132 }
133 };
134
135 #ifdef ART_ENABLE_CODEGEN_arm64
136 // Unit tests for predicated vectorization.
137 class PredicatedSimdLoopOptimizationTest : public LoopOptimizationTestBase {
138 protected:
SetUp()139 void SetUp() override {
140 // Predicated SIMD is only supported by SVE on Arm64.
141 compiler_options_ = CommonCompilerTest::CreateCompilerOptions(InstructionSet::kArm64,
142 "default",
143 "sve");
144 LoopOptimizationTestBase::SetUp();
145 }
146
~PredicatedSimdLoopOptimizationTest()147 virtual ~PredicatedSimdLoopOptimizationTest() {}
148
149 // Constructs a graph with a diamond loop which should be vectorizable with predicated
150 // vectorization. This graph includes a basic loop induction (consisting of Phi, Add, If and
151 // SuspendCheck instructions) to control the loop as well as an if comparison (consisting of
152 // Parameter, GreaterThanOrEqual and If instructions) to control the diamond loop.
153 //
154 // entry
155 // |
156 // preheader
157 // |
158 // return <------------ header <----------------+
159 // | | |
160 // exit diamond_top |
161 // / \ |
162 // diamond_true diamond_false |
163 // \ / |
164 // back_edge |
165 // | |
166 // +---------------------+
BuildGraph()167 void BuildGraph() override {
168 return_block_ = InitEntryMainExitGraphWithReturnVoid();
169 HBasicBlock* back_edge;
170 std::tie(std::ignore, header_, back_edge) = CreateWhileLoop(return_block_);
171 std::tie(diamond_top_, diamond_true_, std::ignore) = CreateDiamondPattern(back_edge);
172
173 parameter_ = MakeParam(DataType::Type::kInt32);
174 std::tie(phi_, std::ignore) = MakeLinearLoopVar(header_, back_edge, 0, 1);
175 MakeSuspendCheck(header_);
176 HInstruction* trip = MakeCondition(header_,
177 kCondGE,
178 phi_,
179 graph_->GetIntConstant(kArm64DefaultSVEVectorLength));
180 MakeIf(header_, trip);
181 diamond_hif_ = MakeIf(diamond_top_, parameter_);
182 }
183
184 // Add an ArraySet to the loop which will be vectorized, thus setting the type of vector
185 // instructions in the graph to the given vector_type. This needs to be called to ensure the loop
186 // is not simplified by SimplifyInduction or SimplifyBlocks before vectorization.
AddArraySetToLoop(DataType::Type vector_type)187 void AddArraySetToLoop(DataType::Type vector_type) {
188 // Ensure the data type is a java type so it can be stored in a TypeField. The actual type does
189 // not matter as long as the size is the same so it can still be vectorized.
190 DataType::Type new_type = DataType::SignedIntegralTypeFromSize(DataType::Size(vector_type));
191
192 // Add an array set to prevent the loop from being optimized away before vectorization.
193 // Note: This uses an integer parameter and not an array reference to avoid the difficulties in
194 // allocating an array. The instruction is still treated as a valid ArraySet by loop
195 // optimization.
196 diamond_true_->AddInstruction(new (GetAllocator()) HArraySet(parameter_,
197 phi_,
198 graph_->GetIntConstant(1),
199 new_type,
200 /* dex_pc= */ 0));
201 }
202
203 // Replace the input of diamond_hif_ with a new condition of the given types.
ReplaceIfCondition(DataType::Type l_type,DataType::Type r_type,HBasicBlock * condition_block,IfCondition cond)204 void ReplaceIfCondition(DataType::Type l_type,
205 DataType::Type r_type,
206 HBasicBlock* condition_block,
207 IfCondition cond) {
208 AddArraySetToLoop(l_type);
209 HInstruction* l_param = MakeParam(l_type);
210 HInstruction* r_param = MakeParam(r_type);
211 HCondition* condition = MakeCondition(condition_block, cond, l_param, r_param);
212 diamond_hif_->ReplaceInput(condition, 0);
213 }
214
215 // Is loop optimization able to vectorize predicated code?
IsPredicatedVectorizationSupported()216 bool IsPredicatedVectorizationSupported() {
217 // Mirror the check guarding TryVectorizePredicated in TryOptimizeInnerLoopFinite.
218 return kForceTryPredicatedSIMD && loop_opt_->IsInPredicatedVectorizationMode();
219 }
220
221 HBasicBlock* header_;
222 HBasicBlock* diamond_top_;
223 HBasicBlock* diamond_true_;
224
225 HPhi* phi_;
226 HIf* diamond_hif_;
227 };
228
229 #endif // ART_ENABLE_CODEGEN_arm64
230
231 //
232 // The actual tests.
233 //
234
235 // Loop structure tests can't run the graph checker because they don't create valid graphs.
236
TEST_F(LoopOptimizationTest,NoLoops)237 TEST_F(LoopOptimizationTest, NoLoops) {
238 PerformAnalysis(/*run_checker=*/ false);
239 EXPECT_EQ("", LoopStructure());
240 }
241
TEST_F(LoopOptimizationTest,SingleLoop)242 TEST_F(LoopOptimizationTest, SingleLoop) {
243 AddLoop(entry_block_, return_block_);
244 PerformAnalysis(/*run_checker=*/ false);
245 EXPECT_EQ("[]", LoopStructure());
246 }
247
TEST_F(LoopOptimizationTest,LoopNest10)248 TEST_F(LoopOptimizationTest, LoopNest10) {
249 HBasicBlock* b = entry_block_;
250 HBasicBlock* s = return_block_;
251 for (int i = 0; i < 10; i++) {
252 s = AddLoop(b, s);
253 b = s->GetSuccessors()[0];
254 }
255 PerformAnalysis(/*run_checker=*/ false);
256 EXPECT_EQ("[[[[[[[[[[]]]]]]]]]]", LoopStructure());
257 }
258
TEST_F(LoopOptimizationTest,LoopSequence10)259 TEST_F(LoopOptimizationTest, LoopSequence10) {
260 HBasicBlock* b = entry_block_;
261 HBasicBlock* s = return_block_;
262 for (int i = 0; i < 10; i++) {
263 b = AddLoop(b, s);
264 s = b->GetSuccessors()[1];
265 }
266 PerformAnalysis(/*run_checker=*/ false);
267 EXPECT_EQ("[][][][][][][][][][]", LoopStructure());
268 }
269
TEST_F(LoopOptimizationTest,LoopSequenceOfNests)270 TEST_F(LoopOptimizationTest, LoopSequenceOfNests) {
271 HBasicBlock* b = entry_block_;
272 HBasicBlock* s = return_block_;
273 for (int i = 0; i < 10; i++) {
274 b = AddLoop(b, s);
275 s = b->GetSuccessors()[1];
276 HBasicBlock* bi = b->GetSuccessors()[0];
277 HBasicBlock* si = b;
278 for (int j = 0; j < i; j++) {
279 si = AddLoop(bi, si);
280 bi = si->GetSuccessors()[0];
281 }
282 }
283 PerformAnalysis(/*run_checker=*/ false);
284 EXPECT_EQ("[]"
285 "[[]]"
286 "[[[]]]"
287 "[[[[]]]]"
288 "[[[[[]]]]]"
289 "[[[[[[]]]]]]"
290 "[[[[[[[]]]]]]]"
291 "[[[[[[[[]]]]]]]]"
292 "[[[[[[[[[]]]]]]]]]"
293 "[[[[[[[[[[]]]]]]]]]]",
294 LoopStructure());
295 }
296
TEST_F(LoopOptimizationTest,LoopNestWithSequence)297 TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
298 HBasicBlock* b = entry_block_;
299 HBasicBlock* s = return_block_;
300 for (int i = 0; i < 10; i++) {
301 s = AddLoop(b, s);
302 b = s->GetSuccessors()[0];
303 }
304 b = s;
305 s = b->GetSuccessors()[1];
306 for (int i = 0; i < 9; i++) {
307 b = AddLoop(b, s);
308 s = b->GetSuccessors()[1];
309 }
310 PerformAnalysis(/*run_checker=*/ false);
311 EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure());
312 }
313
314 // Check that SimplifyLoop() doesn't invalidate data flow when ordering loop headers'
315 // predecessors.
316 //
317 // This is a test for nodes.cc functionality - HGraph::SimplifyLoop.
TEST_F(LoopOptimizationTest,SimplifyLoopReoderPredecessors)318 TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
319 // Can't use AddLoop as we want special order for blocks predecessors.
320 HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_);
321 HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_);
322 graph_->AddBlock(header);
323 graph_->AddBlock(body);
324
325 // Control flow: make a loop back edge first in the list of predecessors.
326 entry_block_->RemoveSuccessor(return_block_);
327 body->AddSuccessor(header);
328 entry_block_->AddSuccessor(header);
329 header->AddSuccessor(body);
330 header->AddSuccessor(return_block_);
331 DCHECK(header->GetSuccessors()[1] == return_block_);
332
333 // Data flow.
334 MakeIf(header, parameter_);
335 MakeGoto(body);
336
337 HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
338 header->AddPhi(phi);
339 HInstruction* add = MakeBinOp<HAdd>(body, DataType::Type::kInt32, phi, parameter_);
340
341 phi->AddInput(add);
342 phi->AddInput(parameter_);
343
344 graph_->ClearLoopInformation();
345 graph_->ClearDominanceInformation();
346 graph_->BuildDominatorTree();
347
348 // BuildDominatorTree inserts a block beetween loop header and entry block.
349 EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_);
350
351 // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs
352 // are still mapped correctly to the block predecessors.
353 for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
354 HInstruction* input = phi->InputAt(i);
355 EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
356 }
357 }
358
359 // Test that SimplifyLoop() processes the multiple-preheaders loops correctly.
360 //
361 // This is a test for nodes.cc functionality - HGraph::SimplifyLoop.
TEST_F(LoopOptimizationTest,SimplifyLoopSinglePreheader)362 TEST_F(LoopOptimizationTest, SimplifyLoopSinglePreheader) {
363 HBasicBlock* header = AddLoop(entry_block_, return_block_);
364
365 header->InsertInstructionBefore(
366 new (GetAllocator()) HSuspendCheck(), header->GetLastInstruction());
367
368 // Insert an if construct before the loop so it will have two preheaders.
369 HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_);
370 HBasicBlock* preheader0 = new (GetAllocator()) HBasicBlock(graph_);
371 HBasicBlock* preheader1 = new (GetAllocator()) HBasicBlock(graph_);
372
373 graph_->AddBlock(if_block);
374 graph_->AddBlock(preheader0);
375 graph_->AddBlock(preheader1);
376
377 // Fix successors/predecessors.
378 entry_block_->ReplaceSuccessor(header, if_block);
379 if_block->AddSuccessor(preheader0);
380 if_block->AddSuccessor(preheader1);
381 preheader0->AddSuccessor(header);
382 preheader1->AddSuccessor(header);
383
384 MakeIf(if_block, parameter_);
385 MakeGoto(preheader0);
386 MakeGoto(preheader1);
387
388 HBasicBlock* body = header->GetSuccessors()[0];
389 DCHECK(body != return_block_);
390
391 // Add some data flow.
392 HIntConstant* const_0 = graph_->GetIntConstant(0);
393 HIntConstant* const_1 = graph_->GetIntConstant(1);
394 HIntConstant* const_2 = graph_->GetIntConstant(2);
395
396 HAdd* preheader0_add = MakeBinOp<HAdd>(preheader0, DataType::Type::kInt32, parameter_, const_0);
397 HAdd* preheader1_add = MakeBinOp<HAdd>(preheader1, DataType::Type::kInt32, parameter_, const_1);
398
399 HPhi* header_phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
400 header->AddPhi(header_phi);
401
402 HAdd* body_add = MakeBinOp<HAdd>(body, DataType::Type::kInt32, parameter_, const_2);
403
404 DCHECK(header->GetPredecessors()[0] == body);
405 DCHECK(header->GetPredecessors()[1] == preheader0);
406 DCHECK(header->GetPredecessors()[2] == preheader1);
407
408 header_phi->AddInput(body_add);
409 header_phi->AddInput(preheader0_add);
410 header_phi->AddInput(preheader1_add);
411
412 graph_->ClearLoopInformation();
413 graph_->ClearDominanceInformation();
414 graph_->BuildDominatorTree();
415
416 EXPECT_EQ(header->GetPredecessors().size(), 2u);
417 EXPECT_EQ(header->GetPredecessors()[1], body);
418
419 HBasicBlock* new_preheader = header->GetLoopInformation()->GetPreHeader();
420 EXPECT_EQ(preheader0->GetSingleSuccessor(), new_preheader);
421 EXPECT_EQ(preheader1->GetSingleSuccessor(), new_preheader);
422
423 EXPECT_EQ(new_preheader->GetPhis().CountSize(), 1u);
424 HPhi* new_preheader_phi = new_preheader->GetFirstPhi()->AsPhi();
425 EXPECT_EQ(new_preheader_phi->InputCount(), 2u);
426 EXPECT_EQ(new_preheader_phi->InputAt(0), preheader0_add);
427 EXPECT_EQ(new_preheader_phi->InputAt(1), preheader1_add);
428
429 EXPECT_EQ(header_phi->InputCount(), 2u);
430 EXPECT_EQ(header_phi->InputAt(0), new_preheader_phi);
431 EXPECT_EQ(header_phi->InputAt(1), body_add);
432 }
433
434 #ifdef ART_ENABLE_CODEGEN_arm64
435 #define FOR_EACH_CONDITION_INSTRUCTION(M, CondType) \
436 M(EQ, CondType) \
437 M(NE, CondType) \
438 M(LT, CondType) \
439 M(LE, CondType) \
440 M(GT, CondType) \
441 M(GE, CondType) \
442 M(B, CondType) \
443 M(BE, CondType) \
444 M(A, CondType) \
445 M(AE, CondType)
446
447 // Define tests ensuring that all types of conditions can be handled in predicated vectorization
448 // for diamond loops.
449 #define DEFINE_CONDITION_TESTS(Name, CondType) \
450 TEST_F(PredicatedSimdLoopOptimizationTest, VectorizeCondition##Name##CondType) { \
451 if (!IsPredicatedVectorizationSupported()) { \
452 GTEST_SKIP() << "Predicated SIMD is not enabled."; \
453 } \
454 ReplaceIfCondition(DataType::Type::k##CondType, \
455 DataType::Type::k##CondType, \
456 diamond_top_, \
457 kCond##Name); \
458 PerformAnalysis(/*run_checker=*/ true); \
459 EXPECT_TRUE(graph_->HasPredicatedSIMD()); \
460 }
461 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Uint8)
462 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int8)
463 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Uint16)
464 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int16)
465 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int32)
466 #undef DEFINE_CONDITION_TESTS
467 #undef FOR_EACH_CONDITION_INSTRUCTION
468 #endif // ART_ENABLE_CODEGEN_arm64
469
470 } // namespace art
471