xref: /aosp_15_r20/art/compiler/optimizing/loop_optimization_test.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "android-base/logging.h"
18 #include "base/macros.h"
19 #include "code_generator.h"
20 #include "driver/compiler_options.h"
21 #include "loop_optimization.h"
22 #include "optimizing/data_type.h"
23 #include "optimizing/nodes.h"
24 #include "optimizing_unit_test.h"
25 
26 namespace art HIDDEN {
27 
28 // Base class for loop optimization tests.
29 class LoopOptimizationTestBase : public OptimizingUnitTest {
30  protected:
SetUp()31   void SetUp() override {
32     OptimizingUnitTest::SetUp();
33 
34     BuildGraph();
35     iva_  = new (GetAllocator()) HInductionVarAnalysis(graph_);
36     if (compiler_options_ == nullptr) {
37       compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default");
38       DCHECK(compiler_options_ != nullptr);
39     }
40     codegen_ = CodeGenerator::Create(graph_, *compiler_options_);
41     DCHECK(codegen_.get() != nullptr);
42     loop_opt_ = new (GetAllocator()) HLoopOptimization(
43         graph_, *codegen_.get(), iva_, /* stats= */ nullptr);
44   }
45 
TearDown()46   void TearDown() override {
47     codegen_.reset();
48     compiler_options_.reset();
49     graph_ = nullptr;
50     ResetPoolAndAllocator();
51     OptimizingUnitTest::TearDown();
52   }
53 
54   virtual void BuildGraph() = 0;
55 
56   // Run loop optimization and optionally check the graph.
PerformAnalysis(bool run_checker)57   void PerformAnalysis(bool run_checker) {
58     graph_->BuildDominatorTree();
59 
60     // Check the graph is valid before loop optimization.
61     std::ostringstream oss;
62     if (run_checker) {
63       ASSERT_TRUE(CheckGraph(oss)) << oss.str();
64     }
65 
66     iva_->Run();
67     loop_opt_->Run();
68 
69     // Check the graph is valid after loop optimization.
70     if (run_checker) {
71       ASSERT_TRUE(CheckGraph(oss)) << oss.str();
72     }
73   }
74 
75   // General building fields.
76   std::unique_ptr<CompilerOptions> compiler_options_;
77   std::unique_ptr<CodeGenerator> codegen_;
78   HInductionVarAnalysis* iva_;
79   HLoopOptimization* loop_opt_;
80 
81   HBasicBlock* return_block_;
82 
83   HInstruction* parameter_;
84 };
85 
86 /**
87  * Fixture class for the loop optimization tests. These unit tests mostly focus
88  * on constructing the loop hierarchy. Checker tests are also used to test
89  * specific optimizations.
90  */
91 class LoopOptimizationTest : public LoopOptimizationTestBase {
92  protected:
~LoopOptimizationTest()93   virtual ~LoopOptimizationTest() {}
94 
95   /** Constructs bare minimum graph. */
BuildGraph()96   void BuildGraph() override {
97     return_block_ = InitEntryMainExitGraph();
98     graph_->SetNumberOfVRegs(1);
99     parameter_ = MakeParam(DataType::Type::kInt32);
100   }
101 
102   /** Adds a loop nest at given position before successor. */
AddLoop(HBasicBlock * position,HBasicBlock * successor)103   HBasicBlock* AddLoop(HBasicBlock* position, HBasicBlock* successor) {
104     HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_);
105     HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_);
106     graph_->AddBlock(header);
107     graph_->AddBlock(body);
108     // Control flow.
109     position->ReplaceSuccessor(successor, header);
110     header->AddSuccessor(body);
111     header->AddSuccessor(successor);
112     MakeIf(header, parameter_);
113     body->AddSuccessor(header);
114     MakeGoto(body);
115     return header;
116   }
117 
118   /** Constructs string representation of computed loop hierarchy. */
LoopStructure()119   std::string LoopStructure() {
120     return LoopStructureRecurse(loop_opt_->top_loop_);
121   }
122 
123   // Helper method
LoopStructureRecurse(HLoopOptimization::LoopNode * node)124   std::string LoopStructureRecurse(HLoopOptimization::LoopNode* node) {
125     std::string s;
126     for ( ; node != nullptr; node = node->next) {
127       s.append("[");
128       s.append(LoopStructureRecurse(node->inner));
129       s.append("]");
130     }
131     return s;
132   }
133 };
134 
135 #ifdef ART_ENABLE_CODEGEN_arm64
136 // Unit tests for predicated vectorization.
137 class PredicatedSimdLoopOptimizationTest : public LoopOptimizationTestBase {
138  protected:
SetUp()139   void SetUp() override {
140     // Predicated SIMD is only supported by SVE on Arm64.
141     compiler_options_ = CommonCompilerTest::CreateCompilerOptions(InstructionSet::kArm64,
142                                                                   "default",
143                                                                   "sve");
144     LoopOptimizationTestBase::SetUp();
145   }
146 
~PredicatedSimdLoopOptimizationTest()147   virtual ~PredicatedSimdLoopOptimizationTest() {}
148 
149   // Constructs a graph with a diamond loop which should be vectorizable with predicated
150   // vectorization. This graph includes a basic loop induction (consisting of Phi, Add, If and
151   // SuspendCheck instructions) to control the loop as well as an if comparison (consisting of
152   // Parameter, GreaterThanOrEqual and If instructions) to control the diamond loop.
153   //
154   //                       entry
155   //                         |
156   //                      preheader
157   //                         |
158   //  return <------------ header <----------------+
159   //     |                   |                     |
160   //   exit             diamond_top                |
161   //                       /   \                   |
162   //            diamond_true  diamond_false        |
163   //                       \   /                   |
164   //                     back_edge                 |
165   //                         |                     |
166   //                         +---------------------+
BuildGraph()167   void BuildGraph() override {
168     return_block_ = InitEntryMainExitGraphWithReturnVoid();
169     HBasicBlock* back_edge;
170     std::tie(std::ignore, header_, back_edge) = CreateWhileLoop(return_block_);
171     std::tie(diamond_top_, diamond_true_, std::ignore) = CreateDiamondPattern(back_edge);
172 
173     parameter_ = MakeParam(DataType::Type::kInt32);
174     std::tie(phi_, std::ignore) = MakeLinearLoopVar(header_, back_edge, 0, 1);
175     MakeSuspendCheck(header_);
176     HInstruction* trip = MakeCondition(header_,
177                                        kCondGE,
178                                        phi_,
179                                        graph_->GetIntConstant(kArm64DefaultSVEVectorLength));
180     MakeIf(header_, trip);
181     diamond_hif_ = MakeIf(diamond_top_, parameter_);
182   }
183 
184   // Add an ArraySet to the loop which will be vectorized, thus setting the type of vector
185   // instructions in the graph to the given vector_type. This needs to be called to ensure the loop
186   // is not simplified by SimplifyInduction or SimplifyBlocks before vectorization.
AddArraySetToLoop(DataType::Type vector_type)187   void AddArraySetToLoop(DataType::Type vector_type) {
188     // Ensure the data type is a java type so it can be stored in a TypeField. The actual type does
189     // not matter as long as the size is the same so it can still be vectorized.
190     DataType::Type new_type = DataType::SignedIntegralTypeFromSize(DataType::Size(vector_type));
191 
192     // Add an array set to prevent the loop from being optimized away before vectorization.
193     // Note: This uses an integer parameter and not an array reference to avoid the difficulties in
194     // allocating an array. The instruction is still treated as a valid ArraySet by loop
195     // optimization.
196     diamond_true_->AddInstruction(new (GetAllocator()) HArraySet(parameter_,
197                                                                  phi_,
198                                                                  graph_->GetIntConstant(1),
199                                                                  new_type,
200                                                                  /* dex_pc= */ 0));
201   }
202 
203   // Replace the input of diamond_hif_ with a new condition of the given types.
ReplaceIfCondition(DataType::Type l_type,DataType::Type r_type,HBasicBlock * condition_block,IfCondition cond)204   void ReplaceIfCondition(DataType::Type l_type,
205                           DataType::Type r_type,
206                           HBasicBlock* condition_block,
207                           IfCondition cond) {
208     AddArraySetToLoop(l_type);
209     HInstruction* l_param = MakeParam(l_type);
210     HInstruction* r_param = MakeParam(r_type);
211     HCondition* condition = MakeCondition(condition_block, cond, l_param, r_param);
212     diamond_hif_->ReplaceInput(condition, 0);
213   }
214 
215   // Is loop optimization able to vectorize predicated code?
IsPredicatedVectorizationSupported()216   bool IsPredicatedVectorizationSupported() {
217     // Mirror the check guarding TryVectorizePredicated in TryOptimizeInnerLoopFinite.
218     return kForceTryPredicatedSIMD && loop_opt_->IsInPredicatedVectorizationMode();
219   }
220 
221   HBasicBlock* header_;
222   HBasicBlock* diamond_top_;
223   HBasicBlock* diamond_true_;
224 
225   HPhi* phi_;
226   HIf* diamond_hif_;
227 };
228 
229 #endif  // ART_ENABLE_CODEGEN_arm64
230 
231 //
232 // The actual tests.
233 //
234 
235 // Loop structure tests can't run the graph checker because they don't create valid graphs.
236 
TEST_F(LoopOptimizationTest,NoLoops)237 TEST_F(LoopOptimizationTest, NoLoops) {
238   PerformAnalysis(/*run_checker=*/ false);
239   EXPECT_EQ("", LoopStructure());
240 }
241 
TEST_F(LoopOptimizationTest,SingleLoop)242 TEST_F(LoopOptimizationTest, SingleLoop) {
243   AddLoop(entry_block_, return_block_);
244   PerformAnalysis(/*run_checker=*/ false);
245   EXPECT_EQ("[]", LoopStructure());
246 }
247 
TEST_F(LoopOptimizationTest,LoopNest10)248 TEST_F(LoopOptimizationTest, LoopNest10) {
249   HBasicBlock* b = entry_block_;
250   HBasicBlock* s = return_block_;
251   for (int i = 0; i < 10; i++) {
252     s = AddLoop(b, s);
253     b = s->GetSuccessors()[0];
254   }
255   PerformAnalysis(/*run_checker=*/ false);
256   EXPECT_EQ("[[[[[[[[[[]]]]]]]]]]", LoopStructure());
257 }
258 
TEST_F(LoopOptimizationTest,LoopSequence10)259 TEST_F(LoopOptimizationTest, LoopSequence10) {
260   HBasicBlock* b = entry_block_;
261   HBasicBlock* s = return_block_;
262   for (int i = 0; i < 10; i++) {
263     b = AddLoop(b, s);
264     s = b->GetSuccessors()[1];
265   }
266   PerformAnalysis(/*run_checker=*/ false);
267   EXPECT_EQ("[][][][][][][][][][]", LoopStructure());
268 }
269 
TEST_F(LoopOptimizationTest,LoopSequenceOfNests)270 TEST_F(LoopOptimizationTest, LoopSequenceOfNests) {
271   HBasicBlock* b = entry_block_;
272   HBasicBlock* s = return_block_;
273   for (int i = 0; i < 10; i++) {
274     b = AddLoop(b, s);
275     s = b->GetSuccessors()[1];
276     HBasicBlock* bi = b->GetSuccessors()[0];
277     HBasicBlock* si = b;
278     for (int j = 0; j < i; j++) {
279       si = AddLoop(bi, si);
280       bi = si->GetSuccessors()[0];
281     }
282   }
283   PerformAnalysis(/*run_checker=*/ false);
284   EXPECT_EQ("[]"
285             "[[]]"
286             "[[[]]]"
287             "[[[[]]]]"
288             "[[[[[]]]]]"
289             "[[[[[[]]]]]]"
290             "[[[[[[[]]]]]]]"
291             "[[[[[[[[]]]]]]]]"
292             "[[[[[[[[[]]]]]]]]]"
293             "[[[[[[[[[[]]]]]]]]]]",
294             LoopStructure());
295 }
296 
TEST_F(LoopOptimizationTest,LoopNestWithSequence)297 TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
298   HBasicBlock* b = entry_block_;
299   HBasicBlock* s = return_block_;
300   for (int i = 0; i < 10; i++) {
301     s = AddLoop(b, s);
302     b = s->GetSuccessors()[0];
303   }
304   b = s;
305   s = b->GetSuccessors()[1];
306   for (int i = 0; i < 9; i++) {
307     b = AddLoop(b, s);
308     s = b->GetSuccessors()[1];
309   }
310   PerformAnalysis(/*run_checker=*/ false);
311   EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure());
312 }
313 
314 // Check that SimplifyLoop() doesn't invalidate data flow when ordering loop headers'
315 // predecessors.
316 //
317 // This is a test for nodes.cc functionality - HGraph::SimplifyLoop.
TEST_F(LoopOptimizationTest,SimplifyLoopReoderPredecessors)318 TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
319   // Can't use AddLoop as we want special order for blocks predecessors.
320   HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_);
321   HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_);
322   graph_->AddBlock(header);
323   graph_->AddBlock(body);
324 
325   // Control flow: make a loop back edge first in the list of predecessors.
326   entry_block_->RemoveSuccessor(return_block_);
327   body->AddSuccessor(header);
328   entry_block_->AddSuccessor(header);
329   header->AddSuccessor(body);
330   header->AddSuccessor(return_block_);
331   DCHECK(header->GetSuccessors()[1] == return_block_);
332 
333   // Data flow.
334   MakeIf(header, parameter_);
335   MakeGoto(body);
336 
337   HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
338   header->AddPhi(phi);
339   HInstruction* add = MakeBinOp<HAdd>(body, DataType::Type::kInt32, phi, parameter_);
340 
341   phi->AddInput(add);
342   phi->AddInput(parameter_);
343 
344   graph_->ClearLoopInformation();
345   graph_->ClearDominanceInformation();
346   graph_->BuildDominatorTree();
347 
348   // BuildDominatorTree inserts a block beetween loop header and entry block.
349   EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_);
350 
351   // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs
352   // are still mapped correctly to the block predecessors.
353   for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
354     HInstruction* input = phi->InputAt(i);
355     EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
356   }
357 }
358 
359 // Test that SimplifyLoop() processes the multiple-preheaders loops correctly.
360 //
361 // This is a test for nodes.cc functionality - HGraph::SimplifyLoop.
TEST_F(LoopOptimizationTest,SimplifyLoopSinglePreheader)362 TEST_F(LoopOptimizationTest, SimplifyLoopSinglePreheader) {
363   HBasicBlock* header = AddLoop(entry_block_, return_block_);
364 
365   header->InsertInstructionBefore(
366       new (GetAllocator()) HSuspendCheck(), header->GetLastInstruction());
367 
368   // Insert an if construct before the loop so it will have two preheaders.
369   HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_);
370   HBasicBlock* preheader0 = new (GetAllocator()) HBasicBlock(graph_);
371   HBasicBlock* preheader1 = new (GetAllocator()) HBasicBlock(graph_);
372 
373   graph_->AddBlock(if_block);
374   graph_->AddBlock(preheader0);
375   graph_->AddBlock(preheader1);
376 
377   // Fix successors/predecessors.
378   entry_block_->ReplaceSuccessor(header, if_block);
379   if_block->AddSuccessor(preheader0);
380   if_block->AddSuccessor(preheader1);
381   preheader0->AddSuccessor(header);
382   preheader1->AddSuccessor(header);
383 
384   MakeIf(if_block, parameter_);
385   MakeGoto(preheader0);
386   MakeGoto(preheader1);
387 
388   HBasicBlock* body = header->GetSuccessors()[0];
389   DCHECK(body != return_block_);
390 
391   // Add some data flow.
392   HIntConstant* const_0 = graph_->GetIntConstant(0);
393   HIntConstant* const_1 = graph_->GetIntConstant(1);
394   HIntConstant* const_2 = graph_->GetIntConstant(2);
395 
396   HAdd* preheader0_add = MakeBinOp<HAdd>(preheader0, DataType::Type::kInt32, parameter_, const_0);
397   HAdd* preheader1_add = MakeBinOp<HAdd>(preheader1, DataType::Type::kInt32, parameter_, const_1);
398 
399   HPhi* header_phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
400   header->AddPhi(header_phi);
401 
402   HAdd* body_add = MakeBinOp<HAdd>(body, DataType::Type::kInt32, parameter_, const_2);
403 
404   DCHECK(header->GetPredecessors()[0] == body);
405   DCHECK(header->GetPredecessors()[1] == preheader0);
406   DCHECK(header->GetPredecessors()[2] == preheader1);
407 
408   header_phi->AddInput(body_add);
409   header_phi->AddInput(preheader0_add);
410   header_phi->AddInput(preheader1_add);
411 
412   graph_->ClearLoopInformation();
413   graph_->ClearDominanceInformation();
414   graph_->BuildDominatorTree();
415 
416   EXPECT_EQ(header->GetPredecessors().size(), 2u);
417   EXPECT_EQ(header->GetPredecessors()[1], body);
418 
419   HBasicBlock* new_preheader = header->GetLoopInformation()->GetPreHeader();
420   EXPECT_EQ(preheader0->GetSingleSuccessor(), new_preheader);
421   EXPECT_EQ(preheader1->GetSingleSuccessor(), new_preheader);
422 
423   EXPECT_EQ(new_preheader->GetPhis().CountSize(), 1u);
424   HPhi* new_preheader_phi = new_preheader->GetFirstPhi()->AsPhi();
425   EXPECT_EQ(new_preheader_phi->InputCount(), 2u);
426   EXPECT_EQ(new_preheader_phi->InputAt(0), preheader0_add);
427   EXPECT_EQ(new_preheader_phi->InputAt(1), preheader1_add);
428 
429   EXPECT_EQ(header_phi->InputCount(), 2u);
430   EXPECT_EQ(header_phi->InputAt(0), new_preheader_phi);
431   EXPECT_EQ(header_phi->InputAt(1), body_add);
432 }
433 
434 #ifdef ART_ENABLE_CODEGEN_arm64
435 #define FOR_EACH_CONDITION_INSTRUCTION(M, CondType) \
436   M(EQ, CondType)                                   \
437   M(NE, CondType)                                   \
438   M(LT, CondType)                                   \
439   M(LE, CondType)                                   \
440   M(GT, CondType)                                   \
441   M(GE, CondType)                                   \
442   M(B, CondType)                                    \
443   M(BE, CondType)                                   \
444   M(A, CondType)                                    \
445   M(AE, CondType)
446 
447 // Define tests ensuring that all types of conditions can be handled in predicated vectorization
448 // for diamond loops.
449 #define DEFINE_CONDITION_TESTS(Name, CondType)                                                  \
450 TEST_F(PredicatedSimdLoopOptimizationTest, VectorizeCondition##Name##CondType) {                \
451   if (!IsPredicatedVectorizationSupported()) {                                                  \
452     GTEST_SKIP() << "Predicated SIMD is not enabled.";                                          \
453   }                                                                                             \
454   ReplaceIfCondition(DataType::Type::k##CondType,                                               \
455                      DataType::Type::k##CondType,                                               \
456                      diamond_top_,                                                              \
457                      kCond##Name);                                                              \
458   PerformAnalysis(/*run_checker=*/ true);                                                       \
459   EXPECT_TRUE(graph_->HasPredicatedSIMD());                                                     \
460 }
461 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Uint8)
462 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int8)
463 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Uint16)
464 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int16)
465 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int32)
466 #undef DEFINE_CONDITION_TESTS
467 #undef FOR_EACH_CONDITION_INSTRUCTION
468 #endif  // ART_ENABLE_CODEGEN_arm64
469 
470 }  // namespace art
471