xref: /aosp_15_r20/external/ruy/ruy/block_map_test.cc (revision bb86c7ed5fb1b98a7eac808e443a46cc8b90dfc0)
1 /* Copyright 2019 Google LLC. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "ruy/block_map.h"
17 
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <limits>
22 #include <vector>
23 
24 #include "ruy/cpu_cache_params.h"
25 #include "ruy/gtest_wrapper.h"
26 #include "ruy/platform.h"
27 #include "ruy/side_pair.h"
28 
29 namespace ruy {
30 namespace {
31 
32 #if RUY_PLATFORM_NEON_64
33 
34 // Unless otherwise specified, these tests have been tuned on ARM Cortex-A55.
MakeBlockMapTuningTest(int rows,int cols,int depth,int kernel_rows,int kernel_cols,int lhs_scalar_size,int rhs_scalar_size,int tentative_thread_count,int expected_num_blocks_base_log2,int expected_rectangularness_log2)35 void MakeBlockMapTuningTest(int rows, int cols, int depth, int kernel_rows,
36                             int kernel_cols, int lhs_scalar_size,
37                             int rhs_scalar_size, int tentative_thread_count,
38                             int expected_num_blocks_base_log2,
39                             int expected_rectangularness_log2) {
40   // Plausible Cortex-A55 cache sizes.
41   CpuCacheParams cpu_cache_params;
42   cpu_cache_params.local_cache_size = 128 * 1024;
43   cpu_cache_params.last_level_cache_size = 1024 * 1024;
44   BlockMap block_map;
45   MakeBlockMap(rows, cols, depth, kernel_rows, kernel_cols, lhs_scalar_size,
46                rhs_scalar_size, tentative_thread_count, cpu_cache_params,
47                &block_map);
48   EXPECT_EQ(block_map.num_blocks_base_log2, expected_num_blocks_base_log2);
49   EXPECT_EQ(std::min(block_map.rectangularness_log2[Side::kLhs],
50                      block_map.rectangularness_log2[Side::kRhs]),
51             0);
52   EXPECT_EQ(std::max(block_map.rectangularness_log2[Side::kLhs],
53                      block_map.rectangularness_log2[Side::kRhs]),
54             expected_rectangularness_log2);
55 }
56 
TEST(BlockMapTest,MakeBlockMapTuningTest8bitCubicShapesOneThreadNeonDotprod)57 TEST(BlockMapTest, MakeBlockMapTuningTest8bitCubicShapesOneThreadNeonDotprod) {
58   MakeBlockMapTuningTest(32, 32, 32, 8, 8, 1, 1, /* tentative_thread_count */ 1,
59                          /* expected_num_blocks_base_log2 */ 0,
60                          /* expected_rectangularness_log2 */ 0);
61   MakeBlockMapTuningTest(48, 48, 48, 8, 8, 1, 1, /* tentative_thread_count */ 1,
62                          /* expected_num_blocks_base_log2 */ 0,
63                          /* expected_rectangularness_log2 */ 0);
64   MakeBlockMapTuningTest(64, 64, 64, 8, 8, 1, 1, /* tentative_thread_count */ 1,
65                          /* expected_num_blocks_base_log2 */ 0,
66                          /* expected_rectangularness_log2 */ 0);
67   MakeBlockMapTuningTest(96, 96, 96, 8, 8, 1, 1, /* tentative_thread_count */ 1,
68                          /* expected_num_blocks_base_log2 */ 0,
69                          /* expected_rectangularness_log2 */ 0);
70   MakeBlockMapTuningTest(128, 128, 128, 8, 8, 1, 1,
71                          /* tentative_thread_count */ 1,
72                          /* expected_num_blocks_base_log2 */ 0,
73                          /* expected_rectangularness_log2 */ 0);
74   MakeBlockMapTuningTest(192, 192, 192, 8, 8, 1, 1,
75                          /* tentative_thread_count */ 1,
76                          /* expected_num_blocks_base_log2 */ 0,
77                          /* expected_rectangularness_log2 */ 0);
78   MakeBlockMapTuningTest(256, 256, 256, 8, 8, 1, 1,
79                          /* tentative_thread_count */ 1,
80                          /* expected_num_blocks_base_log2 */ 1,
81                          /* expected_rectangularness_log2 */ 0);
82   MakeBlockMapTuningTest(384, 384, 384, 8, 8, 1, 1,
83                          /* tentative_thread_count */ 1,
84                          /* expected_num_blocks_base_log2 */ 1,
85                          /* expected_rectangularness_log2 */ 0);
86 }
87 
TEST(BlockMapTest,MakeBlockMapTuningTest8bitCubicShapesFourThreadsNeonDotprod)88 TEST(BlockMapTest,
89      MakeBlockMapTuningTest8bitCubicShapesFourThreadsNeonDotprod) {
90   MakeBlockMapTuningTest(32, 32, 32, 8, 8, 1, 1, /* tentative_thread_count */ 4,
91                          /* expected_num_blocks_base_log2 */ 1,
92                          /* expected_rectangularness_log2 */ 0);
93   MakeBlockMapTuningTest(48, 48, 48, 8, 8, 1, 1, /* tentative_thread_count */ 4,
94                          /* expected_num_blocks_base_log2 */ 1,
95                          /* expected_rectangularness_log2 */ 0);
96   MakeBlockMapTuningTest(64, 64, 64, 8, 8, 1, 1, /* tentative_thread_count */ 4,
97                          /* expected_num_blocks_base_log2 */ 1,
98                          /* expected_rectangularness_log2 */ 0);
99   MakeBlockMapTuningTest(96, 96, 96, 8, 8, 1, 1, /* tentative_thread_count */ 4,
100                          /* expected_num_blocks_base_log2 */ 1,
101                          /* expected_rectangularness_log2 */ 0);
102   MakeBlockMapTuningTest(128, 128, 128, 8, 8, 1, 1,
103                          /* tentative_thread_count */ 4,
104                          /* expected_num_blocks_base_log2 */ 1,
105                          /* expected_rectangularness_log2 */ 0);
106   MakeBlockMapTuningTest(192, 192, 192, 8, 8, 1, 1,
107                          /* tentative_thread_count */ 4,
108                          /* expected_num_blocks_base_log2 */ 1,
109                          /* expected_rectangularness_log2 */ 0);
110   MakeBlockMapTuningTest(256, 256, 256, 8, 8, 1, 1,
111                          /* tentative_thread_count */ 4,
112                          /* expected_num_blocks_base_log2 */ 2,
113                          /* expected_rectangularness_log2 */ 0);
114   MakeBlockMapTuningTest(384, 384, 384, 8, 8, 1, 1,
115                          /* tentative_thread_count */ 4,
116                          /* expected_num_blocks_base_log2 */ 2,
117                          /* expected_rectangularness_log2 */ 0);
118 }
119 
TEST(BlockMapTest,MakeBlockMapTuningTest32bit)120 TEST(BlockMapTest, MakeBlockMapTuningTest32bit) {
121   MakeBlockMapTuningTest(256, 256, 256, 8, 8, 4, 4,
122                          /* tentative_thread_count */ 4,
123                          /* expected_num_blocks_base_log2 */ 3,
124                          /* expected_rectangularness_log2 */ 0);
125   MakeBlockMapTuningTest(4096, 4096, 4096, 8, 8, 4, 4,
126                          /* tentative_thread_count */ 4,
127                          /* expected_num_blocks_base_log2 */ 7,
128                          /* expected_rectangularness_log2 */ 0);
129 }
130 
TEST(BlockMapTest,MakeBlockMapTuningTestRectangular)131 TEST(BlockMapTest, MakeBlockMapTuningTestRectangular) {
132   MakeBlockMapTuningTest(256, 16, 256, 8, 8, 1, 1,
133                          /* tentative_thread_count */ 1,
134                          /* expected_num_blocks_base_log2 */ 0,
135                          /* expected_rectangularness_log2 */ 3);
136   MakeBlockMapTuningTest(24, 2400, 256, 8, 8, 1, 1,
137                          /* tentative_thread_count */ 1,
138                          /* expected_num_blocks_base_log2 */ 0,
139                          /* expected_rectangularness_log2 */ 6);
140 }
141 
142 #endif
143 
L1Distance(const SidePair<int> & a,const SidePair<int> & b)144 int L1Distance(const SidePair<int>& a, const SidePair<int>& b) {
145   return std::abs(a[Side::kLhs] - b[Side::kLhs]) +
146          std::abs(a[Side::kRhs] - b[Side::kRhs]);
147 }
148 
GetBlockByIndexSquareTest(int num_blocks_base_log2,BlockMapTraversalOrder traversal_order)149 void GetBlockByIndexSquareTest(int num_blocks_base_log2,
150                                BlockMapTraversalOrder traversal_order) {
151   // Arbitrary, does not affect this test. 3 is just a typical value.
152   constexpr int kKernelSizeLog2 = 3;
153 
154   const int size_log2 = num_blocks_base_log2 + kKernelSizeLog2;
155   BlockMap block_map;
156   block_map.thread_count = 1;
157   block_map.traversal_order = traversal_order;
158   block_map.num_blocks_base_log2 = num_blocks_base_log2;
159   for (Side side : {Side::kLhs, Side::kRhs}) {
160     block_map.dims[side] = 1 << size_log2;
161     block_map.rectangularness_log2[side] = 0;
162     block_map.kernel_dims[side] = 1 << kKernelSizeLog2;
163     block_map.small_block_dims[side] = block_map.kernel_dims[side];
164     block_map.large_blocks[side] = 0;
165   }
166 
167   const int num_blocks_per_side = 1 << num_blocks_base_log2;
168   const int num_blocks = num_blocks_per_side * num_blocks_per_side;
169   EXPECT_EQ(num_blocks, NumBlocks(block_map));
170 
171   // Perform a full traversal of all blocks, as if computing a whole matrix
172   // multiplication.
173   //
174   // Used to record how many times each block was hit by the traversal.
175   std::vector<int> block_hit_counts(num_blocks);
176   // Here we guard an assumption that all traversal orders start at (0, 0).
177   SidePair<int> previous_block_coords(0, 0);
178   // Sum of L1 norm of the coordinate change at every step of the traversal.
179   std::int64_t total_l1_distance = 0;
180   // Number of jumps i.e. traversal steps with a L1 norm greater than 1.
181   int discontinuity_count = 0;
182   for (int block_index = 0; block_index < num_blocks; block_index++) {
183     SidePair<int> block_coords;
184     GetBlockByIndex(block_map, block_index, &block_coords);
185     ++block_hit_counts[block_coords[Side::kLhs] +
186                        num_blocks_per_side * block_coords[Side::kRhs]];
187     int distance = L1Distance(block_coords, previous_block_coords);
188     total_l1_distance += distance;
189     discontinuity_count += (distance > 1);
190     previous_block_coords = block_coords;
191   }
192 
193   // Verify that each block was traversed exactly once.
194   for (int l = 0; l < num_blocks_per_side; l++) {
195     for (int r = 0; r < num_blocks_per_side; r++) {
196       EXPECT_EQ(block_hit_counts[l + num_blocks_per_side * r], 1);
197     }
198   }
199 
200   // Verify that the discontinuity_count and total_l1_distance are as expected
201   // for the given traversal_order.
202   switch (traversal_order) {
203     case BlockMapTraversalOrder::kFractalHilbert:
204       // No discontinuity at all with this space-filling continuous curve!
205       EXPECT_EQ(discontinuity_count, 0);
206       // Therefore, total_l1_distance has to be the number of blocks minus one.
207       EXPECT_EQ(total_l1_distance, num_blocks - 1);
208       break;
209     case BlockMapTraversalOrder::kLinear:
210       EXPECT_EQ(discontinuity_count, num_blocks_per_side - 1);
211       EXPECT_EQ(total_l1_distance,
212                 2 * num_blocks_per_side * (num_blocks_per_side - 1));
213       break;
214     case BlockMapTraversalOrder::kFractalZ:
215       EXPECT_EQ(discontinuity_count, num_blocks > 1 ? (num_blocks / 2 - 1) : 0);
216       EXPECT_EQ(total_l1_distance,
217                 2 * num_blocks_per_side * (num_blocks_per_side - 1));
218       break;
219     case BlockMapTraversalOrder::kFractalU: {
220       if (num_blocks_base_log2 == 0) {
221         EXPECT_EQ(discontinuity_count, 0);
222         EXPECT_EQ(total_l1_distance, 0);
223       } else {
224         int expected_discontinuity_count = 0;
225         int expected_total_l1_distance = 3;
226         for (int i = 2; i <= num_blocks_base_log2; i++) {
227           expected_discontinuity_count = 4 * expected_discontinuity_count + 2;
228           expected_total_l1_distance =
229               4 * expected_total_l1_distance + (1 << (i + 1)) - 1;
230         }
231         EXPECT_EQ(discontinuity_count, expected_discontinuity_count);
232         EXPECT_EQ(total_l1_distance, expected_total_l1_distance);
233       }
234       break;
235     }
236     default:
237       abort();
238   }
239 }
240 
TEST(BlockMapTest,GetBlockByIndexSquare)241 TEST(BlockMapTest, GetBlockByIndexSquare) {
242   for (int num_blocks_base_log2 = 0; num_blocks_base_log2 <= 10;
243        num_blocks_base_log2++) {
244     for (BlockMapTraversalOrder traversal_order :
245          {BlockMapTraversalOrder::kLinear, BlockMapTraversalOrder::kFractalZ,
246           BlockMapTraversalOrder::kFractalU,
247           BlockMapTraversalOrder::kFractalHilbert}) {
248       GetBlockByIndexSquareTest(num_blocks_base_log2, traversal_order);
249     }
250   }
251 }
252 
253 }  // namespace
254 }  // namespace ruy
255 
main(int argc,char ** argv)256 int main(int argc, char **argv) {
257   ::testing::InitGoogleTest(&argc, argv);
258   return RUN_ALL_TESTS();
259 }
260