1 /* 2 * Copyright (c) 2017-2021 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER 25 #define ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER 26 27 #include "arm_compute/core/TensorShape.h" 28 #include "arm_compute/core/Types.h" 29 #include "arm_compute/runtime/BlobLifetimeManager.h" 30 #include "arm_compute/runtime/MemoryManagerOnDemand.h" 31 #include "arm_compute/runtime/PoolManager.h" 32 #include "tests/AssetsLibrary.h" 33 #include "tests/Globals.h" 34 #include "tests/IAccessor.h" 35 #include "tests/framework/Asserts.h" 36 #include "tests/framework/Fixture.h" 37 #include "tests/validation/Helpers.h" 38 #include "tests/validation/reference/FullyConnectedLayer.h" 39 #include "tests/validation/reference/SoftmaxLayer.h" 40 41 namespace arm_compute 42 { 43 namespace test 44 { 45 namespace validation 46 { 47 /** Simple test case to run two fully connected layers using a blob affinity memory manager 48 * 49 * Runs two fully connected layers back to back 50 */ 51 template <typename TensorType, typename AccessorType, typename AllocatorType, typename FullyConnectedFunction> 52 class BlobMemoryManagerSimpleTestCaseFixture : public framework::Fixture 53 { 54 using T = float; 55 56 public: setup()57 void setup() 58 { 59 _target = compute_target(); 60 _reference = compute_reference(); 61 }; 62 63 protected: 64 template <typename U> fill(U && tensor,int i)65 void fill(U &&tensor, int i) 66 { 67 std::uniform_real_distribution<> distribution(0.5f, 1.f); 68 library->fill(tensor, distribution, i); 69 } 70 compute_target()71 TensorType compute_target() 72 { 73 auto lifetime_mgr = std::make_shared<BlobLifetimeManager>(); 74 auto pool_mgr = std::make_shared<PoolManager>(); 75 auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr); 76 77 // Create tensors 78 TensorType w1 = create_tensor<TensorType>(TensorShape(128U, 128U), DataType::F32, 1); 79 TensorType b1 = create_tensor<TensorType>(TensorShape(128U), DataType::F32, 1); 80 TensorType w2 = create_tensor<TensorType>(TensorShape(128U, 24U), DataType::F32, 1); 81 TensorType b2 = create_tensor<TensorType>(TensorShape(24U), DataType::F32, 1); 82 TensorType src = create_tensor<TensorType>(TensorShape(128U), DataType::F32, 1); 83 TensorType fc1 = create_tensor<TensorType>(TensorShape(128U), DataType::F32, 1); 84 TensorType dst = create_tensor<TensorType>(TensorShape(24U), DataType::F32, 1); 85 86 // Create and configure function 87 FullyConnectedFunction fc_layer_1(mm); 88 FullyConnectedFunction fc_layer_2(mm); 89 fc_layer_1.configure(&src, &w1, &b1, &fc1); 90 fc_layer_2.configure(&fc1, &w2, &b2, &dst); 91 92 // Allocate tensors 93 w1.allocator()->allocate(); 94 b1.allocator()->allocate(); 95 w2.allocator()->allocate(); 96 b2.allocator()->allocate(); 97 src.allocator()->allocate(); 98 fc1.allocator()->allocate(); 99 dst.allocator()->allocate(); 100 101 // Finalize memory manager 102 mm->populate(_allocator, 1 /* num_pools */); 103 ARM_COMPUTE_ASSERT(mm->lifetime_manager()->are_all_finalized()); 104 ARM_COMPUTE_ASSERT(mm->pool_manager()->num_pools() == 1); 105 106 // Fill tensors 107 fill(AccessorType(src), 0); 108 fill(AccessorType(w1), 1); 109 fill(AccessorType(b1), 2); 110 fill(AccessorType(w2), 3); 111 fill(AccessorType(b2), 4); 112 113 // Compute functions 114 fc_layer_1.run(); 115 fc_layer_2.run(); 116 117 return dst; 118 } 119 compute_reference()120 SimpleTensor<T> compute_reference() 121 { 122 // Create reference 123 SimpleTensor<T> w1{ TensorShape(128U, 128U), DataType::F32 }; 124 SimpleTensor<T> b1{ TensorShape(128U), DataType::F32 }; 125 SimpleTensor<T> w2{ TensorShape(128U, 24U), DataType::F32 }; 126 SimpleTensor<T> b2{ TensorShape(24U), DataType::F32 }; 127 SimpleTensor<T> src{ TensorShape(128U), DataType::F32 }; 128 129 // Fill reference 130 fill(src, 0); 131 fill(w1, 1); 132 fill(b1, 2); 133 fill(w2, 3); 134 fill(b2, 4); 135 136 auto fc1 = reference::fully_connected_layer(src, w1, b1, TensorShape(128U)); 137 return reference::fully_connected_layer(fc1, w2, b2, TensorShape(24U)); 138 } 139 140 protected: 141 TensorType _target{}; 142 SimpleTensor<T> _reference{}; 143 AllocatorType _allocator{}; 144 }; 145 146 /** Test case to run two fully connected layers using a blob affinity memory manager, 147 * reconfigure with different shapes and rerun 148 * 149 * Runs two fully connected layers back to back then reconfigures with different batch size and reruns 150 * Shapes of the reconfigure step are smaller that the initial configured step 151 */ 152 template <typename TensorType, typename AccessorType, typename AllocatorType, typename FullyConnectedFunction> 153 class BlobMemoryManagerReconfigureTestCaseFixture : public framework::Fixture 154 { 155 using T = float; 156 157 public: setup()158 void setup() 159 { 160 _max_batches = 8; 161 _cur_batches = 6; 162 _target = compute_target(); 163 _reference = compute_reference(); 164 }; 165 166 protected: 167 template <typename U> fill(U && tensor,int i)168 void fill(U &&tensor, int i) 169 { 170 std::uniform_real_distribution<> distribution(0.5f, 1.f); 171 library->fill(tensor, distribution, i); 172 } 173 compute_target()174 TensorType compute_target() 175 { 176 AllocatorType allocator{}; 177 auto lifetime_mgr = std::make_shared<BlobLifetimeManager>(); 178 auto pool_mgr = std::make_shared<PoolManager>(); 179 auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr); 180 181 // Create tensors 182 TensorType w1 = create_tensor<TensorType>(TensorShape(128U, 128U), DataType::F32, 1); 183 TensorType b1 = create_tensor<TensorType>(TensorShape(128U), DataType::F32, 1); 184 TensorType w2 = create_tensor<TensorType>(TensorShape(128U, 24U), DataType::F32, 1); 185 TensorType b2 = create_tensor<TensorType>(TensorShape(24U), DataType::F32, 1); 186 TensorType src = create_tensor<TensorType>(TensorShape(128U, _max_batches), DataType::F32, 1); 187 TensorType fc1 = create_tensor<TensorType>(TensorShape(128U, _max_batches), DataType::F32, 1); 188 TensorType dst = create_tensor<TensorType>(TensorShape(24U, _max_batches), DataType::F32, 1); 189 190 // Create and configure function 191 FullyConnectedFunction fc_layer_1(mm); 192 FullyConnectedFunction fc_layer_2(mm); 193 fc_layer_1.configure(&src, &w1, &b1, &fc1); 194 fc_layer_2.configure(&fc1, &w2, &b2, &dst); 195 196 // Allocate persistent tensors 197 w1.allocator()->allocate(); 198 b1.allocator()->allocate(); 199 w2.allocator()->allocate(); 200 b2.allocator()->allocate(); 201 202 // Allocate tensors (1st iteration) 203 src.allocator()->allocate(); 204 fc1.allocator()->allocate(); 205 dst.allocator()->allocate(); 206 207 // Finalize memory manager 208 mm->populate(_allocator, 1 /* num_pools */); 209 ARM_COMPUTE_ASSERT(mm->lifetime_manager()->are_all_finalized()); 210 ARM_COMPUTE_ASSERT(mm->pool_manager()->num_pools() == 1); 211 212 // Fill tensors (1st iteration) 213 fill(AccessorType(src), 0); 214 fill(AccessorType(w1), 1); 215 fill(AccessorType(b1), 2); 216 fill(AccessorType(w2), 3); 217 fill(AccessorType(b2), 4); 218 219 // Compute functions (1st iteration) 220 fc_layer_1.run(); 221 fc_layer_2.run(); 222 223 // Update tensor shapes (2nd iteration) 224 auto src_padding = src.allocator()->info().padding(); 225 auto fc1_padding = fc1.allocator()->info().padding(); 226 auto dst_padding = dst.allocator()->info().padding(); 227 int diff = _max_batches - _cur_batches; 228 auto new_src_padding = PaddingSize(src_padding.top, src_padding.right, src_padding.bottom + diff, src_padding.left); 229 auto new_fc1_padding = PaddingSize(fc1_padding.top, fc1_padding.right, fc1_padding.bottom + diff, fc1_padding.left); 230 auto new_dst_padding = PaddingSize(dst_padding.top, dst_padding.right, dst_padding.bottom + diff, dst_padding.left); 231 src.allocator()->info().set_tensor_shape(TensorShape(128U, _cur_batches)).set_is_resizable(true).extend_padding(new_src_padding); 232 src.allocator()->info().set_is_resizable(false); 233 fc1.allocator()->info().set_tensor_shape(TensorShape(128U, _cur_batches)).set_is_resizable(true).extend_padding(new_fc1_padding); 234 fc1.allocator()->info().set_is_resizable(false); 235 dst.allocator()->info().set_tensor_shape(TensorShape(24U, _cur_batches)).set_is_resizable(true).extend_padding(new_dst_padding); 236 dst.allocator()->info().set_is_resizable(false); 237 238 // Configure FC info 239 FullyConnectedLayerInfo fc_info; 240 fc_info.retain_internal_weights = true; 241 242 // Configure functions (2nd iteration) 243 fc_layer_1.configure(&src, &w1, &b1, &fc1, fc_info); 244 fc_layer_2.configure(&fc1, &w2, &b2, &dst, fc_info); 245 246 // Fill tensors (2nd iteration) 247 fill(AccessorType(src), 5); 248 249 // Compute functions (2nd iteration) 250 fc_layer_1.run(); 251 fc_layer_2.run(); 252 253 return dst; 254 } 255 compute_reference()256 SimpleTensor<T> compute_reference() 257 { 258 // Create reference 259 SimpleTensor<T> w1{ TensorShape(128U, 128U), DataType::F32 }; 260 SimpleTensor<T> b1{ TensorShape(128U), DataType::F32 }; 261 SimpleTensor<T> w2{ TensorShape(128U, 24U), DataType::F32 }; 262 SimpleTensor<T> b2{ TensorShape(24U), DataType::F32 }; 263 SimpleTensor<T> src{ TensorShape(128U, _cur_batches), DataType::F32 }; 264 265 // Fill reference 266 fill(src, 5); 267 fill(w1, 1); 268 fill(b1, 2); 269 fill(w2, 3); 270 fill(b2, 4); 271 272 auto fc1 = reference::fully_connected_layer(src, w1, b1, TensorShape(128U, _cur_batches)); 273 return reference::fully_connected_layer(fc1, w2, b2, TensorShape(24U, _cur_batches)); 274 } 275 276 protected: 277 TensorType _target{}; 278 SimpleTensor<T> _reference{}; 279 AllocatorType _allocator{}; 280 unsigned int _max_batches{}; 281 unsigned int _cur_batches{}; 282 }; 283 284 /** Test case to run a fully connected layer followed by a softmax layer using a blob affinity memory manager, 285 * reconfigure with different shapes and rerun 286 * 287 * Runs a fully connected convolution layer followed by a softmax layer then reconfigures with different batch size and reruns 288 * Shapes of the reconfigure step are smaller that the initial configured step 289 */ 290 template <typename TensorType, typename AccessorType, typename AllocatorType, typename FullyConnectedFunction, typename SoftmaxFunction> 291 class BlobMemoryManagerReconfigure2TestCaseFixture : public framework::Fixture 292 { 293 using T = float; 294 295 public: setup()296 void setup() 297 { 298 _max_batches = 30; 299 _cur_batches = 3; 300 _target = compute_target(); 301 _reference = compute_reference(); 302 }; 303 304 protected: 305 template <typename U> fill(U && tensor,int i)306 void fill(U &&tensor, int i) 307 { 308 std::uniform_real_distribution<> distribution(0.5f, 1.f); 309 library->fill(tensor, distribution, i); 310 } 311 compute_target()312 TensorType compute_target() 313 { 314 AllocatorType allocator{}; 315 auto lifetime_mgr = std::make_shared<BlobLifetimeManager>(); 316 auto pool_mgr = std::make_shared<PoolManager>(); 317 auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr); 318 319 // Create tensors 320 TensorType w = create_tensor<TensorType>(TensorShape(112U, 8U), DataType::F32, 1); 321 TensorType b = create_tensor<TensorType>(TensorShape(8U), DataType::F32, 1); 322 TensorType src = create_tensor<TensorType>(TensorShape(1U, 1U, 112U, _max_batches), DataType::F32, 1); 323 TensorType fc = create_tensor<TensorType>(TensorShape(8U, _max_batches), DataType::F32, 1); 324 TensorType dst = create_tensor<TensorType>(TensorShape(8U, _max_batches), DataType::F32, 1); 325 326 // Create and configure function 327 FullyConnectedFunction fc_layer(mm); 328 SoftmaxFunction smx_layer(mm); 329 fc_layer.configure(&src, &w, &b, &fc); 330 smx_layer.configure(&fc, &dst); 331 332 // Allocate persistent tensors 333 w.allocator()->allocate(); 334 b.allocator()->allocate(); 335 336 // Allocate tensors (1st iteration) 337 src.allocator()->allocate(); 338 fc.allocator()->allocate(); 339 dst.allocator()->allocate(); 340 341 // Finalize memory manager 342 mm->populate(_allocator, 1 /* num_pools */); 343 ARM_COMPUTE_ASSERT(mm->lifetime_manager()->are_all_finalized()); 344 ARM_COMPUTE_ASSERT(mm->pool_manager()->num_pools() == 1); 345 346 // Fill tensors (1st iteration) 347 fill(AccessorType(src), 0); 348 fill(AccessorType(w), 1); 349 fill(AccessorType(b), 2); 350 351 // Compute functions (1st iteration) 352 fc_layer.run(); 353 smx_layer.run(); 354 355 // Get padding requirements 356 auto fc_padding = fc.allocator()->info().padding(); 357 358 // Configure FC info 359 FullyConnectedLayerInfo fc_info; 360 fc_info.retain_internal_weights = true; 361 362 // Run rest iterations 363 for(int i = _max_batches; i >= static_cast<int>(_cur_batches); --i) 364 { 365 int diff = _max_batches - i; 366 auto new_fc_padding = PaddingSize(fc_padding.top, fc_padding.right, fc_padding.bottom + diff, fc_padding.left); 367 src.allocator()->info().set_tensor_shape(TensorShape(1U, 1U, 112U, i)); 368 fc.allocator()->info().set_tensor_shape(TensorShape(8U, i)).set_is_resizable(true).extend_padding(new_fc_padding); 369 fc.allocator()->info().set_is_resizable(false); 370 dst.allocator()->info().set_tensor_shape(TensorShape(8U, i)); 371 372 // Configure functions 373 fc_layer.configure(&src, &w, &b, &fc, fc_info); 374 smx_layer.configure(&fc, &dst); 375 376 // Fill tensors 377 fill(AccessorType(src), 3); 378 379 // Compute functions 380 fc_layer.run(); 381 smx_layer.run(); 382 } 383 384 return dst; 385 } 386 compute_reference()387 SimpleTensor<T> compute_reference() 388 { 389 // Create reference 390 SimpleTensor<T> w{ TensorShape(112U, 8U), DataType::F32 }; 391 SimpleTensor<T> b{ TensorShape(8U), DataType::F32 }; 392 SimpleTensor<T> src{ TensorShape(1U, 1U, 112U, _cur_batches), DataType::F32 }; 393 394 // Fill reference 395 fill(src, 3); 396 fill(w, 1); 397 fill(b, 2); 398 399 auto fc = reference::fully_connected_layer(src, w, b, TensorShape(8U, _cur_batches)); 400 return reference::softmax_layer(fc, 1.f); 401 } 402 403 protected: 404 TensorType _target{}; 405 SimpleTensor<T> _reference{}; 406 AllocatorType _allocator{}; 407 unsigned int _max_batches{}; 408 unsigned int _cur_batches{}; 409 }; 410 } // namespace validation 411 } // namespace test 412 } // namespace arm_compute 413 #endif /* ARM_COMPUTE_TEST_UNIT_MEMORY_MANAGER */ 414