/*
 * Copyright (c) 2017-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE
#define ARM_COMPUTE_TEST_GEMM_FIXTURE

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/experimental/IPostOp.h"
#include "src/core/experimental/PostOpUtils.h"
#include "tests/AssetsLibrary.h"
#include "tests/Globals.h"
#include "tests/IAccessor.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/ActivationLayer.h"
#include "tests/validation/reference/ElementwiseOperations.h"
#include "tests/validation/reference/GEMM.h"
#include "tests/validation/reference/PostOps.h"

#include <random>

namespace arm_compute
{
namespace test
{
namespace validation
{
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
class GEMMValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
    {
        ARM_COMPUTE_UNUSED(pretranspose);
        _target    = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type);
        _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f)
    {
        switch(tensor.data_type())
        {
            case DataType::F16:
            {
                arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ float(lo), float(hi) };
                library->fill(tensor, distribution, i);
                break;
            }
            case DataType::F32:
            {
                std::uniform_real_distribution<float> distribution(lo, hi);
                library->fill(tensor, distribution, i);
                break;
            }
            default:
                library->fill_tensor_uniform(tensor, i);
        }
    }

    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta,
                              DataType data_type)
    {
        // Create tensors
        TensorType a   = create_tensor<TensorType>(shape_a, data_type, 1);
        TensorType b   = create_tensor<TensorType>(shape_b, data_type, 1);
        TensorType c   = create_tensor<TensorType>(shape_c, data_type, 1);
        TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1);

        // Create and configure function
        FunctionType gemm;
        // The GEMMInfo includes the depth of the output in case it is reinterpreted as 3D.
        // If the output shape has the same number of dimensions as the input, the method called is a 2D matrix multiplication (depth_output_reinterpreted_as_3D = 0);
        // otherwise, the reinterpreted version of GEMM has to be used (depth_output_reinterpreted_as_3D = depth of the 3D output).
        gemm.configure(&a,
                       &b,
                       (disable_c) ? nullptr : &c,
                       &dst,
                       alpha, beta,
                       GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d
                                || reinterpret_output_as_3d)));
        ARM_COMPUTE_ASSERT(a.info()->is_resizable());
        ARM_COMPUTE_ASSERT(b.info()->is_resizable());
        ARM_COMPUTE_ASSERT(c.info()->is_resizable());
        ARM_COMPUTE_ASSERT(dst.info()->is_resizable());

        add_padding_x({ &a, &b, &c, &dst });

        // Allocate tensors
        a.allocator()->allocate();
        b.allocator()->allocate();
        c.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(a), 0);
        fill(AccessorType(b), 1);
        if(!disable_c)
        {
            fill(AccessorType(c), 2);
        }

        // Run with variable inputs.
        if(run_twice)
        {
            gemm.run();
            fill(AccessorType(a), 3); // Fill tensors with new seed after run
            fill(AccessorType(b), 4);
            if(!disable_c)
            {
                fill(AccessorType(c), 5);
            }
        }

        // Compute GEMM function
        gemm.run();

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta,
                                      DataType data_type)
    {
        TensorShape shape_a_to_use = shape_a;
        if(reinterpret_input_as_3d)
        {
            // Collapse the second and third dimension if the input is 3D
            shape_a_to_use.collapse(2U, 1U);
        }

        // Create reference
        SimpleTensor<T> a{ shape_a_to_use, data_type, 1 };
        SimpleTensor<T> b{ shape_b, data_type, 1 };
        SimpleTensor<T> c{ output_shape, data_type, 1 };

        // Fill reference
        fill(a, 0);
        fill(b, 1);
        fill(c, 2);

        if(reinterpret_input_as_3d || reinterpret_output_as_3d)
        {
            const int n = shape_b[0];
            const int m = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1];
            const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2];

            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(c.data() + i * n, c.data(), n * sizeof(T));
            }
        }

        /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N): if pretranspose_A is set to true, then A is assumed to be (B x K x M)
           and must be pre-transposed before being passed to the fixture. The fixture then transposes A again, back to (B x M x K), so that the reference
           implementation, which works with (B x M x K) input, can be called.
           Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K) and must be pre-transposed before being passed to the fixture. */

        // Define transposed shapes
        TensorShape a_transposed_shape(a.shape().y(), a.shape().x());
        TensorShape b_transposed_shape(b.shape().y(), b.shape().x());

        // Define transposed tensors
        SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
        SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };

        // pretranspose a if necessary
        if(pretranspose_a)
        {
            transpose_matrix<T>(a, a_transposed);
        }

        // pretranspose b if necessary
        if(pretranspose_b)
        {
            transpose_matrix<T>(b, b_transposed);
        }

        // Run with variable inputs.
        if(run_twice)
        {
            reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
            fill((pretranspose_a) ? a_transposed : a, 3);
            fill((pretranspose_b) ? b_transposed : b, 4);
            fill(c, 5);
        }

        // Setting beta to 0 will effectively disable C for the
        // computation of the reference: alpha * A * B + 0 * C
        // Use transposed tensors if boolean enabled else use original tensors
        auto r = reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
        return r;
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};
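
/* Usage sketch (illustrative only, not part of this header): a fixture is normally bound to a backend and
   wired into a test case through the framework macros, along the lines of:

     using CLGEMMFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, float>;

     FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixture, framework::DatasetMode::PRECOMMIT, small_gemm_dataset)
     {
         // _target and _reference are produced by setup(); the test body only validates them
         validate(CLAccessor(_target), _reference, tolerance_f32);
     }

   The dataset (small_gemm_dataset) and tolerance (tolerance_f32) above are placeholders defined by the
   individual test suites. */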

template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info,
               DataType data_type, GPUTarget gpu_arch)
    {
        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, broadcast_bias, fp16_mixed_precision, act_info, gpu_arch);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                              bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType dst;

        const unsigned int m = lhs_shape[1];
        const unsigned int n = rhs_shape[0];
        const unsigned int k = lhs_shape[0];
        GEMMReshapeInfo reshape_info(m, n, k, 1, 1, 0, false, broadcast_bias);

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &bias, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0]          = rhs_shape[0];
        dst_shape[1]          = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
               const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
    {
        ARM_COMPUTE_UNUSED(broadcast_bias);

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, m_h, fp16_mixed_precision, act_info, gpu_arch);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                              bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType dst;

        const unsigned int m = lhs_shape[1];
        const unsigned int n = rhs_shape[0];
        const unsigned int k = lhs_shape[0];
        GEMMReshapeInfo reshape_info(m, n, k, 1, 1, m_h, false, true);

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &bias, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};
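
// In the GEMM3D fixtures (the *3DValidationFixture variants), the 2D output of the (M x K) * (K x N)
// multiplication is reinterpreted as a 3D tensor: M is the product m_w * m_h and the output depth
// (depth_output_gemm3d) is m_h. The reference path reproduces this by splitting dimension 1 of the
// destination shape into (m_w, m_h).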

template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision,
               const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0         = 4;
        lhs_info.k0         = 4;
        lhs_info.v0         = v0;
        lhs_info.interleave = true;
        lhs_info.transpose  = true;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0         = 16 / sizeof(T);
        rhs_info.k0         = 1;
        rhs_info.h0         = h0;
        rhs_info.interleave = false;
        rhs_info.transpose  = false;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, fp16_mixed_precision, act_info, gpu_arch);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int m = lhs_shape[1];
        const unsigned int n = rhs_shape[0];
        const unsigned int k = lhs_shape[0];
        GEMMReshapeInfo reshape_info(m, n, k, rhs_info.h0, lhs_info.v0, 0, false, broadcast_bias);

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;
        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        lhs_reshaped.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
        reshape_lhs.run(reshape_lhs_pack);
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0]          = rhs_shape[0];
        dst_shape[1]          = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias,
               bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
    {
        ARM_COMPUTE_UNUSED(broadcast_bias);

        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0         = 4;
        lhs_info.k0         = 4;
        lhs_info.v0         = v0;
        lhs_info.interleave = true;
        lhs_info.transpose  = true;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0         = 16 / sizeof(T);
        rhs_info.k0         = 1;
        rhs_info.h0         = h0;
        rhs_info.interleave = false;
        rhs_info.transpose  = false;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, fp16_mixed_precision, act_info, gpu_arch);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, unsigned int m_h, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int m = lhs_shape[1];
        const unsigned int n = rhs_shape[0];
        const unsigned int k = lhs_shape[0];
        GEMMReshapeInfo reshape_info(m, n, k, rhs_info.h0, lhs_info.v0, m_h, false, true);

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;
        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        lhs_reshaped.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
        reshape_lhs.run(reshape_lhs_pack);
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};
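
// The reshaped fixtures below exercise the kernels that consume explicitly reshaped LHS/RHS matrices.
// m0/n0/k0 are the block sizes processed per work item, while v0 (LHS) and h0 (RHS) control how many
// vertical/horizontal blocks are packed together by the reshape operators; see GEMMLHSMatrixInfo and
// GEMMRHSMatrixInfo for the exact semantics.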

template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
               bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0         = m0;
        lhs_info.k0         = k0;
        lhs_info.v0         = v0;
        lhs_info.interleave = interleave_lhs;
        lhs_info.transpose  = lhs_transpose;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0                 = n0;
        rhs_info.k0                 = k0;
        rhs_info.h0                 = h0;
        rhs_info.interleave         = interleave_rhs;
        rhs_info.transpose          = !lhs_transpose;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m                       = M;
        kernel_info.n                       = N;
        kernel_info.k                       = K;
        kernel_info.depth_output_gemm3d     = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias          = broadcast_bias;
        kernel_info.activation_info         = act_info;
        kernel_info.fp_mixed_precision      = fp_mixed_precision;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        lhs_reshaped.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
        reshape_lhs.run(reshape_lhs_pack);
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0]          = rhs_shape[0];
        dst_shape[1]          = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        if(fp_mixed_precision)
        {
            return reference::activation_layer(reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta), act_info);
        }
        else
        {
            return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
        }
    }

    bool            validate_result = true;
    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

/** (EXPERIMENTAL_POST_OPS)*/
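// This fixture extends the reshaped GEMM validation above with an experimental::PostOpList. Each post-op
// argument is described by a PostOpArgBroadcast tuple stating whether it broadcasts along dimension 0, 1
// or 2; the tuples are converted to concrete TensorShapes in setup() before being passed on.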
template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
class GEMMMatrixMultiplyReshapedWithPostOpsValidationFixture : public framework::Fixture
{
public:
    using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Instruct fixture if we need broadcasting in dimension 0, 1, 2 of each PostOp argument
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
               bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info,
               const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0         = m0;
        lhs_info.k0         = k0;
        lhs_info.v0         = v0;
        lhs_info.interleave = interleave_lhs;
        lhs_info.transpose  = lhs_transpose;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0                 = n0;
        rhs_info.k0                 = k0;
        rhs_info.h0                 = h0;
        rhs_info.interleave         = interleave_rhs;
        rhs_info.transpose          = !lhs_transpose;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);
        auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
                                                                                                                    [ = ](auto broadcast)
        {
            return TensorShape
            {
                std::get<0>(broadcast) ? 1 : n,
                std::get<1>(broadcast) ? 1 : m,
                std::get<2>(broadcast) ? 1 : batch_size,
            };
        });

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);

        // Create post op tensors and populate post op with them
        std::vector<TensorType> post_op_tensors_holder{};
        auto                    populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
                                                                                                                                [&post_op_tensors_holder, &data_type](auto shape)
        {
            auto t = create_tensor<TensorType>(shape, data_type, 1);
            post_op_tensors_holder.push_back(std::move(t));
            return post_op_tensors_holder.back().info();
        });
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k                       = K;
        kernel_info.depth_output_gemm3d     = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias          = broadcast_bias;
        kernel_info.activation_info         = act_info;
        kernel_info.fp_mixed_precision      = fp_mixed_precision;
        kernel_info.post_ops                = populated_post_ops;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
        for(const auto &tensor : post_op_tensors_holder)
        {
            ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
        }

        // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
            for(auto &tensor : post_op_tensors_holder)
            {
                add_padding_x({ &tensor });
            }
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        lhs_reshaped.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();
        for(auto &tensor : post_op_tensors_holder)
        {
            tensor.allocator()->allocate();
        }

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
        for(const auto &tensor : post_op_tensors_holder)
        {
            ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
        }

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);
        for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
        {
            fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
        }

        // Compute GEMM
        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
        reshape_lhs.run(reshape_lhs_pack);
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
        {
            gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
        }
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0]          = rhs_shape[0];
        dst_shape[1]          = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };
        // Create post op tensors and populate post op with them
        auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
        {
            return SimpleTensor<T> { shape, data_type, 1 };
        });

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        int tensor_idx = 0;
        fill(lhs, tensor_idx++);
        fill(rhs, tensor_idx++);
        fill(bias, tensor_idx++);
        for(auto &op : populated_post_ops.get_list())
        {
            for(auto tensor : op->arguments())
            {
                fill(*tensor, tensor_idx++);
            }
        }

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        SimpleTensor<T> out;
        if(fp_mixed_precision)
        {
            out = reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta);
        }
        else
        {
            out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
        }
        // Ignore activation info if post ops are used instead
        if(populated_post_ops.size() > 0)
        {
            out = reference::post_ops<T>(out, populated_post_ops);
        }
        else
        {
            out = reference::activation_layer(out, act_info);
        }
        return out;
    }

    bool            validate_result = true;
    TensorType      _target{};
    SimpleTensor<T> _reference{};
};
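
// When a non-empty post-op list is provided, the target path appends each post-op argument tensor to the
// GEMM tensor pack via experimental::get_post_op_arg_type(i), and the reference path applies
// reference::post_ops instead of the stand-alone activation, mirroring the kernel behaviour.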

template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
               bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0         = m0;
        lhs_info.k0         = k0;
        lhs_info.v0         = v0;
        lhs_info.interleave = interleave_lhs;
        lhs_info.transpose  = lhs_transpose;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0                 = n0;
        rhs_info.k0                 = k0;
        rhs_info.h0                 = h0;
        rhs_info.interleave         = interleave_rhs;
        rhs_info.transpose          = !lhs_transpose;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m                       = M;
        kernel_info.n                       = N;
        kernel_info.k                       = K;
        kernel_info.depth_output_gemm3d     = m_h;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias          = true;
        kernel_info.activation_info         = act_info;
        kernel_info.fp_mixed_precision      = fp_mixed_precision;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); 1320 1321 // We do not pad when using image as it needs to comply to strict pitch alignment restrictions 1322 if(!rhs_info.export_to_cl_image) 1323 { 1324 add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); 1325 } 1326 1327 // Allocate tensors 1328 lhs.allocator()->allocate(); 1329 rhs.allocator()->allocate(); 1330 lhs_reshaped.allocator()->allocate(); 1331 rhs_reshaped.allocator()->allocate(); 1332 bias.allocator()->allocate(); 1333 dst.allocator()->allocate(); 1334 1335 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); 1336 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); 1337 ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); 1338 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); 1339 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); 1340 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); 1341 1342 // Fill tensors 1343 fill(AccessorType(lhs), 0); 1344 fill(AccessorType(rhs), 1); 1345 fill(AccessorType(bias), 2); 1346 1347 // Compute GEMM 1348 ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; 1349 reshape_lhs.run(reshape_lhs_pack); 1350 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; 1351 reshape_rhs.run(reshape_rhs_pack); 1352 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, 1353 { ACL_SRC_1, &rhs_reshaped }, 1354 { ACL_SRC_2, &bias }, 1355 { ACL_DST, &dst } 1356 }); 1357 gemm.run(gemm_pack); 1358 1359 return dst; 1360 } 1361 compute_reference(const TensorShape & lhs_shape,const TensorShape & rhs_shape,DataType data_type,float alpha,float beta,unsigned int m_h,const ActivationLayerInfo & act_info)1362 SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h, 1363 const ActivationLayerInfo &act_info) 1364 { 1365 TensorShape dst_shape = lhs_shape; 1366 dst_shape.set(0, rhs_shape[0]); 1367 dst_shape.set(1, lhs_shape[1] / m_h); 1368 dst_shape.set(2, m_h); 1369 dst_shape.set(3, lhs_shape[2]); 1370 1371 // Create reference 1372 SimpleTensor<T> lhs{ lhs_shape, data_type, 1 }; 1373 SimpleTensor<T> rhs{ rhs_shape, data_type, 1 }; 1374 SimpleTensor<T> bias{ dst_shape, data_type, 1 }; 1375 1376 const int n = rhs_shape[0]; 1377 const int m = lhs_shape[1]; 1378 const int batch_size = lhs_shape[2]; 1379 1380 // Fill reference 1381 fill(lhs, 0); 1382 fill(rhs, 1); 1383 fill(bias, 2); 1384 1385 // In case of broadcast, we need to simply copy the first into the following "M" ones 1386 for(int i = 1; i < m * batch_size; i++) 1387 { 1388 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); 1389 } 1390 1391 if(fp_mixed_precision) 1392 { 1393 return reference::activation_layer(reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta), act_info); 1394 } 1395 else 1396 { 1397 return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info); 1398 } 1399 } 1400 1401 bool validate_result = true; 1402 TensorType _target{}; 1403 SimpleTensor<T> _reference{}; 1404 }; 1405 1406 template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType> 1407 class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture 1408 { 1409 public: 1410 template <typename...> setup(unsigned int m,unsigned int n,unsigned int k,unsigned int batch_size,unsigned int m0,unsigned int n0,unsigned int k0,unsigned int h0,bool interleave_rhs,bool transpose_rhs,bool 
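/** Fixture to validate the GEMM kernels that reshape only the RHS matrix before the matrix
 *  multiplication. The reshaped RHS can optionally be exported to a CL image; when that
 *  configuration is not supported, validate_result is cleared and the reference is not computed.
 */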
template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
               bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = transpose_rhs;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = broadcast_bias;
        kernel_info.activation_info = act_info;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

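    // The reference runs the same GEMM on SimpleTensor inputs filled with the same seeds as the
    // target and applies the same activation function afterwards.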
    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    bool validate_result = true;
    TensorType _target{};
    SimpleTensor<T> _reference{};
};

/** (EXPERIMENTAL_POST_OPS) */
template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture : public framework::Fixture
{
public:
    using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Instruct fixture if we need broadcasting in dimension 0, 1, 2 of each PostOp argument
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
               bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info,
               const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = transpose_rhs;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);
        auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
                                                                                                                    [ = ](auto broadcast)
        {
            return TensorShape
            {
                std::get<0>(broadcast) ? 1 : n,
                std::get<1>(broadcast) ? 1 : m,
                std::get<2>(broadcast) ? 1 : batch_size,
            };
        });

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;
        // Create post op tensors and populate post op with them
        std::vector<TensorType> post_op_tensors_holder{};
        auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
                                                                                                             [&post_op_tensors_holder, &data_type](auto shape)
        {
            auto t = create_tensor<TensorType>(shape, data_type, 1);
            post_op_tensors_holder.push_back(std::move(t));
            return post_op_tensors_holder.back().info();
        });

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = broadcast_bias;
        kernel_info.activation_info = act_info;
        kernel_info.post_ops = populated_post_ops;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
        for(const auto &tensor : post_op_tensors_holder)
        {
            ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
        }

        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
            for(auto &tensor : post_op_tensors_holder)
            {
                add_padding_x({ &tensor });
            }
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();
        for(auto &tensor : post_op_tensors_holder)
        {
            tensor.allocator()->allocate();
        }

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
        for(const auto &tensor : post_op_tensors_holder)
        {
            ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
        }

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);
        for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
        {
            fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
        }

        // Compute GEMM
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
        {
            gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
        }
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };
        // Create post op tensors and populate post op with them
        auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
        {
            return SimpleTensor<T> { shape, data_type, 1 };
        });

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
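        // Use an incrementing seed so that LHS, RHS, bias and every post-op argument are filled
        // with independent data, matching the seed order used in compute_target.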
        int tensor_idx = 0;
        fill(lhs, tensor_idx++);
        fill(rhs, tensor_idx++);
        fill(bias, tensor_idx++);
        for(auto &op : populated_post_ops.get_list())
        {
            for(auto tensor : op->arguments())
            {
                fill(*tensor, tensor_idx++);
            }
        }

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        SimpleTensor<T> out;
        out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
        // Ignore activation info if post ops are used instead
        if(populated_post_ops.size() > 0)
        {
            out = reference::post_ops<T>(out, populated_post_ops);
        }
        else
        {
            out = reference::activation_layer(out, act_info);
        }
        return out;
    }

    bool validate_result = true;
    TensorType _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
               bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = transpose_rhs;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info, has_pad_y);
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info, bool has_pad_y)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = m_h;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = true;
        kernel_info.activation_info = act_info;
        kernel_info.has_pad_y = has_pad_y;

        // The output tensor will be auto-initialized within the function
        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        if(has_pad_y)
        {
            // Add dummy padding into lhs to validate has_pad_y path
            lhs.info()->extend_padding(PaddingSize(2, 0, 2, 0));
            dst.info()->extend_padding(PaddingSize(2, 0, 1, 0));
        }

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

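    // The reference output is reinterpreted as 3D: the (N, M) result is reshaped to
    // (N, M / m_h, m_h, batch_size) to match the GEMM3D target output.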
    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    bool validate_result = true;
    TensorType _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
               const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = broadcast_bias;
        kernel_info.activation_info = act_info;

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &bias, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

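        // The reference bias is created with dst_shape; when broadcast_bias is set the target bias
        // has shape (N, 1, 1), so replicate its first row across all M * batch_size rows.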
        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyNativeWithPostOpsValidationFixture : public framework::Fixture
{
public:
    using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Instruct fixture if we need broadcasting in dimension 0, 1, 2 of each PostOp argument
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
               const ActivationLayerInfo &act_info, const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);
        const auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
                                                                                                                          [ = ](auto broadcast)
        {
            return TensorShape
            {
                std::get<0>(broadcast) ? 1 : n,
                std::get<1>(broadcast) ? 1 : m,
                std::get<2>(broadcast) ? 1 : batch_size,
            };
        });

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType dst;
        // Create post op tensors and populate post op with them
        std::vector<TensorType> post_op_tensors_holder{};
        auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
                                                                                                             [&post_op_tensors_holder, &data_type](auto shape)
        {
            auto t = create_tensor<TensorType>(shape, data_type, 1);
            post_op_tensors_holder.push_back(std::move(t));
            return post_op_tensors_holder.back().info();
        });

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = broadcast_bias;
        kernel_info.activation_info = act_info;
        kernel_info.post_ops = populated_post_ops;

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
        for(const auto &tensor : post_op_tensors_holder)
        {
            ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
        }

        add_padding_x({ &lhs, &rhs, &bias, &dst });
        for(auto &tensor : post_op_tensors_holder)
        {
            add_padding_x({ &tensor });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();
        for(auto &tensor : post_op_tensors_holder)
        {
            tensor.allocator()->allocate();
        }

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
        for(const auto &tensor : post_op_tensors_holder)
        {
            ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
        }

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);
        for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
        {
            fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
        }

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
        {
            gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
        }
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };
        // Create post op tensors and populate post op with them
        auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
        {
            return SimpleTensor<T> { shape, data_type, 1 };
        });

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        int tensor_idx = 0;
        fill(lhs, tensor_idx++);
        fill(rhs, tensor_idx++);
        fill(bias, tensor_idx++);
        for(auto &op : populated_post_ops.get_list())
        {
            for(auto tensor : op->arguments())
            {
                fill(*tensor, tensor_idx++);
            }
        }

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        SimpleTensor<T> out;
        out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
        // Ignore activation info if post ops are used instead
        if(populated_post_ops.size() > 0)
        {
            out = reference::post_ops<T>(out, populated_post_ops);
        }
        else
        {
            out = reference::activation_layer(out, act_info);
        }
        return out;
    }

    TensorType _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta,
               const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = m_h;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = true;
        kernel_info.activation_info = act_info;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &bias, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

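    // The bias of the GEMM3D target has shape (N, 1, 1), so the reference replicates its single
    // row across all M * batch_size output rows before computing the GEMM and the activation.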
    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha,
               float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.interleave = true;
        rhs_info.transpose = false;
        rhs_info.h0 = 4;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m = M;
        kernel_info.n = N;
        kernel_info.k = K;
        kernel_info.depth_output_gemm3d = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias = broadcast_bias;
        kernel_info.activation_info = act_info;

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);

        validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
        if(!validate_result)
        {
            return nullptr;
        }

        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        if(!validate_result)
            return SimpleTensor<T>();

        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n = rhs_shape[0];
        const int m = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    bool validate_result = true;
    TensorType _target{};
    SimpleTensor<T> _reference{};
};

} // namespace validation
} // namespace test
} // namespace arm_compute
#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */