//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClContextControlFixture.hpp"
#include "ClWorkloadFactoryHelper.hpp"

#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>
#include <armnnTestUtils/TensorCopyUtils.hpp>
#include <armnnTestUtils/TensorHelpers.hpp>
#include <armnnTestUtils/WorkloadTestUtils.hpp>

#include <aclCommon/test/CreateWorkloadClNeon.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <cl/ClImportTensorHandle.hpp>
#include <cl/ClImportTensorHandleFactory.hpp>
#include <cl/ClTensorHandle.hpp>
#include <cl/ClWorkloadFactory.hpp>
#include <cl/workloads/ClWorkloads.hpp>
#include <cl/workloads/ClWorkloadUtils.hpp>

#include <doctest/doctest.h>

armnn::PredicateResult CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
                                                   std::initializer_list<unsigned int> expectedDimensions)
{
    return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
}

TEST_SUITE("CreateWorkloadCl")
{
template <armnn::DataType DataType>
static void ClCreateActivationWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    predResult = CompareIClTensorHandleShape(outputHandle, {1, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloatWorkload")
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload")
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
}

template <typename WorkloadType,
          armnn::DataType DataType>
static void ClCreateElementwiseWorkloadTest(BinaryOperation binaryOperator)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseBinaryWorkloadTest<WorkloadType, DataType>(factory, graph, binaryOperator);

    // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseBinaryWorkloadTest).
    auto queueDescriptor = workload->GetData();
    auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle1, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(inputHandle2, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload")
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
                                    armnn::DataType::Float32>(BinaryOperation::Add);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload")
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
                                    armnn::DataType::Float16>(BinaryOperation::Add);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload")
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
                                    armnn::DataType::Float32>(BinaryOperation::Sub);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload")
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
                                    armnn::DataType::Float16>(BinaryOperation::Sub);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    armnn::DataType::Float32>(BinaryOperation::Mul);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    armnn::DataType::Float16>(BinaryOperation::Mul);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    armnn::DataType::QAsymmU8>(BinaryOperation::Mul);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
                                    armnn::DataType::Float32>(BinaryOperation::Div);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
                                    armnn::DataType::Float16>(BinaryOperation::Div);
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
static void ClCreateElementwiseUnaryWorkloadTest(armnn::UnaryOperation op)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op);

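    // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseUnaryWorkloadTest).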
    DescriptorType queueDescriptor = workload->GetData();

    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateRsqrtFloat32WorkloadTest")
{
    ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>(
        UnaryOperation::Rsqrt);
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    armnn::PredicateResult predResult(true);
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            break;
        default: // NCHW
            predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    }
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNchwWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloat16NchwWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNhwcWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationNhwcFloat16NhwcWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp16ToFp32Workload")
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);

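    // Checks that inputs/outputs are as we expect them (see definition of CreateConvertFp16ToFp32WorkloadTest).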
    ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
    CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp32ToFp16Workload")
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);

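    // Checks that inputs/outputs are as we expect them (see definition of CreateConvertFp32ToFp16WorkloadTest).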
    ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
    CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
}

template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
                                                                                       graph,
                                                                                       dataLayout);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
                                                              : std::initializer_list<unsigned int>({2, 8, 16, 3});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
                                                               : std::initializer_list<unsigned int>({2, 2, 10, 2});

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNchwWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNhwcWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NchwWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NhwcWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFastMathEnabledWorkload")
{
    Graph graph;

    using ModelOptions = std::vector<BackendOptions>;
    ModelOptions modelOptions = {};
    BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", true }
    });
    modelOptions.push_back(gpuAcc);

    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);

    auto workload =
        CreateConvolution2dWorkloadFastMathTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
                                                                                                   graph,
                                                                                                   DataLayout::NCHW,
                                                                                                   modelOptions);

    ARMNN_ASSERT(workload != nullptr);
    auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
    IgnoreUnused(conv2dWorkload);
    ARMNN_ASSERT(conv2dWorkload != nullptr);
    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClReplaceInputOutputConvolution2dWorkload")
{
    // Create Convolution2dWorkload with ClTensorHandle input and output
    // Then replace the input and output with ClImportTensorHandle
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType::Float32>(factory,
                                                                                    graph,
                                                                                    DataLayout::NHWC);

    TensorShape inputShape = std::initializer_list<unsigned int>({2, 8, 16, 3});
    TensorShape outputShape = std::initializer_list<unsigned int>({2, 2, 10, 2});

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
    // The input and output handles are created correctly as ClTensorHandle
    CHECK((dynamic_cast<ClTensorHandle*>(inputHandle) != nullptr));
    CHECK((dynamic_cast<ClTensorHandle*>(outputHandle) != nullptr));

    // Replace with ImportTensorHandle
    ClImportTensorHandleFactory importFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo inputInfo({ 2, 8, 16, 3 }, DataType::Float32);
    TensorInfo outputInfo({ 2, 2, 10, 2 }, DataType::Float32);

    // create TensorHandle for memory import
    auto inputImportHandle = importFactory.CreateTensorHandle(inputInfo);
    auto outputImportHandle = importFactory.CreateTensorHandle(outputInfo);

    // Calling ReplaceInputTensorHandle and ReplaceOutputTensorHandle does not throw an exception
    // as the Reconfigure function is implemented
    workload->ReplaceInputTensorHandle(inputImportHandle.get(), 0);
    workload->ReplaceOutputTensorHandle(outputImportHandle.get(), 0);

    // Check that the handles were correctly replaced with the import handles and carry the correct information
    queueDescriptor = workload->GetData();
    auto replacedInputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto replacedOutputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((replacedInputHandle->GetShape() == inputShape));
    CHECK((replacedOutputHandle->GetShape() == outputShape));

    CHECK((inputImportHandle.get() == replacedInputHandle));
    CHECK((outputImportHandle.get() == replacedOutputHandle));

    CHECK((dynamic_cast<ClTensorHandle*>(replacedInputHandle) == nullptr));
    CHECK((dynamic_cast<ClImportTensorHandle*>(replacedInputHandle) != nullptr));
    CHECK((dynamic_cast<ClTensorHandle*>(replacedOutputHandle) == nullptr));
    CHECK((dynamic_cast<ClImportTensorHandle*>(replacedOutputHandle) != nullptr));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dClCompiledContextWorkload")
{
    using namespace armnn;

    const DataType inputType = DataType::QAsymmU8;
    const DataType kernelType = DataType::QSymmS8;
    const DataType biasType = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);

    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
    constexpr unsigned int quantDimension = 0;

    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);

    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);

    std::vector<uint8_t> inputData =
    {
        138, 108, 138, 108, 138, 108
    };

    std::vector<int8_t> kernelData =
    {
        1, 2, 1, 2, 1, 2
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4
    };

    std::vector<uint8_t> expectedOutputData =
    {
        121, 118, 115, 121, 118, 115, 121, 118, 115
    };


    Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = DataLayout::NHWC;

    auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
    auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
    auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);

    std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
    std::unique_ptr<armnn::ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);


    WorkloadInfo workloadInfo;

    Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get());
    AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    // Initialize our m_CLCompileContext using default device and context
    auto context = arm_compute::CLKernelLibrary::get().context();
    auto device = arm_compute::CLKernelLibrary::get().get_device();
    auto clCompileContext = arm_compute::CLCompileContext(context, device);



    // Check built programs are empty in context
    CHECK(clCompileContext.get_built_programs().empty());

    auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
                                                              workloadInfo,
                                                              clMemoryManager->GetIntraLayerManager(),
                                                              clCompileContext);
    ARMNN_ASSERT(workload != nullptr);
    // Check built programs are not empty in context
    CHECK(!clCompileContext.get_built_programs().empty());
}

template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                              : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDepthwiseConvolutionFloat32NhwcWorkload")
{
    ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
static void ClDirectConvolution2dWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloatWorkload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloat16Workload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dUint8Workload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>();
}

template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
static void ClCreateFullyConnectedWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {3, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}


TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloatWorkloadTest")
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloat16WorkloadTest")
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
}

template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
static void ClNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                              : std::initializer_list<unsigned int>({3, 1, 5, 5});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                               : std::initializer_list<unsigned int>({3, 1, 5, 5});

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NchwWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NchwWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NhwcWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NhwcWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void ClPooling2dWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
                                                              : std::initializer_list<unsigned int>({3, 5, 5, 2});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
                                                               : std::initializer_list<unsigned int>({3, 2, 4, 2});

    // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNchwWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNhwcWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NchwWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NhwcWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}

static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
                                      const armnn::TensorShape& alphaShape,
                                      const armnn::TensorShape& outputShape,
                                      armnn::DataType dataType)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
                                                             graph,
                                                             inputShape,
                                                             alphaShape,
                                                             outputShape,
                                                             dataType);

    // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
    PreluQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((alphaHandle->GetShape() == alphaShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloat16Workload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloatWorkload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluUint8Workload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
}

template <typename armnn::DataType DataType>
static void ClCreateReshapeWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {1, 4});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloatWorkload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloat16Workload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeUint8Workload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>();
}

template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
static void ClSoftmaxWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    armnn::TensorInfo tensorInfo({4, 1}, DataType);
    if (DataType == armnn::DataType::QAsymmU8)
    {
        tensorInfo.SetQuantizationOffset(0);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }
    else if (DataType == armnn::DataType::QAsymmS8)
    {
        tensorInfo.SetQuantizationOffset(-128);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }

    auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}


TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat32WorkloadTest")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat16WorkloadTest")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmU8Workload")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmU8>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmS8Workload")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmS8>();
}

template <typename armnn::DataType DataType>
static void ClSplitterWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);

    // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
    SplitterQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {5, 7, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
    predResult = CompareIClTensorHandleShape(outputHandle1, {2, 7, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
    predResult = CompareIClTensorHandleShape(outputHandle2, {2, 7, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    predResult = CompareIClTensorHandleShape(outputHandle0, {1, 7, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloatWorkload")
{
    ClSplitterWorkloadTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloat16Workload")
{
    ClSplitterWorkloadTest<armnn::DataType::Float16>();
}

template <typename armnn::DataType DataType>
static void ClSplitterConcatTest()
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the concat layer.
    // We test that it is possible to specify the 0th output of the splitter to be the 1st input to the
    // concat, and the 1st output of the splitter to be the 0th input of the concat.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workloads =
        CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
            (factory, graph);

    auto wlSplitter = std::move(workloads.first);
    auto wlConcat = std::move(workloads.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);

    CHECK(sOut0);
    CHECK(sOut1);
    CHECK(mIn0);
    CHECK(mIn1);

    // Flipped order of inputs/outputs.
    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
    CHECK(validDataPointers);


    // Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor.
    bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
                                 && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());

    CHECK(validSubTensorParents);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloatWorkload")
{
    ClSplitterConcatTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloat16Workload")
{
    ClSplitterConcatTest<armnn::DataType::Float16>();
}


TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSingleOutputMultipleInputs")
{
    // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs, and each of those outputs is used by two different activation layers.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    std::unique_ptr<ClSplitterWorkload> wlSplitter;
    std::unique_ptr<ClActivationWorkload> wlActiv0_0;
    std::unique_ptr<ClActivationWorkload> wlActiv0_1;
    std::unique_ptr<ClActivationWorkload> wlActiv1_0;
    std::unique_ptr<ClActivationWorkload> wlActiv1_1;

    CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
        ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
                                                        wlActiv1_0, wlActiv1_1);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);


    CHECK(sOut0);
    CHECK(sOut1);
    CHECK(activ0_0Im);
    CHECK(activ0_1Im);
    CHECK(activ1_0Im);
    CHECK(activ1_1Im);

    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    CHECK(validDataPointers);
}

#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend; it's not available if the reference backend is not built.

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMemCopyWorkloadsCl")
{
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    CreateMemCopyWorkloads<IClTensorHandle>(factory);
}

#endif

template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                                                              : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                                                               : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNchwWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNhwcWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NchwWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NhwcWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
static void ClCreateLogSoftmaxWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
    LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLogSoftmaxFloat32WorkloadTest")
{
    ClCreateLogSoftmaxWorkloadTest<ClLogSoftmaxWorkload, armnn::DataType::Float32>();
}

template <typename LstmWorkloadType>
static void ClCreateLstmWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);

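    // Checks that inputs/outputs are as we expect them (see definition of CreateLstmWorkloadTest).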
    LstmQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 2});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {2, 4});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLSTMWorkloadFloatWorkload")
{
    ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
}

template <typename ResizeWorkloadType, typename armnn::DataType DataType>
static void ClResizeWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);

    auto queueDescriptor = workload->GetData();

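    // Checks that inputs/outputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest).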
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    armnn::PredicateResult predResult(true);
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            break;
        default: // DataLayout::NCHW
            predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    }
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
}

template <typename MeanWorkloadType, typename armnn::DataType DataType>
static void ClMeanWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
    MeanQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
    auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, { 1, 4 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat32Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat16Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanUint8Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>();
}

template <typename ConcatWorkloadType, armnn::DataType DataType>
static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
                                       unsigned int concatAxis)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);

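    // Checks that inputs/outputs are as we expect them (see definition of CreateConcatWorkloadTest).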
    ConcatQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, outputShape);
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
}

template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
static void ClSpaceToDepthWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);

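    // Checks that inputs/outputs are as we expect them (see definition of CreateSpaceToDepthWorkloadTest).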
    SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat32Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat16Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQAsymm8Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQSymm16Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
}

template <armnn::DataType DataType>
static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
                                      const std::initializer_list<unsigned int>& outputShape,
                                      unsigned int axis,
                                      unsigned int numInputs)
{
    armnn::Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
                                                                       graph,
                                                                       TensorShape(inputShape),
                                                                       TensorShape(outputShape),
                                                                       axis,
                                                                       numInputs);

    // Check inputs and output are as expected
    StackQueueDescriptor queueDescriptor = workload->GetData();
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
        auto predResult1 = CompareIClTensorHandleShape(inputHandle, inputShape);
        CHECK_MESSAGE(predResult1.m_Result, predResult1.m_Message.str());
    }
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult2 = CompareIClTensorHandleShape(outputHandle, outputShape);
    CHECK_MESSAGE(predResult2.m_Result, predResult2.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat32Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat16Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackUint8Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}


template <typename QLstmWorkloadType>
static void ClCreateQLstmWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();

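    // Checks that inputs/outputs are as we expect them (see definition of CreateQLstmWorkloadTest).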
    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    CHECK((inputHandle->GetShape() == TensorShape({2, 4})));
    CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
    CHECK((outputHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQLstmWorkloadTest")
{
    ClCreateQLstmWorkloadTest<ClQLstmWorkload>();
}

template <typename QuantizedLstmWorkloadType>
static void ClCreateQuantizedLstmWorkloadTest()
{
    using namespace armnn::armcomputetensorutils;

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);

    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

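    // Checks that inputs/outputs are as we expect them (see definition of CreateQuantizedLstmWorkloadTest).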
    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    CHECK((inputHandle->GetShape() == TensorShape({2, 2})));
    CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    CHECK((cellStateInHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
    CHECK((outputStateInHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    CHECK((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQuantizedLstmWorkload")
{
    ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
}

template <armnn::DataType DataType>
static void ClCreateActivationWorkloadReplaceFunctionsTest()
{
    std::shared_ptr<ClMemoryManager> memoryManager = std::make_shared<ClMemoryManager>(
        std::make_unique<arm_compute::CLBufferAllocator>());

    Graph graph;
    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(memoryManager);
    // input and output are created as armnn::TensorInfo tensorInfo({1, 1}, DataType)
    auto workloadPtr = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);

    // New input and output tensor handles are created and then used to replace the handles in the workload
    const ClTensorHandleFactory tensorHandleFactory(memoryManager);
    TensorInfo inputInfo({2, 2}, DataType::Float16);
    TensorInfo outputInfo({2, 2}, DataType::Float16);
    unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo, true);
    inputHandle->Manage();
    inputHandle->Allocate();
    unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo, true);
    outputHandle->Manage();
    outputHandle->Allocate();

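    // Replacing the handles is expected to throw, as ClActivationWorkload does not implement the Reconfigure function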
    unsigned int slot = 0;
    CHECK_THROWS_AS(workloadPtr->ReplaceInputTensorHandle(inputHandle.get(), slot), UnimplementedException);
    CHECK_THROWS_AS(workloadPtr->ReplaceOutputTensorHandle(outputHandle.get(), slot), UnimplementedException);
}

TEST_CASE("ClReplaceFunctionsfromFloat32toFloat16ActivationWorkload")
{
    ClCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::Float32>();
}

}