//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClContextControlFixture.hpp"
#include "ClWorkloadFactoryHelper.hpp"

#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>
#include <armnnTestUtils/TensorCopyUtils.hpp>
#include <armnnTestUtils/TensorHelpers.hpp>
#include <armnnTestUtils/WorkloadTestUtils.hpp>

#include <aclCommon/test/CreateWorkloadClNeon.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <cl/ClImportTensorHandle.hpp>
#include <cl/ClImportTensorHandleFactory.hpp>
#include <cl/ClTensorHandle.hpp>
#include <cl/ClWorkloadFactory.hpp>
#include <cl/workloads/ClWorkloads.hpp>
#include <cl/workloads/ClWorkloadUtils.hpp>

#include <doctest/doctest.h>

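// Convenience wrapper used throughout this suite: compares an IClTensorHandle's shape against the
// expected dimensions and returns a PredicateResult whose message is reported when a check fails.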
armnn::PredicateResult CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
                                                   std::initializer_list<unsigned int> expectedDimensions)
{
    return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
}

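// Most tests in this suite build a small graph through the templated Create*WorkloadTest helpers,
// ask a ClWorkloadFactory for the corresponding workload, and then inspect the tensor handles in
// the workload's queue descriptor to confirm shapes, data layouts and handle types.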
TEST_SUITE("CreateWorkloadCl")
{
template <armnn::DataType DataType>
static void ClCreateActivationWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    predResult = CompareIClTensorHandleShape(outputHandle, {1, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloatWorkload")
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload")
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
}

template <typename WorkloadType,
          armnn::DataType DataType>
static void ClCreateElementwiseWorkloadTest(BinaryOperation binaryOperator)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseBinaryWorkloadTest<WorkloadType, DataType>(factory, graph, binaryOperator);

    // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseBinaryWorkloadTest).
    auto queueDescriptor = workload->GetData();
    auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle1, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(inputHandle2, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload")
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
                                    armnn::DataType::Float32>(BinaryOperation::Add);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload")
{
    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
                                    armnn::DataType::Float16>(BinaryOperation::Add);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload")
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
                                    armnn::DataType::Float32>(BinaryOperation::Sub);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload")
{
    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
                                    armnn::DataType::Float16>(BinaryOperation::Sub);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    armnn::DataType::Float32>(BinaryOperation::Mul);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    armnn::DataType::Float16>(BinaryOperation::Mul);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                    armnn::DataType::QAsymmU8>(BinaryOperation::Mul);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
                                    armnn::DataType::Float32>(BinaryOperation::Div);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest")
{
    ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
                                    armnn::DataType::Float16>(BinaryOperation::Div);
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
static void ClCreateElementwiseUnaryWorkloadTest(armnn::UnaryOperation op)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseUnaryWorkloadTest<WorkloadType, DescriptorType, DataType>(factory, graph, op);

    DescriptorType queueDescriptor = workload->GetData();

    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    predResult = CompareIClTensorHandleShape(outputHandle, {2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateRsqrtFloat32WorkloadTest")
{
    ClCreateElementwiseUnaryWorkloadTest<ClRsqrtWorkload, RsqrtQueueDescriptor, armnn::DataType::Float32>(
        UnaryOperation::Rsqrt);
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

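    // The expected 4D shapes below depend on the data layout the workload was created with:
    // NHWC places channels in the last dimension, NCHW places them right after the batch dimension.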
    armnn::PredicateResult predResult(true);
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            break;
        default: // NCHW
            predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    }
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNchwWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloat16NchwWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationFloatNhwcWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateBatchNormalizationNhwcFloat16NhwcWorkload")
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp16ToFp32Workload")
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);

    ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
    CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvertFp32ToFp16Workload")
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);

    ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    CHECK((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
    CHECK((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
}

template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
                                                                                       graph,
                                                                                       dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
                                                               : std::initializer_list<unsigned int>({2, 8, 16, 3});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
                                                               : std::initializer_list<unsigned int>({2, 2, 10, 2});

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNchwWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloatNhwcWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NchwWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFloat16NhwcWorkload")
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dFastMathEnabledWorkload")
{
    Graph graph;

    using ModelOptions = std::vector<BackendOptions>;
    ModelOptions modelOptions = {};
    BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", true }
    });
    modelOptions.push_back(gpuAcc);

    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);

    auto workload =
        CreateConvolution2dWorkloadFastMathTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
                                                                                           graph,
                                                                                           DataLayout::NCHW,
                                                                                           modelOptions);

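    // With "FastMathEnabled" set on the GpuAcc backend the convolution may trade a small amount of
    // accuracy for speed, so ACL is expected to select the Winograd convolution method here.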
    ARMNN_ASSERT(workload != nullptr);
    auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
    IgnoreUnused(conv2dWorkload);
    ARMNN_ASSERT(conv2dWorkload != nullptr);
    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClReplaceInputOutputConvolution2dWorkload")
{
    // Create a Convolution2dWorkload with ClTensorHandle input and output,
    // then replace the input and output with ClImportTensorHandle.
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType::Float32>(factory,
                                                                                    graph,
                                                                                    DataLayout::NHWC);

    TensorShape inputShape  = std::initializer_list<unsigned int>({2, 8, 16, 3});
    TensorShape outputShape = std::initializer_list<unsigned int>({2, 2, 10, 2});

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
    // The input and output handles are created as ClTensorHandle
    CHECK((dynamic_cast<ClTensorHandle*>(inputHandle) != nullptr));
    CHECK((dynamic_cast<ClTensorHandle*>(outputHandle) != nullptr));

    // Replace with ImportTensorHandle
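    // MemorySource::Malloc marks both directions as host-allocated memory that is intended to be
    // imported into the CL runtime rather than copied into a backend-owned buffer.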
    ClImportTensorHandleFactory importFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo inputInfo({ 2, 8, 16, 3 }, DataType::Float32);
    TensorInfo outputInfo({ 2, 2, 10, 2 }, DataType::Float32);

    // Create TensorHandles for memory import
    auto inputImportHandle = importFactory.CreateTensorHandle(inputInfo);
    auto outputImportHandle = importFactory.CreateTensorHandle(outputInfo);

    // Calling ReplaceInputTensorHandle and ReplaceOutputTensorHandle does not throw an exception
    // because the Reconfigure function is implemented
    workload->ReplaceInputTensorHandle(inputImportHandle.get(), 0);
    workload->ReplaceOutputTensorHandle(outputImportHandle.get(), 0);

    // Check that the handles were correctly replaced with the import handles carrying the correct information
    queueDescriptor = workload->GetData();
    auto replacedInputHandle  = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto replacedOutputHandle = PolymorphicDowncast<ITensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((replacedInputHandle->GetShape() == inputShape));
    CHECK((replacedOutputHandle->GetShape() == outputShape));

    CHECK((inputImportHandle.get() == replacedInputHandle));
    CHECK((outputImportHandle.get() == replacedOutputHandle));

    CHECK((dynamic_cast<ClTensorHandle*>(replacedInputHandle) == nullptr));
    CHECK((dynamic_cast<ClImportTensorHandle*>(replacedInputHandle) != nullptr));
    CHECK((dynamic_cast<ClTensorHandle*>(replacedOutputHandle) == nullptr));
    CHECK((dynamic_cast<ClImportTensorHandle*>(replacedOutputHandle) != nullptr));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConvolution2dClCompiledContextWorkload")
{
    using namespace armnn;

    const DataType inputType  = DataType::QAsymmU8;
    const DataType kernelType = DataType::QSymmS8;
    const DataType biasType   = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);

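    // The weights use per-axis quantization: one scale per output channel along dimension 0. The bias
    // scales follow the usual inputScale * weightScale rule per channel (0.5 * {0.5, 0.75, 1.0}).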
    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
    constexpr unsigned int quantDimension = 0;

    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);

    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);

    std::vector<uint8_t> inputData =
    {
        138, 108, 138, 108, 138, 108
    };

    std::vector<int8_t> kernelData =
    {
        1, 2, 1, 2, 1, 2
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4
    };

    std::vector<uint8_t> expectedOutputData =
    {
        121, 118, 115, 121, 118, 115, 121, 118, 115
    };

    Convolution2dDescriptor descriptor;
    descriptor.m_StrideX     = 1;
    descriptor.m_StrideY     = 1;
    descriptor.m_PadLeft     = 0;
    descriptor.m_PadRight    = 0;
    descriptor.m_PadTop      = 0;
    descriptor.m_PadBottom   = 0;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = DataLayout::NHWC;

    auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
    auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
    auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);

    std::unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
    std::unique_ptr<armnn::ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);

    WorkloadInfo workloadInfo;

    Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get());
    AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    // Initialize our m_CLCompileContext using default device and context
    auto context = arm_compute::CLKernelLibrary::get().context();
    auto device  = arm_compute::CLKernelLibrary::get().get_device();
    auto clCompileContext = arm_compute::CLCompileContext(context, device);

    // Check built programs are empty in context
    CHECK(clCompileContext.get_built_programs().empty());
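    // Constructing the workload below compiles its OpenCL kernels through the supplied CLCompileContext,
    // so the context's built-program cache should go from empty to non-empty.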

    auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
                                                              workloadInfo,
                                                              clMemoryManager->GetIntraLayerManager(),
                                                              clCompileContext);
    ARMNN_ASSERT(workload != nullptr);
    // Check built programs are not empty in context
    CHECK(!clCompileContext.get_built_programs().empty());
}

template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDepthwiseConvolutionFloat32NhwcWorkload")
{
    ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
static void ClDirectConvolution2dWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloatWorkload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dFloat16Workload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDirectConvolution2dUint8Workload")
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QAsymmU8>();
}

template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
static void ClCreateFullyConnectedWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
        CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {3, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloatWorkloadTest")
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateFullyConnectedFloat16WorkloadTest")
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
}

template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
static void ClNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                               : std::initializer_list<unsigned int>({3, 1, 5, 5});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                               : std::initializer_list<unsigned int>({3, 1, 5, 5});

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NchwWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NchwWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat32NhwcWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateNormalizationFloat16NhwcWorkload")
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void ClPooling2dWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
                                                               : std::initializer_list<unsigned int>({3, 5, 5, 2});
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
                                                               : std::initializer_list<unsigned int>({3, 2, 4, 2});

    // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNchwWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloatNhwcWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NchwWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePooling2dFloat16NhwcWorkload")
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}

static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
                                      const armnn::TensorShape& alphaShape,
                                      const armnn::TensorShape& outputShape,
                                      armnn::DataType dataType)
{
    Graph graph;
    ClWorkloadFactory factory =
            ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
                                                             graph,
                                                             inputShape,
                                                             alphaShape,
                                                             outputShape,
                                                             dataType);

    // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
    PreluQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((alphaHandle->GetShape() == alphaShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloat16Workload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluFloatWorkload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreatePreluUint8Workload")
{
    ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
}

template <typename armnn::DataType DataType>
static void ClCreateReshapeWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {1, 4});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloatWorkload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeFloat16Workload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateReshapeUint8Workload")
{
    ClCreateReshapeWorkloadTest<armnn::DataType::QAsymmU8>();
}

template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
static void ClSoftmaxWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

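    // Softmax output values lie in [0, 1), which is why the quantized variants below use a scale of
    // 1/256 with the zero point at the bottom of the data type's range (0 for QAsymmU8, -128 for QAsymmS8).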
    armnn::TensorInfo tensorInfo({4, 1}, DataType);
    if (DataType == armnn::DataType::QAsymmU8)
    {
        tensorInfo.SetQuantizationOffset(0);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }
    else if (DataType == armnn::DataType::QAsymmS8)
    {
        tensorInfo.SetQuantizationOffset(-128);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }

    auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat32WorkloadTest")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxFloat16WorkloadTest")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmU8Workload")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmU8>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSoftmaxQAsymmS8Workload")
{
    ClSoftmaxWorkloadTest<ClSoftmaxWorkload, armnn::DataType::QAsymmS8>();
}

template <typename armnn::DataType DataType>
static void ClSplitterWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);

    // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
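    // The splitter divides the {5, 7, 7} input along the first dimension into views of sizes 1, 2 and 2.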
    SplitterQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {5, 7, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
    predResult = CompareIClTensorHandleShape(outputHandle1, {2, 7, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
    predResult = CompareIClTensorHandleShape(outputHandle2, {2, 7, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());

    auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    predResult = CompareIClTensorHandleShape(outputHandle0, {1, 7, 7});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloatWorkload")
{
    ClSplitterWorkloadTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterFloat16Workload")
{
    ClSplitterWorkloadTest<armnn::DataType::Float16>();
}

template <typename armnn::DataType DataType>
static void ClSplitterConcatTest()
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the concat layer.
    // We test that it is possible to specify the 0th output of the splitter to be the 1st input to the
    // concat, and the 1st output of the splitter to be the 0th input of the concat.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workloads =
        CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
            (factory, graph);

    auto wlSplitter = std::move(workloads.first);
    auto wlConcat = std::move(workloads.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);

    CHECK(sOut0);
    CHECK(sOut1);
    CHECK(mIn0);
    CHECK(mIn1);

    // Flipped order of inputs/outputs.
    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
    CHECK(validDataPointers);

    // Also make sure that the inputs are sub-tensors of one tensor and the outputs are sub-tensors of another tensor.
    bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
                                    && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());

    CHECK(validSubTensorParents);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloatWorkload")
{
    ClSplitterConcatTest<armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSplitterConcatFloat16Workload")
{
    ClSplitterConcatTest<armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSingleOutputMultipleInputs")
{
    // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs, and each of those outputs is used by two different activation layers.

    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    std::unique_ptr<ClSplitterWorkload> wlSplitter;
    std::unique_ptr<ClActivationWorkload> wlActiv0_0;
    std::unique_ptr<ClActivationWorkload> wlActiv0_1;
    std::unique_ptr<ClActivationWorkload> wlActiv1_0;
    std::unique_ptr<ClActivationWorkload> wlActiv1_1;

    CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
        ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
                                                               wlActiv1_0, wlActiv1_1);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    CHECK(sOut0);
    CHECK(sOut1);
    CHECK(activ0_0Im);
    CHECK(activ0_1Im);
    CHECK(activ1_0Im);
    CHECK(activ1_1Im);

    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    CHECK(validDataPointers);
}

#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend; it is not available if the reference backend is not built.

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMemCopyWorkloadsCl")
{
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    CreateMemCopyWorkloads<IClTensorHandle>(factory);
}

#endif

template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
            CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                                                               : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                                                               : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });

    CHECK((inputHandle->GetShape() == inputShape));
    CHECK((outputHandle->GetShape() == outputShape));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNchwWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloatNhwcWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NchwWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateL2NormalizationFloat16NhwcWorkload")
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
static void ClCreateLogSoftmaxWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
            ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
    LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {4, 1});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLogSoftmaxFloat32WorkloadTest")
{
    ClCreateLogSoftmaxWorkloadTest<ClLogSoftmaxWorkload, armnn::DataType::Float32>();
}

template <typename LstmWorkloadType>
static void ClCreateLstmWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);

    LstmQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
    auto predResult = CompareIClTensorHandleShape(inputHandle, {2, 2});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, {2, 4});
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateLSTMWorkloadFloatWorkload")
{
    ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
}

template <typename ResizeWorkloadType, typename armnn::DataType DataType>
static void ClResizeWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);

    auto queueDescriptor = workload->GetData();

    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    armnn::PredicateResult predResult(true);
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            predResult = CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            break;
        default: // DataLayout::NCHW
            predResult = CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    }
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NchwWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat32NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeFloat16NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateResizeUint8NhwcWorkload")
{
    ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
}

template <typename MeanWorkloadType, typename armnn::DataType DataType>
static void ClMeanWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
    MeanQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    // The first dimension (batch size) in both input and output is singular, thus it has been reduced by ACL.
    auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, { 1, 4 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat32Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanFloat16Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMeanUint8Workload")
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QAsymmU8>();
}
1133 
template <typename ConcatWorkloadType, armnn::DataType DataType>
static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
                                       unsigned int concatAxis)
{
    Graph graph;
    ClWorkloadFactory factory =
        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);

    ConcatQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle0  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle1  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

    auto predResult = CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, outputShape);
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Float32Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim1Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim3Uint8Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
}
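
// An illustrative extra instantiation, not part of the original coverage: it assumes the CL
// backend accepts Float16 concatenation in the same way as the Float32 and QAsymmU8 cases above,
// reusing the axis-0 output shape for two { 2, 3, 2, 5 } inputs.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateConcatDim0Float16Workload")
{
    ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float16>({ 4, 3, 2, 5 }, 0);
}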

template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
static void ClSpaceToDepthWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory =
            ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);

    SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);

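    // The shapes checked below come from CreateSpaceToDepthWorkloadTest: a { 1, 2, 2, 1 } input
    // rearranged to { 1, 1, 1, 4 }, which is consistent with a block size of 2 in NHWC layout
    // (H and W divided by the block size, channels multiplied by blockSize * blockSize).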
    auto predResult = CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    predResult = CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 });
    CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat32Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthFloat16Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQAsymm8Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSpaceToDepthQSymm16Workload")
{
    ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
}

template <armnn::DataType DataType>
static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
                                      const std::initializer_list<unsigned int>& outputShape,
                                      unsigned int axis,
                                      unsigned int numInputs)
{
    armnn::Graph graph;
    ClWorkloadFactory factory =
            ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
                                                                       graph,
                                                                       TensorShape(inputShape),
                                                                       TensorShape(outputShape),
                                                                       axis,
                                                                       numInputs);

    // Check inputs and output are as expected
    StackQueueDescriptor queueDescriptor = workload->GetData();
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
        auto predResult1 = CompareIClTensorHandleShape(inputHandle, inputShape);
        CHECK_MESSAGE(predResult1.m_Result, predResult1.m_Message.str());
    }
    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
    auto predResult2 = CompareIClTensorHandleShape(outputHandle, outputShape);
    CHECK_MESSAGE(predResult2.m_Result, predResult2.m_Message.str());
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat32Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat16Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackUint8Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}
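
// An illustrative extra instantiation, not part of the original coverage: stacking three
// { 3, 4, 5 } inputs along axis 0 should produce a { 3, 3, 4, 5 } output, assuming the CL stack
// workload accepts this axis/input-count combination in the same way as the axis-2 cases above.
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateStackFloat32Axis0Workload")
{
    ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 3, 4, 5 }, 0, 3);
}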


template <typename QLstmWorkloadType>
static void ClCreateQLstmWorkloadTest()
{
    Graph graph;
    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();

    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    CHECK((inputHandle->GetShape() == TensorShape({2, 4})));
    CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));

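    // Of the three QLstm outputs, only indices 1 and 2 are inspected here: index 1 is the cell
    // state (QSYMM16 in ACL) and index 2 is the final output (QASYMM8_SIGNED); index 0, presumably
    // the output state, is not checked by this test.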
    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
    CHECK((outputHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQLstmWorkloadTest")
{
    ClCreateQLstmWorkloadTest<ClQLstmWorkload>();
}

template <typename QuantizedLstmWorkloadType>
static void ClCreateQuantizedLstmWorkloadTest()
{
    using namespace armnn::armcomputetensorutils;

    Graph graph;
    ClWorkloadFactory factory =
            ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);

    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

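    // The checks below walk the QuantizedLstm bindings in order: inputs { input, cellStateIn,
    // outputStateIn } and outputs { cellStateOut, outputStateOut }, with QASYMM8 used for the
    // activations and QSYMM16 for the cell state.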
    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    CHECK((inputHandle->GetShape() == TensorShape({2, 2})));
    CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    CHECK((cellStateInHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
    CHECK((outputStateInHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    CHECK((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "CreateQuantizedLstmWorkload")
{
    ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
}

template <armnn::DataType DataType>
static void ClCreateActivationWorkloadReplaceFunctionsTest()
{
    std::shared_ptr<ClMemoryManager> memoryManager = std::make_shared<ClMemoryManager>(
            std::make_unique<arm_compute::CLBufferAllocator>());

    Graph graph;
    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(memoryManager);
    // The input and output are created as armnn::TensorInfo tensorInfo({1, 1}, DataType).
    auto workloadPtr = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);

    // New input and output tensor handles are created, then used to try replacing the originals in the workload.
    const ClTensorHandleFactory tensorHandleFactory(memoryManager);
    TensorInfo inputInfo({2, 2}, DataType::Float16);
    TensorInfo outputInfo({2, 2}, DataType::Float16);
    unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo, true);
    inputHandle->Manage();
    inputHandle->Allocate();
    unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo, true);
    outputHandle->Manage();
    outputHandle->Allocate();

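    // Both replace calls are expected to throw: ClActivationWorkload configures its ACL function
    // against the original tensors when it is created and (at the time of writing) does not
    // implement tensor handle replacement, so UnimplementedException is the expected outcome.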
    unsigned int slot = 0;
    CHECK_THROWS_AS(workloadPtr->ReplaceInputTensorHandle(inputHandle.get(), slot), UnimplementedException);
    CHECK_THROWS_AS(workloadPtr->ReplaceOutputTensorHandle(outputHandle.get(), slot), UnimplementedException);
}

TEST_CASE("ClReplaceFunctionsfromFloat32toFloat16ActivationWorkload")
{
    ClCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::Float32>();
}

}
