xref: /aosp_15_r20/external/armnn/src/backends/backendsCommon/test/layerTests/ConcatTestImpl.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ConcatTestImpl.hpp"
7 
8 #include <armnnUtils/QuantizeHelper.hpp>
9 #include <ResolveType.hpp>
10 
11 
12 #include <armnnUtils/Permute.hpp>
13 
14 #include <armnnTestUtils/TensorCopyUtils.hpp>
15 #include <armnnTestUtils/WorkloadTestUtils.hpp>
16 
17 #include <armnnTestUtils/TensorHelpers.hpp>
18 
19 using namespace armnn;
20 using namespace armnnUtils;
21 
22 //
23 // Helper functions and templates
24 //
25 
CreateDescriptorForConcat(const std::vector<TensorInfo> & inputTensorInfos,unsigned int concatDim)26 OriginsDescriptor CreateDescriptorForConcat(
27     const std::vector<TensorInfo> & inputTensorInfos,
28     unsigned int concatDim)
29 {
30     std::vector<TensorShape> shapes;
31     shapes.reserve(inputTensorInfos.size());
32     for (const TensorInfo& it: inputTensorInfos)
33     {
34         shapes.push_back(it.GetShape());
35     }
36 
37     return CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), concatDim);
38 }
39 
//
// Concat is only supported for the N and C dimensions for NCHW and for the innermost dimension.
// In the case of fewer than 4 dimensions we need to make sure that the concat dimension is at
// least the 3rd slowest-iterating one, or the innermost dimension.
//
45 
NeedPermuteForConcat(const std::vector<TensorInfo> & inputTensorInfos,unsigned int concatDim)46 bool NeedPermuteForConcat(
47     const std::vector<TensorInfo> & inputTensorInfos,
48     unsigned int concatDim)
49 {
50     // See note above. Additionally we expect the input shapes to have the
51     // same number of dimensions.
52     unsigned int nDimensions = 0;
53 
54     // Determine the number of dimensions as well as sanity check them
55     // agains test implementation issues.
56     for (auto && tensorInfo : inputTensorInfos)
57     {
58         if (!nDimensions)
59         {
60             nDimensions = tensorInfo.GetShape().GetNumDimensions();
61         }
62         else
63         {
64             ARMNN_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
65                 "Input shapes must have the same number of dimensions");
66         }
67     }
68 
69     return (nDimensions < 3 || (nDimensions == 3 && (nDimensions-concatDim) < 3 && (nDimensions-concatDim) != 1));
70 }
71 
// Returns 'inputShape' padded to at least 3 dimensions by prepending
// dummy dimensions of size 1. Shapes that already have 3 or more
// dimensions are returned unchanged.
TensorShape ExpandTensorShapeTo3dForPermute(const TensorShape & inputShape)
{
    const unsigned int numDims = inputShape.GetNumDimensions();
    if (numDims >= 3)
    {
        // Already 3d or more - nothing to expand.
        return inputShape;
    }

    // Start from {1, 1, 1} and copy the existing dimensions into the
    // fastest-iterating slots.
    std::vector<unsigned int> expandedDims(3u, 1u);
    const unsigned int offset = 3u - numDims;
    for (unsigned int i = 0; i < numDims; ++i)
    {
        expandedDims[offset + i] = inputShape[i];
    }
    return TensorShape(3u, expandedDims.data());
}
89 
Generate3dPermuteVectorForConcat(unsigned int numDimensions,unsigned int & concatDim,std::pair<PermutationVector,PermutationVector> & permutations)90 void Generate3dPermuteVectorForConcat(
91     unsigned int numDimensions,
92     unsigned int & concatDim,
93     std::pair<PermutationVector, PermutationVector> & permutations)
94 {
95     ARMNN_ASSERT_MSG(numDimensions <= 3,
96        "Only dimensions 1,2 and 3 are supported by this helper");
97     unsigned int expandedBy = 3 - numDimensions;
98     unsigned int expandedConcatAxis = concatDim + expandedBy;
99 
100     if (expandedConcatAxis == 2)
101     {
102         concatDim = 0;
103         PermutationVector forwardPermutation({1, 2, 0});
104         PermutationVector reversePermutation({2, 0, 1});
105         permutations = std::make_pair(forwardPermutation, reversePermutation);
106     }
107     else if (expandedConcatAxis == 1)
108     {
109         concatDim = 0;
110         PermutationVector forwardPermutation({2, 0, 1});
111         PermutationVector reversePermutation({1, 2, 0});
112         permutations = std::make_pair(forwardPermutation, reversePermutation);
113     }
114     else
115     {
116         ARMNN_ASSERT(expandedConcatAxis == 0);
117         concatDim = 0;
118     }
119 }
120 
// Runs a Permute workload that rearranges 'inputData' according to
// 'mappings' and writes the result into 'outputData'. On return
// 'inputTensorInfo' is overwritten with the permuted tensor info so that
// callers can chain further operations on the permuted data.
template<typename T> void PermuteTensorData(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const PermutationVector& mappings,
    TensorInfo & inputTensorInfo,
    const T * inputData,
    std::vector<T>& outputData)
{
    IgnoreUnused(memoryManager);
    ARMNN_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
    if (inputData == nullptr)
    {
        // Nullptr is an error in the test. By returning without doing the concatenation
        // I expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    // The output info is the input info with its dimensions permuted.
    TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);
    std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    // Describe the Permute workload: one input, one output, the given mapping.
    PermuteQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = PermuteDescriptor{mappings};
    WorkloadInfo workloadInfo;
    AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Permute,
                                                                         queueDescriptor,
                                                                         workloadInfo);

    // Handles must be allocated before any data transfer or execution.
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData);

    workload->PostAllocationConfigure();
    workload->Execute();

    // Read back the permuted data and report the new tensor info to the caller.
    outputData.resize(outputTensorInfo.GetNumElements());
    CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
    inputTensorInfo = outputTensorInfo;
}
166 
//
// Permutes the input tensors so we can do a supported concatenation.
// Also treats lower-than-3d tensors as 3d by adding dummy 1 dimensions
// at the front. Finally this function tells what the output shape
// of the permuted concatenated tensor is going to be.
//
// In/out parameters: 'inputTensorInfos', 'inputData' and 'concatDim' are
// rewritten in place; 'inputDataStorage' owns the permuted copies;
// 'permuteVector' receives the reverse permutation needed to restore the
// original layout; 'outputTensorInfo' gets its shape replaced.
//
template<typename T> void PermuteInputsForConcat(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    std::vector<TensorInfo> & inputTensorInfos,
    std::vector<T *> & inputData,
    std::vector<std::vector<T>> & inputDataStorage,
    PermutationVector & permuteVector,
    unsigned int & concatDim,
    TensorInfo & outputTensorInfo)
{
    IgnoreUnused(memoryManager);
    ARMNN_ASSERT_MSG(inputTensorInfos.size() > 1,
        "Expecting more than one tensor to be concatenated here");

    unsigned int numDims = 0;
    unsigned int nthInput = 0;
    const PermutationVector identity({0, 1, 2});

    std::pair<PermutationVector, PermutationVector> permutations =
        std::make_pair(identity, identity);

    inputDataStorage.resize(inputData.size());

    for (auto && tensorInfo : inputTensorInfos)
    {
        if (numDims == 0)
        {
            // First input: derive the permutation (and the new concatDim)
            // from its rank; all other inputs must have the same rank.
            numDims = tensorInfo.GetShape().GetNumDimensions();
            Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);

            // Store the reverse permutation.
            permuteVector = permutations.second;
            ARMNN_ASSERT_MSG(!permuteVector.IsEqual(identity),
                "Test logic error, we don't need permutation, so we shouldn't arrive here");
        }
        else
        {
            ARMNN_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
                "All inputs must have the same number of dimensions");
        }

        // Expand to 3d, then run the forward permutation on this input.
        TensorInfo newTensorInfo = tensorInfo;
        newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));

        PermuteTensorData<T>(workloadFactory,
                             memoryManager,
                             tensorHandleFactory,
                             permutations.first,
                             newTensorInfo,
                             inputData[nthInput],
                             inputDataStorage[nthInput]);

        // Point the caller's views at the permuted copies.
        inputData[nthInput] = inputDataStorage[nthInput].data();
        inputTensorInfos[nthInput] = newTensorInfo;

        ++nthInput;
    }

    // The concatenated output has the same (expanded, permuted) layout.
    outputTensorInfo.SetShape(
        armnnUtils::Permuted(
            ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
            permutations.first));
}
237 
238 //
239 // This is the pair of PermuteInputsForConcat(...) which permutes back
240 // the output of the concatenation so we can check it against an expected
241 // output.
242 //
PermuteOutputForConcat(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,const TensorInfo & tensorInfo,const PermutationVector & permuteVector,std::unique_ptr<ITensorHandle> && inputDataHandle,T * data)243 template <typename T> void PermuteOutputForConcat(
244     IWorkloadFactory& workloadFactory,
245     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
246     const armnn::ITensorHandleFactory& tensorHandleFactory,
247     const TensorInfo & tensorInfo,
248     const PermutationVector & permuteVector,
249     std::unique_ptr<ITensorHandle> && inputDataHandle,
250     T * data)
251 {
252     ARMNN_ASSERT_MSG(data != nullptr, "data must not be null");
253     if (data == nullptr)
254     {
255         // Nullptr is an error in the test. By returning without doing the permutation
256         // I expect the caller to fail the test. It still makes sense to report this as
257         // an assert for Debug builds.
258         return;
259     }
260 
261     TensorInfo resultTensorInfo = tensorInfo;
262     std::vector<T> inputData(tensorInfo.GetNumElements());
263     std::vector<T> outputData;
264 
265     CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());
266 
267     PermuteTensorData<T>(workloadFactory,
268                          memoryManager,
269                          tensorHandleFactory,
270                          permuteVector,
271                          resultTensorInfo,
272                          &inputData[0],
273                          outputData);
274 
275     ::memcpy(data, &outputData[0], sizeof(T)*outputData.size());
276 }
277 
Concatenate(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,std::initializer_list<const TensorInfo> inputTensorInfosOrig,std::initializer_list<T * > inputsOrig,const TensorInfo & outputTensorInfoOrig,T * output,unsigned int concatDim,bool useSubtensor)278 template<typename T> void Concatenate(
279     IWorkloadFactory& workloadFactory,
280     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
281     const armnn::ITensorHandleFactory& tensorHandleFactory,
282     std::initializer_list<const TensorInfo> inputTensorInfosOrig,
283     std::initializer_list<T *> inputsOrig,
284     const TensorInfo& outputTensorInfoOrig,
285     T * output,
286     unsigned int concatDim,
287     bool useSubtensor)
288 {
289     ARMNN_ASSERT_MSG(output != nullptr, "output must not be null");
290     if (output == nullptr)
291     {
292         // Nullptr is an error in the test. By returning without doing the permutation
293         // I expect the caller to fail the test. It still makes sense to report this as
294         // an assert for Debug builds.
295         return;
296     }
297 
298     // Saves a copy of the parameters which we might need to change.
299     std::vector<TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
300     std::vector<T *> inputs            = inputsOrig;
301     TensorInfo outputTensorInfo = outputTensorInfoOrig;
302 
303     PermutationVector permuteVector{0, 1, 2};
304 
305     // Holds and automatically releases memory for the reshaped input data.
306     std::vector<std::vector<T>> tmpInputDataStorage;
307 
308     const size_t inputCount = inputTensorInfos.size();
309 
310     bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);
311 
312     if (needPermuteForConcat)
313     {
314         //
315         // We need to permute the inputs, because concatenation along
316         // the requested axis is not supported.
317         //
318         PermuteInputsForConcat<T>(workloadFactory,
319                                   memoryManager,
320                                   tensorHandleFactory,
321                                   inputTensorInfos,
322                                   inputs,
323                                   tmpInputDataStorage,
324                                   permuteVector,
325                                   concatDim,
326                                   outputTensorInfo);
327     }
328 
329     WorkloadInfo workloadInfo;
330 
331     std::vector<std::unique_ptr<ITensorHandle>> inputHandles;
332     inputHandles.reserve(inputCount);
333 
334     std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
335 
336     ConcatQueueDescriptor queueDescriptor;
337     OriginsDescriptor viewsDescriptor = CreateDescriptorForConcat(inputTensorInfos, concatDim);
338     queueDescriptor.m_Parameters = viewsDescriptor;
339 
340     if (useSubtensor)
341     {
342         queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
343         for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
344         {
345             queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
346                 viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
347         }
348 
349         outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
350 
351         const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
352         for (unsigned int i = 0; i < inputCount; ++i)
353         {
354             const TensorInfo& inputTensorInfo = inputTensorInfos[i];
355 
356             std::unique_ptr<ITensorHandle> inputHandle =
357                 subTensorsSupported ?
358                     tensorHandleFactory.CreateSubTensorHandle(*outputHandle,
359                                                           inputTensorInfo.GetShape(),
360                                                           queueDescriptor.m_ViewOrigins[i].m_Origin.data()) :
361                                                           tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
362 
363             inputHandles.emplace_back(std::move(inputHandle));
364         }
365 
366 
367     }
368     else
369     {
370         for (unsigned int i = 0; i < inputCount; ++i)
371         {
372             std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfos[i]);
373             inputHandles.emplace_back(std::move(inputHandle));
374         }
375     }
376 
377     for (unsigned int i = 0; i < inputCount; ++i)
378     {
379         AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
380     }
381 
382     AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
383 
384     std::unique_ptr<IWorkload> workload
385             = workloadFactory.CreateWorkload(LayerType::Concat, queueDescriptor, workloadInfo);
386 
387     for (auto& inputHandle : inputHandles)
388     {
389         inputHandle->Allocate();
390     }
391 
392     outputHandle->Allocate();
393 
394     unsigned int nextInputId = 0;
395     for (auto& inputHandle : inputHandles)
396     {
397         CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
398         ++nextInputId;
399     }
400 
401     workload->PostAllocationConfigure();
402     workload->Execute();
403 
404     if (needPermuteForConcat)
405     {
406         PermuteOutputForConcat<T>(workloadFactory,
407                                   memoryManager,
408                                   tensorHandleFactory,
409                                   outputTensorInfo,
410                                   permuteVector,
411                                   std::move(outputHandle),
412                                   output);
413     }
414     else
415     {
416         CopyDataFromITensorHandle(output, outputHandle.get());
417     }
418 }
419 
420 //
421 // Implementation templates
422 //
// Concatenates a 2-channel tensor and a 1-channel tensor along the channel
// dimension of a CHW layout, producing a 3-channel output, and checks the
// result element-by-element. Uses sub-tensor views when the workload
// factory supports them.
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> ConcatTestImpl(
        IWorkloadFactory& workloadFactory,
        const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory)
{

    IgnoreUnused(memoryManager);

    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Define the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, ArmnnType);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, ArmnnType);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, ArmnnType);

    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    // Channels 0-1 of the output come from input1, channel 2 from input2.
    std::vector<T> expectedOutput =
            {
                    1, 2, 3,
                    4, 5, 6,
                    7, 8, 9,
                    10, 11, 12,
                    13, 14, 15,
                    16, 17, 18,

                    19, 20, 21,
                    22, 23, 24,
                    25, 26, 27,
                    28, 29, 30,
                    31, 32, 33,
                    34, 35, 36,

                    37, 38, 39,
                    40, 41, 42,
                    43, 44, 45,
                    46, 47, 48,
                    49, 50, 51,
                    52, 53, 54
            };

    std::vector<T> input1 =
            {
                    1, 2, 3,
                    4, 5, 6,
                    7, 8, 9,
                    10, 11, 12,
                    13, 14, 15,
                    16, 17, 18,

                    19, 20, 21,
                    22, 23, 24,
                    25, 26, 27,
                    28, 29, 30,
                    31, 32, 33,
                    34, 35, 36
            };

    std::vector<T> input2 =
            {
                    37, 38, 39,
                    40, 41, 42,
                    43, 44, 45,
                    46, 47, 48,
                    49, 50, 51,
                    52, 53, 54,
            };

    std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = {2, 0, 0}; //Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    // When sub-tensors are supported the inputs write directly into the
    // output handle at the view origins; otherwise standalone handles are used.
    std::unique_ptr<ITensorHandle> inputHandle1 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2  =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    // Build the Concat workload: two inputs, one output, two view origins.
    ConcatQueueDescriptor data;
    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    // Allocate handles before copying data or executing.
    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 3>(actualOutput,
                                 expectedOutput,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}
550 
551 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat1dTestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)552 LayerTestResult<T, 1> Concat1dTestImpl(
553     IWorkloadFactory& workloadFactory,
554     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
555     const armnn::ITensorHandleFactory& tensorHandleFactory,
556     float qScale,
557     int32_t qOffset)
558 {
559     TensorInfo inputTensorInfo({ 3 }, ArmnnType, qScale, qOffset);
560 
561     auto input0 = QuantizedVector<T>({ 1.0f, 2.0f, 3.0f }, qScale, qOffset);
562     auto input1 = QuantizedVector<T>({ 4.0f, 5.0f, 6.0f }, qScale, qOffset);
563     auto input2 = QuantizedVector<T>({ 7.0f, 8.0f, 9.0f }, qScale, qOffset);
564 
565     TensorInfo outputTensorInfo({ 9 }, ArmnnType, qScale, qOffset);
566 
567     LayerTestResult<T, 1> result(outputTensorInfo);
568 
569     std::vector<T> output;
570     output.resize(outputTensorInfo.GetNumElements());
571     Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
572                    { inputTensorInfo, inputTensorInfo, inputTensorInfo },
573                    { input0.data(), input1.data(), input2.data() },
574                    outputTensorInfo,
575                    output.data(),
576                    0,
577                    true);
578 
579     result.m_ActualData   = output;
580     result.m_ExpectedData = QuantizedVector<T>(
581         {
582             1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
583         },
584         qScale, qOffset);
585 
586     return result;
587 }
588 
589 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat2dTestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,const TensorInfo & outputTensorInfo,unsigned int dimension,const float qScale,const int32_t qOffset)590 LayerTestResult<T, 2> Concat2dTestImpl(
591     IWorkloadFactory& workloadFactory,
592     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
593     const armnn::ITensorHandleFactory& tensorHandleFactory,
594     const TensorInfo& outputTensorInfo,
595     unsigned int dimension,
596     const float qScale,
597     const int32_t qOffset)
598 {
599     TensorInfo inputTensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
600 
601     auto input0 = QuantizedVector<T>(
602         {
603             // Batch 0
604             1.0f, 2.0f, 3.0f,
605 
606             // Batch 1
607             10.0f, 11.0f, 12.0f,
608         },
609         qScale, qOffset);
610 
611     auto input1 = QuantizedVector<T>(
612          {
613             // Batch 0
614             4.0f, 5.0f, 6.0f,
615 
616             // Batch 1
617             13.0f, 14.0f, 15.0f,
618         },
619         qScale, qOffset);
620 
621     auto input2 = QuantizedVector<T>(
622         {
623             // Batch 0
624             7.0f, 8.0f, 9.0f,
625 
626             // Batch 1
627             16.0f, 17.0f, 18.0f,
628         },
629         qScale, qOffset);
630 
631     LayerTestResult<T, 2> result(outputTensorInfo);
632 
633     std::vector<T> output;
634     output.resize(outputTensorInfo.GetNumElements());
635     Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
636                    { inputTensorInfo, inputTensorInfo, inputTensorInfo },
637                    { input0.data(), input1.data(), input2.data() },
638                    outputTensorInfo,
639                    output.data(),
640                    dimension,
641                    true);
642 
643     result.m_ActualData = output;
644     return result;
645 }
646 
647 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat2dDim0TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)648 LayerTestResult<T, 2> Concat2dDim0TestImpl(
649     IWorkloadFactory& workloadFactory,
650     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
651     const armnn::ITensorHandleFactory& tensorHandleFactory,
652     float qScale,
653     int32_t qOffset)
654 {
655     TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
656 
657     LayerTestResult<T, 2> result = Concat2dTestImpl<ArmnnType>(
658         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, qScale, qOffset);
659 
660     result.m_ExpectedData = QuantizedVector<T>(
661         {
662             // Batch 0
663             1.0f, 2.0f, 3.0f,
664 
665             // Batch 1
666             10.0f, 11.0f, 12.0f,
667 
668             // Batch 2
669             4.0f, 5.0f, 6.0f,
670 
671             // Batch 3
672             13.0f, 14.0f, 15.0f,
673 
674             // Batch 4
675             7.0f, 8.0f, 9.0f,
676 
677             // Batch 5
678             16.0f, 17.0f, 18.0f,
679         },
680         qScale, qOffset);
681 
682     return result;
683 }
684 
685 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat2dDim1TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)686 LayerTestResult<T, 2> Concat2dDim1TestImpl(
687     IWorkloadFactory& workloadFactory,
688     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
689     const armnn::ITensorHandleFactory& tensorHandleFactory,
690     float qScale,
691     int32_t qOffset)
692 {
693     TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
694 
695     LayerTestResult<T, 2> result = Concat2dTestImpl<ArmnnType>(
696         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, qScale, qOffset);
697 
698     result.m_ExpectedData = QuantizedVector<T>(
699         {
700             // Batch 0
701             1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
702 
703             // Batch 1
704             10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
705         },
706         qScale, qOffset);
707 
708     return result;
709 }
710 
// 2d concat along axis 0 with inputs of different sizes along that axis
// (2x3, 3x3 and 1x3), stacked into a 6x3 output.
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim0DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f,
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 3, 3 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0
            4.0f, 5.0f, 6.0f,

            // Batch 1
            13.0f, 14.0f, 15.0f,

            // Batch 0
            7.0f, 8.0f, 9.0f,
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 1, 3 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 1
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   0,      // concatenation axis
                   true);  // use sub-tensors

    result.m_ActualData = output;
    // Expected: the inputs simply stacked in order along axis 0.
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f,

            // Batch 2
            4.0f, 5.0f, 6.0f,

            // Batch 3
            13.0f, 14.0f, 15.0f,

            // Batch 4
            7.0f, 8.0f, 9.0f,

            // Batch 5
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    return result;
}
790 
// Concatenates three 2-D inputs with different widths - shapes [2,3], [2,5]
// and [2,1] - along dimension 1 into a [2,9] output. All tensors share the
// same quantization parameters (qScale/qOffset).
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim1DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f,
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 2, 5 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0
            4.0f, 5.0f, 6.0f, 7.0f, 8.0f,

            // Batch 1
            13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 2, 1 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 0
            9.0f,

            // Batch 1
            18.0f
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   1,     // concatenation dimension (width)
                   true); // request sub-tensor views where supported

    result.m_ActualData = output;
    // Expected output: each batch is input0's row, then input1's, then input2's.
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset);

    return result;
}
858 
// Shared helper for the same-shape 3-D concat tests: runs Concatenate on
// three identically shaped [2,3,2] inputs (values 1-36) along 'dimension'
// and records the actual output. The caller supplies the output TensorInfo
// and fills in m_ExpectedData for the chosen dimension.
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo& outputTensorInfo,
    unsigned int dimension,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);

    auto input0 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset);

    auto input1 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f, 8.0f,

            // Batch 0, Channel 1
            9.0f, 10.0f,

            // Batch 0, Channel 2
            11.0f, 12.0f,

            // Batch 1, Channel 0
            25.0f, 26.0f,

            // Batch 1, Channel 1
            27.0f, 28.0f,

            // Batch 1, Channel 2
            29.0f, 30.0f
        },
        qScale, qOffset);

    auto input2 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            13.0f, 14.0f,

            // Batch 0, Channel 1
            15.0f, 16.0f,

            // Batch 0, Channel 2
            17.0f, 18.0f,

            // Batch 1, Channel 0
            31.0f, 32.0f,

            // Batch 1, Channel 1
            33.0f, 34.0f,

            // Batch 1, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset);

    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    // The same inputTensorInfo is used for all three inputs - they differ
    // only in their data.
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   useSubtensor);

    result.m_ActualData = output;
    return result;
}
953 
954 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat3dDim0TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)955 LayerTestResult<T, 3> Concat3dDim0TestImpl(
956     IWorkloadFactory& workloadFactory,
957     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
958     const armnn::ITensorHandleFactory& tensorHandleFactory,
959     float qScale,
960     int32_t qOffset)
961 {
962     TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
963 
964     LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
965         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, true, qScale, qOffset);
966 
967     result.m_ExpectedData = QuantizedVector<T>(
968         {
969             // Batch 0, Channel 0
970             1.0f, 2.0f,
971 
972             // Batch 0, Channel 1
973             3.0f, 4.0f,
974 
975             // Batch 0, Channel 2
976             5.0f, 6.0f,
977 
978             // Batch 1, Channel 0
979             19.0f, 20.0f,
980 
981             // Batch 1, Channel 1
982             21.0f, 22.0f,
983 
984             // Batch 1, Channel 2
985             23.0f, 24.0f,
986 
987             // Batch 2, Channel 0
988             7.0f, 8.0f,
989 
990             // Batch 2, Channel 1
991             9.0f, 10.0f,
992 
993             // Batch 2, Channel 2
994             11.0f, 12.0f,
995 
996             // Batch 3, Channel 0
997             25.0f, 26.0f,
998 
999             // Batch 3, Channel 1
1000             27.0f, 28.0f,
1001 
1002             // Batch 3, Channel 2
1003             29.0f, 30.0f,
1004 
1005             // Batch 4, Channel 0
1006             13.0f, 14.0f,
1007 
1008             // Batch 4, Channel 1
1009             15.0f, 16.0f,
1010 
1011             // Batch 4, Channel 2
1012             17.0f, 18.0f,
1013 
1014             // Batch 5, Channel 0
1015             31.0f, 32.0f,
1016 
1017             // Batch 5, Channel 1
1018             33.0f, 34.0f,
1019 
1020             // Batch 5, Channel 2
1021             35.0f, 36.0f
1022         },
1023         qScale, qOffset);
1024 
1025     return result;
1026 }
1027 
1028 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat3dDim1TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1029 LayerTestResult<T, 3> Concat3dDim1TestImpl(
1030     IWorkloadFactory& workloadFactory,
1031     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1032     const armnn::ITensorHandleFactory& tensorHandleFactory,
1033     float qScale,
1034     int32_t qOffset)
1035 {
1036     TensorInfo outputTensorInfo({ 2, 9, 2 }, ArmnnType, qScale, qOffset);
1037 
1038     LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
1039         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, true, qScale, qOffset);
1040 
1041     result.m_ExpectedData = QuantizedVector<T>(
1042         {
1043             // Batch 0, Channel 0
1044             1.0f, 2.0f,
1045 
1046             // Batch 0, Channel 1
1047             3.0f, 4.0f,
1048 
1049             // Batch 0, Channel 2
1050             5.0f, 6.0f,
1051 
1052             // Batch 0, Channel 3
1053             7.0f, 8.0f,
1054 
1055             // Batch 0, Channel 4
1056             9.0f, 10.0f,
1057 
1058             // Batch 0, Channel 5
1059             11.0f, 12.0f,
1060 
1061             // Batch 0, Channel 6
1062             13.0f, 14.0f,
1063 
1064             // Batch 0, Channel 7
1065             15.0f, 16.0f,
1066 
1067             // Batch 0, Channel 8
1068             17.0f, 18.0f,
1069 
1070             // Batch 1, Channel 0
1071             19.0f, 20.0f,
1072 
1073             // Batch 1, Channel 1
1074             21.0f, 22.0f,
1075 
1076             // Batch 1, Channel 2
1077             23.0f, 24.0f,
1078 
1079             // Batch 1, Channel 3
1080             25.0f, 26.0f,
1081 
1082             // Batch 1, Channel 4
1083             27.0f, 28.0f,
1084 
1085             // Batch 1, Channel 5
1086             29.0f, 30.0f,
1087 
1088             // Batch 1, Channel 6
1089             31.0f, 32.0f,
1090 
1091             // Batch 1, Channel 7
1092             33.0f, 34.0f,
1093 
1094             // Batch 1, Channel 8
1095             35.0f, 36.0f
1096         },
1097         qScale, qOffset);
1098 
1099     return result;
1100 }
1101 
1102 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat3dDim2TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor,float qScale,int32_t qOffset)1103 LayerTestResult<T, 3> Concat3dDim2TestImpl(
1104     IWorkloadFactory& workloadFactory,
1105     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1106     const armnn::ITensorHandleFactory& tensorHandleFactory,
1107     bool useSubtensor,
1108     float qScale,
1109     int32_t qOffset)
1110 {
1111     TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
1112 
1113     LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
1114         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 2, useSubtensor, qScale, qOffset);
1115 
1116     result.m_ExpectedData = QuantizedVector<T>(
1117         {
1118             // Batch 0, Channel 0
1119             1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
1120 
1121             // Batch 0, Channel 1
1122             3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
1123 
1124             // Batch 0, Channel 2
1125             5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
1126 
1127             // Batch 1, Channel 0
1128             19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
1129 
1130             // Batch 1, Channel 1
1131             21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
1132 
1133             // Batch 1, Channel 2
1134             23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
1135         },
1136         qScale, qOffset);
1137 
1138     return result;
1139 }
1140 
1141 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat3dDim0DiffInputDimsTestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1142 LayerTestResult<T, 3> Concat3dDim0DiffInputDimsTestImpl(
1143     IWorkloadFactory& workloadFactory,
1144     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1145     const armnn::ITensorHandleFactory& tensorHandleFactory,
1146     float qScale,
1147     int32_t qOffset)
1148 {
1149     TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType);
1150     auto input0 = QuantizedVector<T>(
1151         {
1152             // Batch 0, Channel 0
1153             1.0f, 2.0f,
1154 
1155             // Batch 0, Channel 1
1156             3.0f, 4.0f,
1157 
1158             // Batch 0, Channel 2
1159             5.0f, 6.0f,
1160 
1161             // Batch 1, Channel 0
1162             19.0f, 20.0f,
1163 
1164             // Batch 1, Channel 1
1165             21.0f, 22.0f,
1166 
1167             // Batch 1, Channel 2
1168             23.0f, 24.0f
1169         },
1170         qScale, qOffset);
1171 
1172     TensorInfo input1TensorInfo({ 1, 3, 2 }, ArmnnType);
1173     auto input1 = QuantizedVector<T>(
1174         {
1175             // Batch 0, Channel 0
1176             7.0f, 8.0f,
1177 
1178             // Batch 0, Channel 1
1179             9.0f, 10.0f,
1180 
1181             // Batch 0, Channel 2
1182             11.0f, 12.0f,
1183         },
1184         qScale, qOffset);
1185 
1186     TensorInfo input2TensorInfo({ 3, 3, 2 }, ArmnnType);
1187     auto input2 = QuantizedVector<T>(
1188         {
1189             // Batch 0, Channel 0
1190             25.0f, 26.0f,
1191 
1192             // Batch 0, Channel 1
1193             27.0f, 28.0f,
1194 
1195             // Batch 0, Channel 2
1196             29.0f, 30.0f,
1197 
1198             // Batch 1, Channel 0
1199             13.0f, 14.0f,
1200 
1201             // Batch 1, Channel 1
1202             15.0f, 16.0f,
1203 
1204             // Batch 1, Channel 2
1205             17.0f, 18.0f,
1206 
1207             // Batch 2, Channel 0
1208             31.0f, 32.0f,
1209 
1210             // Batch 2, Channel 1
1211             33.0f, 34.0f,
1212 
1213             // Batch 2, Channel 2
1214             35.0f, 36.0f
1215         },
1216         qScale, qOffset);
1217 
1218     TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType);
1219     LayerTestResult<T, 3> result(outputTensorInfo);
1220 
1221     std::vector<T> output;
1222     output.resize(outputTensorInfo.GetNumElements());
1223     Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
1224                    { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1225                    { input0.data(), input1.data(), input2.data() },
1226                    outputTensorInfo,
1227                    output.data(),
1228                    0,
1229                    true);
1230 
1231     result.m_ActualData = output;
1232     result.m_ExpectedData = QuantizedVector<T>(
1233         {
1234             // Batch 0, Channel 0
1235             1.0f, 2.0f,
1236 
1237             // Batch 0, Channel 1
1238             3.0f, 4.0f,
1239 
1240             // Batch 0, Channel 2
1241             5.0f, 6.0f,
1242 
1243             // Batch 1, Channel 0
1244             19.0f, 20.0f,
1245 
1246             // Batch 1, Channel 1
1247             21.0f, 22.0f,
1248 
1249             // Batch 1, Channel 2
1250             23.0f, 24.0f,
1251 
1252             // Batch 2, Channel 0
1253             7.0f, 8.0f,
1254 
1255             // Batch 2, Channel 1
1256             9.0f, 10.0f,
1257 
1258             // Batch 2, Channel 2
1259             11.0f, 12.0f,
1260 
1261             // Batch 3, Channel 0
1262             25.0f, 26.0f,
1263 
1264             // Batch 3, Channel 1
1265             27.0f, 28.0f,
1266 
1267             // Batch 3, Channel 2
1268             29.0f, 30.0f,
1269 
1270             // Batch 4, Channel 0
1271             13.0f, 14.0f,
1272 
1273             // Batch 4, Channel 1
1274             15.0f, 16.0f,
1275 
1276             // Batch 4, Channel 2
1277             17.0f, 18.0f,
1278 
1279             // Batch 5, Channel 0
1280             31.0f, 32.0f,
1281 
1282             // Batch 5, Channel 1
1283             33.0f, 34.0f,
1284 
1285             // Batch 5, Channel 2
1286             35.0f, 36.0f
1287         },
1288         qScale, qOffset);
1289 
1290     return result;
1291 }
1292 
// Concatenates three 3-D inputs with different channel counts - shapes
// [2,3,2], [2,4,2] and [2,1,2] - along dimension 1 into a [2,8,2] output.
// All tensors share the same quantization parameters (qScale/qOffset).
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim1DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 2, 4, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f, 8.0f,

            // Batch 0, Channel 1
            9.0f, 10.0f,

            // Batch 0, Channel 2
            11.0f, 12.0f,

            // Batch 0, Channel 3
            25.0f, 26.0f,

            // Batch 1, Channel 0
            27.0f, 28.0f,

            // Batch 1, Channel 1
            29.0f, 30.0f,

            // Batch 1, Channel 2
            13.0f, 14.0f,

            // Batch 1, Channel 3
            15.0f, 16.0f,
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 2, 1, 2 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            17.0f, 18.0f,

            // Batch 1, Channel 0
            31.0f, 32.0f,
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 2, 8, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   1,     // concatenation dimension (channel)
                   true); // request sub-tensor views where supported

    result.m_ActualData = output;
    // Expected output: per batch, input0's 3 channels, input1's 4, input2's 1.
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 0, Channel 3
            7.0f, 8.0f,

            // Batch 0, Channel 4
            9.0f, 10.0f,

            // Batch 0, Channel 5
            11.0f, 12.0f,

            // Batch 0, Channel 6
            25.0f, 26.0f,

            // Batch 0, Channel 7
            17.0f, 18.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f,

            // Batch 1, Channel 3
            27.0f, 28.0f,

            // Batch 1, Channel 4
            29.0f, 30.0f,

            // Batch 1, Channel 5
            13.0f, 14.0f,

            // Batch 1, Channel 6
            15.0f, 16.0f,

            // Batch 1, Channel 7
            31.0f, 32.0f,
        },
        qScale, qOffset);

    return result;
}
1432 
// Concatenates three 3-D inputs with different innermost sizes - shapes
// [2,3,2], [2,3,1] and [2,3,3] - along dimension 2 into a [2,3,6] output.
// All tensors share the same quantization parameters (qScale/qOffset).
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim2DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset);

    TensorInfo input1TensorInfo({ 2, 3, 1 }, ArmnnType, qScale, qOffset);
    auto input1 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f,

            // Batch 0, Channel 1
            9.0f,

            // Batch 0, Channel 2
            11.0f,

            // Batch 1, Channel 0
            25.0f,

            // Batch 1, Channel 1
            27.0f,

            // Batch 1, Channel 2
            29.0f
        },
        qScale, qOffset);

    TensorInfo input2TensorInfo({ 2, 3, 3 }, ArmnnType, qScale, qOffset);
    auto input2 = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            13.0f, 14.0f, 50.0f,

            // Batch 0, Channel 1
            15.0f, 16.0f, 51.0f,

            // Batch 0, Channel 2
            17.0f, 18.0f, 52.0f,

            // Batch 1, Channel 0
            31.0f, 32.0f, 53.0f,

            // Batch 1, Channel 1
            33.0f, 34.0f, 54.0f,

            // Batch 1, Channel 2
            35.0f, 36.0f, 55.0f,
        },
        qScale, qOffset);

    TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   2,             // concatenation dimension (innermost)
                   useSubtensor);

    result.m_ActualData = output;
    // Expected output: each (batch, channel) row is input0's two values,
    // input1's single value, then input2's three values.
    result.m_ExpectedData = QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
        },
        qScale, qOffset);

    return result;
}
1549 
// Shared helper for the same-shape 4-D concat tests: runs Concatenate on
// three identically shaped [1,3,2,2] inputs along 'dimension' and records
// the actual output. The caller supplies the output TensorInfo and fills
// in m_ExpectedData for the chosen dimension.
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo& outputTensorInfo,
    unsigned int dimension,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);

    // 12 values per input; note input0's last pair (11, 12) deliberately
    // repeats input1's first pair, and likewise for input1/input2 (21, 22).
    auto input0 = QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f
        },
        qScale, qOffset);

    auto input1 = QuantizedVector<T>(
        {
            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f,
            19.0f, 20.0f,
            21.0f, 22.0f
        },
        qScale, qOffset);

    auto input2 = QuantizedVector<T>(
        {
            21.0f, 22.0f,
            23.0f, 24.0f,
            25.0f, 26.0f,
            27.0f, 28.0f,
            29.0f, 30.0f,
            31.0f, 32.0f
        },
        qScale, qOffset);

    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());

    // The same inputTensorInfo is used for all three inputs - they differ
    // only in their data.
    Concatenate<T>(workloadFactory,
                   memoryManager,
                   tensorHandleFactory,
                   {inputTensorInfo, inputTensorInfo, inputTensorInfo},
                   {input0.data(), input1.data(), input2.data()},
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   useSubtensor);

    result.m_ActualData = output;
    return result;
}
1614 
1615 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDim0TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1616 LayerTestResult<T, 4> Concat4dDim0TestImpl(
1617     IWorkloadFactory& workloadFactory,
1618     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1619     const armnn::ITensorHandleFactory& tensorHandleFactory,
1620     float qScale,
1621     int32_t qOffset)
1622 {
1623     TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1624 
1625     LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
1626         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, true, qScale, qOffset);
1627 
1628     result.m_ExpectedData = QuantizedVector<T>(
1629         {
1630              1.0f,  2.0f,
1631              3.0f,  4.0f,
1632              5.0f,  6.0f,
1633              7.0f,  8.0f,
1634              9.0f, 10.0f,
1635             11.0f, 12.0f,
1636 
1637             11.0f, 12.0f,
1638             13.0f, 14.0f,
1639             15.0f, 16.0f,
1640             17.0f, 18.0f,
1641             19.0f, 20.0f,
1642             21.0f, 22.0f,
1643 
1644             21.0f, 22.0f,
1645             23.0f, 24.0f,
1646             25.0f, 26.0f,
1647             27.0f, 28.0f,
1648             29.0f, 30.0f,
1649             31.0f, 32.0f
1650         },
1651         qScale, qOffset);
1652 
1653     return result;
1654 }
1655 
1656 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDim1TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1657 LayerTestResult<T, 4> Concat4dDim1TestImpl(
1658     IWorkloadFactory& workloadFactory,
1659     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1660     const armnn::ITensorHandleFactory& tensorHandleFactory,
1661     float qScale,
1662     int32_t qOffset)
1663 {
1664     TensorInfo outputTensorInfo({ 1, 9, 2, 2 }, ArmnnType, qScale, qOffset);
1665 
1666     LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
1667         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, true, qScale, qOffset);
1668 
1669     result.m_ExpectedData = QuantizedVector<T>(
1670         {
1671              1.0f,  2.0f,
1672              3.0f,  4.0f,
1673              5.0f,  6.0f,
1674              7.0f,  8.0f,
1675              9.0f, 10.0f,
1676             11.0f, 12.0f,
1677 
1678             11.0f, 12.0f,
1679             13.0f, 14.0f,
1680             15.0f, 16.0f,
1681             17.0f, 18.0f,
1682             19.0f, 20.0f,
1683             21.0f, 22.0f,
1684 
1685             21.0f, 22.0f,
1686             23.0f, 24.0f,
1687             25.0f, 26.0f,
1688             27.0f, 28.0f,
1689             29.0f, 30.0f,
1690             31.0f, 32.0f
1691         },
1692         qScale, qOffset);
1693 
1694     return result;
1695 }
1696 
1697 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDim2TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1698 LayerTestResult<T, 4> Concat4dDim2TestImpl(
1699     IWorkloadFactory& workloadFactory,
1700     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1701     const armnn::ITensorHandleFactory& tensorHandleFactory,
1702     float qScale,
1703     int32_t qOffset)
1704 {
1705     TensorInfo outputTensorInfo({ 1, 3, 6, 2 }, ArmnnType, qScale, qOffset);
1706 
1707     LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
1708         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 2, true, qScale, qOffset);
1709 
1710     result.m_ExpectedData = QuantizedVector<T>(
1711         {
1712              1.0f,  2.0f,
1713              3.0f,  4.0f,
1714             11.0f, 12.0f,
1715             13.0f, 14.0f,
1716             21.0f, 22.0f,
1717             23.0f, 24.0f,
1718 
1719              5.0f,  6.0f,
1720              7.0f,  8.0f,
1721             15.0f, 16.0f,
1722             17.0f, 18.0f,
1723             25.0f, 26.0f,
1724             27.0f, 28.0f,
1725 
1726              9.0f, 10.0f,
1727             11.0f, 12.0f,
1728             19.0f, 20.0f,
1729             21.0f, 22.0f,
1730             29.0f, 30.0f,
1731             31.0f, 32.0f
1732         },
1733         qScale, qOffset);
1734 
1735     return result;
1736 }
1737 
1738 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDim3TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset,bool useSubtensor)1739 LayerTestResult<T, 4> Concat4dDim3TestImpl(
1740     IWorkloadFactory& workloadFactory,
1741     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1742     const armnn::ITensorHandleFactory& tensorHandleFactory,
1743     float qScale,
1744     int32_t qOffset,
1745     bool useSubtensor)
1746 {
1747     TensorInfo outputTensorInfo({ 1, 3, 2, 6 }, ArmnnType, qScale, qOffset);
1748 
1749     LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
1750         workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 3, useSubtensor, qScale, qOffset);
1751 
1752     result.m_ExpectedData = QuantizedVector<T>(
1753         {
1754              1.0f,  2.0f,
1755             11.0f, 12.0f,
1756             21.0f, 22.0f,
1757              3.0f,  4.0f,
1758             13.0f, 14.0f,
1759             23.0f, 24.0f,
1760 
1761              5.0f,  6.0f,
1762             15.0f, 16.0f,
1763             25.0f, 26.0f,
1764              7.0f,  8.0f,
1765             17.0f, 18.0f,
1766             27.0f, 28.0f,
1767 
1768              9.0f, 10.0f,
1769             19.0f, 20.0f,
1770             29.0f, 30.0f,
1771             11.0f, 12.0f,
1772             21.0f, 22.0f,
1773             31.0f, 32.0f
1774         },
1775         qScale, qOffset);
1776 
1777     return result;
1778 }
1779 
1780 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDiffShapeDim0TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1781 LayerTestResult<T, 4> Concat4dDiffShapeDim0TestImpl(
1782     IWorkloadFactory& workloadFactory,
1783     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1784     const armnn::ITensorHandleFactory& tensorHandleFactory,
1785     float qScale,
1786     int32_t qOffset)
1787 {
1788     constexpr unsigned int dimension = 0u;
1789 
1790     TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1791     auto input0 = QuantizedVector<T>(
1792         {
1793              1.0f,  2.0f,
1794              3.0f,  4.0f,
1795              5.0f,  6.0f,
1796              7.0f,  8.0f,
1797              9.0f, 10.0f,
1798             11.0f, 12.0f
1799         },
1800         qScale, qOffset);
1801 
1802     TensorInfo inputTensorInfo1({ 2, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1803 
1804     auto input1 = QuantizedVector<T>(
1805         {
1806             11.0f, 12.0f,
1807             13.0f, 14.0f,
1808             15.0f, 16.0f,
1809             17.0f, 18.0f,
1810             19.0f, 20.0f,
1811             21.0f, 22.0f,
1812 
1813             21.0f, 22.0f,
1814             23.0f, 24.0f,
1815             25.0f, 26.0f,
1816             27.0f, 28.0f,
1817             29.0f, 30.0f,
1818             31.0f, 32.0f
1819         },
1820         qScale, qOffset);
1821 
1822     TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1823 
1824     LayerTestResult<T, 4> result(outputTensorInfo);
1825 
1826     std::vector<T> output;
1827     output.resize(outputTensorInfo.GetNumElements());
1828     Concatenate<T>(workloadFactory,
1829                    memoryManager,
1830                    tensorHandleFactory,
1831                    {inputTensorInfo0, inputTensorInfo1},
1832                    {input0.data(), input1.data()},
1833                    outputTensorInfo,
1834                    output.data(),
1835                    dimension,
1836                    true);
1837 
1838     result.m_ActualData = output;
1839     result.m_ExpectedData = QuantizedVector<T>(
1840         {
1841              1.0f, 2.0f,
1842              3.0f, 4.0f,
1843              5.0f, 6.0f,
1844              7.0f, 8.0f,
1845              9.0f, 10.0f,
1846             11.0f, 12.0f,
1847 
1848             11.0f, 12.0f,
1849             13.0f, 14.0f,
1850             15.0f, 16.0f,
1851             17.0f, 18.0f,
1852             19.0f, 20.0f,
1853             21.0f, 22.0f,
1854 
1855             21.0f, 22.0f,
1856             23.0f, 24.0f,
1857             25.0f, 26.0f,
1858             27.0f, 28.0f,
1859             29.0f, 30.0f,
1860             31.0f, 32.0f
1861         },
1862         qScale, qOffset);
1863 
1864     return result;
1865 }
1866 
1867 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDiffShapeDim1TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1868 LayerTestResult<T, 4> Concat4dDiffShapeDim1TestImpl(
1869     IWorkloadFactory& workloadFactory,
1870     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1871     const armnn::ITensorHandleFactory& tensorHandleFactory,
1872     float qScale,
1873     int32_t qOffset)
1874 {
1875     constexpr unsigned int dimension = 1u;
1876 
1877     TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1878     auto input0 = QuantizedVector<T>(
1879         {
1880              1.0f,  2.0f,
1881              3.0f,  4.0f,
1882              5.0f,  6.0f,
1883              7.0f,  8.0f,
1884              9.0f, 10.0f,
1885             11.0f, 12.0f
1886         },
1887         qScale, qOffset);
1888 
1889     TensorInfo inputTensorInfo1({ 1, 2, 2, 2 }, ArmnnType, qScale, qOffset);
1890 
1891     auto input1 = QuantizedVector<T>(
1892         {
1893             11.0f, 12.0f,
1894             13.0f, 14.0f,
1895             15.0f, 16.0f,
1896             17.0f, 18.0f,
1897         },
1898         qScale, qOffset);
1899 
1900     TensorInfo outputTensorInfo({ 1, 5, 2, 2 }, ArmnnType, qScale, qOffset);
1901 
1902     LayerTestResult<T, 4> result(outputTensorInfo);
1903 
1904     std::vector<T> output;
1905     output.resize(outputTensorInfo.GetNumElements());
1906     Concatenate<T>(workloadFactory,
1907                    memoryManager,
1908                    tensorHandleFactory,
1909                    {inputTensorInfo0, inputTensorInfo1},
1910                    {input0.data(), input1.data()},
1911                    outputTensorInfo,
1912                    output.data(),
1913                    dimension,
1914                    true);
1915 
1916     result.m_ActualData = output;
1917     result.m_ExpectedData = QuantizedVector<T>(
1918         {
1919              1.0f,  2.0f,
1920              3.0f,  4.0f,
1921              5.0f,  6.0f,
1922              7.0f,  8.0f,
1923              9.0f, 10.0f,
1924             11.0f, 12.0f,
1925             11.0f, 12.0f,
1926             13.0f, 14.0f,
1927             15.0f, 16.0f,
1928             17.0f, 18.0f
1929         },
1930         qScale, qOffset);
1931 
1932     return result;
1933 }
1934 
1935 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDiffShapeDim2TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1936 LayerTestResult<T, 4> Concat4dDiffShapeDim2TestImpl(
1937     IWorkloadFactory& workloadFactory,
1938     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1939     const armnn::ITensorHandleFactory& tensorHandleFactory,
1940     float qScale,
1941     int32_t qOffset)
1942 {
1943     constexpr unsigned int dimension = 2u;
1944 
1945     TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1946     auto input0 = QuantizedVector<T>(
1947         {
1948              1.0f, 2.0f,
1949              3.0f, 4.0f,
1950              5.0f, 6.0f,
1951              7.0f, 8.0f,
1952             9.0f, 10.0f,
1953             11.0f, 12.0f
1954         },
1955         qScale, qOffset);
1956 
1957     TensorInfo inputTensorInfo1({ 1, 3, 3, 2 }, ArmnnType, qScale, qOffset);
1958     auto input1 = QuantizedVector<T>(
1959         {
1960             11.0f, 12.0f,
1961             13.0f, 14.0f,
1962             15.0f, 16.0f,
1963             17.0f, 18.0f,
1964             19.0f, 20.0f,
1965             21.0f, 22.0f,
1966             23.0f, 24.0f,
1967             25.0f, 26.0f,
1968             27.0f, 28.0f
1969         },
1970         qScale, qOffset);
1971 
1972     TensorInfo outputTensorInfo({ 1, 3, 5, 2 }, ArmnnType, qScale, qOffset);
1973     LayerTestResult<T, 4> result(outputTensorInfo);
1974 
1975     std::vector<T> output;
1976     output.resize(outputTensorInfo.GetNumElements());
1977     Concatenate<T>(workloadFactory,
1978                    memoryManager,
1979                    tensorHandleFactory,
1980                    {inputTensorInfo0, inputTensorInfo1},
1981                    {input0.data(), input1.data()},
1982                    outputTensorInfo,
1983                    output.data(),
1984                    dimension,
1985                    true);
1986 
1987     result.m_ActualData   = output;
1988     result.m_ExpectedData = QuantizedVector<T>(
1989         {
1990              1.0f,  2.0f,
1991              3.0f,  4.0f,
1992             11.0f, 12.0f,
1993             13.0f, 14.0f,
1994             15.0f, 16.0f,
1995 
1996              5.0f,  6.0f,
1997              7.0f,  8.0f,
1998             17.0f, 18.0f,
1999             19.0f, 20.0f,
2000             21.0f, 22.0f,
2001 
2002              9.0f, 10.0f,
2003             11.0f, 12.0f,
2004             23.0f, 24.0f,
2005             25.0f, 26.0f,
2006             27.0f, 28.0f
2007         },
2008         qScale, qOffset);
2009 
2010     return result;
2011 }
2012 
2013 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDiffShapeDim3TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset,bool useSubtensor)2014 LayerTestResult<T, 4> Concat4dDiffShapeDim3TestImpl(
2015     IWorkloadFactory& workloadFactory,
2016     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2017     const armnn::ITensorHandleFactory& tensorHandleFactory,
2018     float qScale,
2019     int32_t qOffset,
2020     bool useSubtensor)
2021 {
2022     constexpr unsigned int dimension = 3u;
2023 
2024     TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
2025     auto input0 = QuantizedVector<T>(
2026         {
2027              1.0f,  2.0f,
2028              3.0f,  4.0f,
2029              5.0f,  6.0f,
2030              7.0f,  8.0f,
2031              9.0f, 10.0f,
2032             11.0f, 12.0f
2033         },
2034         qScale, qOffset);
2035 
2036     TensorInfo inputTensorInfo1({ 1, 3, 2, 3 }, ArmnnType, qScale, qOffset);
2037     auto input1 = QuantizedVector<T>(
2038         {
2039             11.0f, 12.0f, 13.0f,
2040             14.0f, 15.0f, 16.0f,
2041 
2042             17.0f, 18.0f, 19.0f,
2043             20.0f, 21.0f, 22.0f,
2044 
2045             23.0f, 24.0f, 25.0f,
2046             26.0f, 27.0f, 28.0f
2047         },
2048         qScale, qOffset);
2049 
2050     TensorInfo outputTensorInfo({ 1, 3, 2, 5 }, ArmnnType, qScale, qOffset);
2051 
2052     LayerTestResult<T, 4> result(outputTensorInfo);
2053 
2054     std::vector<T> output;
2055     output.resize(outputTensorInfo.GetNumElements());
2056     Concatenate<T>(workloadFactory,
2057                    memoryManager,
2058                    tensorHandleFactory,
2059                    {inputTensorInfo0, inputTensorInfo1},
2060                    {input0.data(), input1.data()},
2061                    outputTensorInfo,
2062                    output.data(),
2063                    dimension,
2064                    useSubtensor);
2065 
2066     result.m_ActualData = output;
2067     result.m_ExpectedData = QuantizedVector<T>(
2068         {
2069             1.0f, 2.0f, 11.0f, 12.0f, 13.0f,
2070             3.0f, 4.0f, 14.0f, 15.0f, 16.0f,
2071             5.0f, 6.0f, 17.0f, 18.0f, 19.0f,
2072             7.0f, 8.0f, 20.0f, 21.0f, 22.0f,
2073             9.0f, 10.0f, 23.0f, 24.0f, 25.0f,
2074             11.0f, 12.0f, 26.0f, 27.0f, 28.0f
2075         },
2076         qScale, qOffset);
2077 
2078     return result;
2079 }
2080 
// Concatenates a { 3, 6, 2 } input and a { 3, 6, 1 } input along dimension 2
// into a { 3, 6, 3 } output, where the two inputs and the output each carry
// DIFFERENT quantization scales/offsets, so the workload must re-quantize
// values while copying them. Builds the ConcatQueueDescriptor by hand
// (view origins, sub-tensor handles) rather than using the Concatenate helper.
template<DataType ArmnnType, typename T>
LayerTestResult<T, 3> ConcatDifferentInputOutputQParamTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    IgnoreUnused(memoryManager);

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ 3, 6, 3 }, ArmnnType);
    TensorInfo inputTensorInfo1({ 3, 6, 2 }, ArmnnType);
    TensorInfo inputTensorInfo2({ 3, 6, 1 }, ArmnnType);

    std::vector<TensorShape> inputTensorShapes({inputTensorInfo1.GetShape(), inputTensorInfo2.GetShape()});

    // Quantized input1 tensor.
    const float inputScale1 = 0.5f;
    const int32_t inputOffset1 = 5;

    // Raw (already-quantized) values for input1.
    std::vector<T> input1 =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36
    };

    // Quantized input2 tensor.
    const float inputScale2 = 0.2f;
    const int32_t inputOffset2 = 10;

    // Raw (already-quantized) values for input2.
    std::vector<T> input2 =
    {
        37, 38, 39,
        40, 41, 42,
        43, 44, 45,
        46, 47, 48,
        49, 50, 51,
        52, 53, 54
    };

    // Quantized output tensor.
    const float outputScale = 0.1f;
    const int32_t outputOffset = 20;

    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    // Expected values are the inputs re-quantized into the output's space,
    // e.g. input1[0] = 1 dequantizes to (1 - 5) * 0.5 = -2.0, which
    // re-quantizes to -2.0 / 0.1 + 20 = 0; input2[0] = 37 dequantizes to
    // (37 - 10) * 0.2 = 5.4, which re-quantizes to 5.4 / 0.1 + 20 = 74.
    std::vector<T> expectedOutput =
    {
        0,   5,  74,
        10,  15,  76,
        20,  25,  78,
        30,  35,  80,
        40,  45,  82,
        50,  55,  84,

        60,  65,  86,
        70,  75,  88,
        80,  85,  90,
        90,  95,  92,
        100, 105,  94,
        110, 115,  96,

        120, 125,  98,
        130, 135, 100,
        140, 145, 102,
        150, 155, 104,
        160, 165, 106,
        170, 175, 108
    };

    // Attach the (deliberately different) quantization parameters to each
    // tensor descriptor after the raw data has been defined.
    outputTensorInfo.SetQuantizationScale(outputScale);
    outputTensorInfo.SetQuantizationOffset(outputOffset);
    inputTensorInfo1.SetQuantizationScale(inputScale1);
    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
    inputTensorInfo2.SetQuantizationScale(inputScale2);
    inputTensorInfo2.SetQuantizationOffset(inputOffset2);

    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    // input[1] starts at offset 2 along the concat axis (after input1's 2 columns).
    std::vector<unsigned int> wOrigin2 = { 0, 0, 2 }; //Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = useSubtensor && workloadFactory.SupportsSubTensors();

    // When sub-tensors are supported, the input handles are views directly
    // into the output tensor at the corresponding window origins; otherwise
    // they are standalone tensors that the workload must copy from.
    std::unique_ptr<ITensorHandle> inputHandle1 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    // Build the concat descriptor for axis 2 from the two input shapes.
    ConcatQueueDescriptor data;
    OriginsDescriptor desc = CreateDescriptorForConcatenation(
            inputTensorShapes.begin(),inputTensorShapes.end(), 2);
    data.m_Parameters = desc;

    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 3>(actualOutput,
                                 expectedOutput,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}
2221 
2222 //
2223 // Explicit template specializations
2224 //
2225 
// Explicit instantiation for QAsymmU8 so the out-of-line template definition
// above is emitted in this translation unit for linking.
template LayerTestResult<ResolveType<DataType::QAsymmU8>, 3>
ConcatDifferentInputOutputQParamTest<DataType::QAsymmU8>(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor);

// Explicit instantiation for QSymmS16, as above.
template LayerTestResult<ResolveType<DataType::QSymmS16>, 3>
ConcatDifferentInputOutputQParamTest<DataType::QSymmS16>(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor);
2239 
2240 //
2241 // Implementation functions
2242 //
2243 
ConcatTest(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2244 LayerTestResult<float,3> ConcatTest(
2245     IWorkloadFactory& workloadFactory,
2246     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2247     const armnn::ITensorHandleFactory& tensorHandleFactory)
2248 {
2249     return ConcatTestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory);
2250 }
2251 
ConcatInt32Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2252 LayerTestResult<int32_t, 3> ConcatInt32Test(
2253         IWorkloadFactory& workloadFactory,
2254         const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2255         const armnn::ITensorHandleFactory& tensorHandleFactory)
2256 {
2257     return ConcatTestImpl<DataType::Signed32>(workloadFactory, memoryManager, tensorHandleFactory);
2258 }
2259 
Concat1dTest(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2260 LayerTestResult<float, 1> Concat1dTest(
2261     IWorkloadFactory& workloadFactory,
2262     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2263     const armnn::ITensorHandleFactory& tensorHandleFactory)
2264 {
2265     return Concat1dTestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2266 }
2267 
Concat2dDim0Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2268 LayerTestResult<float, 2> Concat2dDim0Test(
2269     IWorkloadFactory& workloadFactory,
2270     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2271     const armnn::ITensorHandleFactory& tensorHandleFactory)
2272 {
2273     return Concat2dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2274 }
2275 
Concat2dDim1Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2276 LayerTestResult<float, 2> Concat2dDim1Test(
2277     IWorkloadFactory& workloadFactory,
2278     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2279     const armnn::ITensorHandleFactory& tensorHandleFactory)
2280 {
2281     return Concat2dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2282 }
2283 
Concat2dDim0DiffInputDimsTest(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2284 LayerTestResult<float, 2> Concat2dDim0DiffInputDimsTest(
2285     IWorkloadFactory& workloadFactory,
2286     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2287     const armnn::ITensorHandleFactory& tensorHandleFactory)
2288 {
2289     return Concat2dDim0DiffInputDimsTestImpl<DataType::Float32>(workloadFactory, memoryManager,
2290                                                                 tensorHandleFactory, 0.0f, 0);
2291 }
2292 
Concat2dDim1DiffInputDimsTest(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2293 LayerTestResult<float, 2> Concat2dDim1DiffInputDimsTest(
2294     IWorkloadFactory& workloadFactory,
2295     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2296     const armnn::ITensorHandleFactory& tensorHandleFactory)
2297 {
2298     return Concat2dDim1DiffInputDimsTestImpl<DataType::Float32>(workloadFactory,
2299                                                                 memoryManager,
2300                                                                 tensorHandleFactory,
2301                                                                 0.0f,
2302                                                                 0);
2303 }
2304 
Concat3dDim0Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2305 LayerTestResult<float, 3> Concat3dDim0Test(
2306     IWorkloadFactory& workloadFactory,
2307     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2308     const armnn::ITensorHandleFactory& tensorHandleFactory)
2309 {
2310     return Concat3dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2311 }
2312 
Concat3dDim1Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2313 LayerTestResult<float, 3> Concat3dDim1Test(
2314     IWorkloadFactory& workloadFactory,
2315     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2316     const armnn::ITensorHandleFactory& tensorHandleFactory)
2317 {
2318     return Concat3dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2319 }
2320 
Concat3dDim2Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor)2321 LayerTestResult<float, 3> Concat3dDim2Test(
2322     IWorkloadFactory& workloadFactory,
2323     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2324     const armnn::ITensorHandleFactory& tensorHandleFactory,
2325     bool useSubtensor)
2326 {
2327     return Concat3dDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory,
2328                                                    useSubtensor, 0.0f, 0);
2329 }
2330 
Concat3dDim0DiffInputDimsTest(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2331 LayerTestResult<float, 3> Concat3dDim0DiffInputDimsTest(
2332     IWorkloadFactory& workloadFactory,
2333     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2334     const armnn::ITensorHandleFactory& tensorHandleFactory)
2335 {
2336     return Concat3dDim0DiffInputDimsTestImpl<DataType::Float32>(
2337         workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2338 }
2339 
Concat3dDim1DiffInputDimsTest(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2340 LayerTestResult<float, 3> Concat3dDim1DiffInputDimsTest(
2341     IWorkloadFactory& workloadFactory,
2342     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2343     const armnn::ITensorHandleFactory& tensorHandleFactory)
2344 {
2345     return Concat3dDim1DiffInputDimsTestImpl<DataType::Float32>(workloadFactory, memoryManager,
2346                                                                 tensorHandleFactory, 0.0f, 0);
2347 }
2348 
Concat3dDim2DiffInputDimsTest(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor)2349 LayerTestResult<float, 3> Concat3dDim2DiffInputDimsTest(
2350     IWorkloadFactory& workloadFactory,
2351     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2352     const armnn::ITensorHandleFactory& tensorHandleFactory,
2353     bool useSubtensor)
2354 {
2355     return Concat3dDim2DiffInputDimsTestImpl<DataType::Float32>(
2356         workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.0f, 0);
2357 }
2358 
Concat4dDim0Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2359 LayerTestResult<float, 4> Concat4dDim0Test(
2360     IWorkloadFactory& workloadFactory,
2361     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2362     const armnn::ITensorHandleFactory& tensorHandleFactory)
2363 {
2364     return Concat4dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2365 }
2366 
Concat4dDim1Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2367 LayerTestResult<float, 4> Concat4dDim1Test(
2368     IWorkloadFactory& workloadFactory,
2369     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2370     const armnn::ITensorHandleFactory& tensorHandleFactory)
2371 {
2372     return Concat4dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2373 }
2374 
Concat4dDim2Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2375 LayerTestResult<float, 4> Concat4dDim2Test(
2376     IWorkloadFactory& workloadFactory,
2377     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2378     const armnn::ITensorHandleFactory& tensorHandleFactory)
2379 {
2380     return Concat4dDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2381 }
2382 
Concat4dDim3Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor)2383 LayerTestResult<float, 4> Concat4dDim3Test(
2384     IWorkloadFactory& workloadFactory,
2385     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2386     const armnn::ITensorHandleFactory& tensorHandleFactory,
2387     bool useSubtensor)
2388 {
2389     return Concat4dDim3TestImpl<DataType::Float32>(workloadFactory, memoryManager,
2390                                                    tensorHandleFactory, 0.0f, 0, useSubtensor);
2391 }
2392 
Concat4dDiffShapeDim0Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2393 LayerTestResult<float, 4> Concat4dDiffShapeDim0Test(
2394     IWorkloadFactory& workloadFactory,
2395     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2396     const armnn::ITensorHandleFactory& tensorHandleFactory)
2397 {
2398     return Concat4dDiffShapeDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager,
2399                                                             tensorHandleFactory, 0.0f, 0);
2400 }
2401 
Concat4dDiffShapeDim1Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2402 LayerTestResult<float, 4> Concat4dDiffShapeDim1Test(
2403     IWorkloadFactory& workloadFactory,
2404     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2405     const armnn::ITensorHandleFactory& tensorHandleFactory)
2406 {
2407     return Concat4dDiffShapeDim1TestImpl<DataType::Float32>(
2408         workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2409 }
2410 
Concat4dDiffShapeDim2Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2411 LayerTestResult<float, 4> Concat4dDiffShapeDim2Test(
2412     IWorkloadFactory& workloadFactory,
2413     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2414     const armnn::ITensorHandleFactory& tensorHandleFactory)
2415 {
2416     return Concat4dDiffShapeDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager,
2417                                                             tensorHandleFactory, 0.0f, 0);
2418 }
2419 
Concat4dDiffShapeDim3Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor)2420 LayerTestResult<float, 4> Concat4dDiffShapeDim3Test(
2421     IWorkloadFactory& workloadFactory,
2422     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2423     const armnn::ITensorHandleFactory& tensorHandleFactory,
2424     bool useSubtensor)
2425 {
2426     return Concat4dDiffShapeDim3TestImpl<DataType::Float32>(
2427         workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, useSubtensor);
2428 }
2429 
ConcatFloat16Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2430 LayerTestResult<Half, 3> ConcatFloat16Test(
2431     IWorkloadFactory& workloadFactory,
2432     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2433     const armnn::ITensorHandleFactory& tensorHandleFactory)
2434 {
2435     return Concat3dDim1TestImpl<DataType::Float16>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2436 }
2437 
ConcatBFloat16Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2438 LayerTestResult<BFloat16, 3> ConcatBFloat16Test(
2439     IWorkloadFactory& workloadFactory,
2440     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2441     const armnn::ITensorHandleFactory& tensorHandleFactory)
2442 {
2443     return Concat3dDim1TestImpl<DataType::BFloat16>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2444 }
2445 
// Concatenates two QAsymmU8 3-d tensors ([2,6,3] and [1,6,3]) along the
// channel dimension into a [3,6,3] output, where the two inputs carry
// DIFFERENT quantization parameters. The output shares input1's
// quantization, so only input2's data must be requantized; the last
// channel of expectedOutput holds those requantized values.
LayerTestResult<uint8_t, 3> ConcatUint8DifferentQParamsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    // Output shape is the two input shapes joined along the channel axis (2 + 1 = 3 channels).
    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QAsymmU8);

    // Quantized input1 tensor. Range [-3, 1]
    const float inputScale1 = 0.015686f;
    const int32_t inputOffset1 = 192;

    std::vector<uint8_t> input1 =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36
    };

    // Quantized input2 tensor. Range [-1, 4]
    const float inputScale2 = 0.019608f;
    const int32_t inputOffset2 = 50;

    std::vector<uint8_t> input2 =
    {
        37, 38, 39,
        40, 41, 42,
        43, 44, 45,
        46, 47, 48,
        49, 50, 51,
        52, 53, 54
    };

    // Output has the same quantization parameters than input1,
    // so that only the requantization of input2 is required
    const float outputScale = 0.015686f;
    const int32_t outputOffset = 192;

    std::vector<uint8_t> actualOutput(outputTensorInfo.GetNumElements());

    // First two channels: input1 passed through unchanged (same qparams as output).
    // Third channel: input2 requantized from (scale2, offset2) into (outputScale, outputOffset).
    std::vector<uint8_t> expectedOutput =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36,

        176, 177, 178,
        179, 181, 182,
        183, 184, 186,
        187, 188, 189,
        191, 192, 193,
        195, 196, 197
    };

    outputTensorInfo.SetQuantizationScale(outputScale);
    outputTensorInfo.SetQuantizationOffset(outputOffset);
    inputTensorInfo1.SetQuantizationScale(inputScale1);
    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
    inputTensorInfo2.SetQuantizationScale(inputScale2);
    inputTensorInfo2.SetQuantizationOffset(inputOffset2);

    // View origins place input1 at channel 0 and input2 at channel 2 of the output.
    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    // If the backend supports sub-tensors, the input handles are created as
    // views directly into the output tensor; otherwise they get their own buffers.
    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    // Wire the tensors and view origins into the Concat workload descriptor.
    ConcatQueueDescriptor data;
    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    // Allocate before copying the input data in.
    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<uint8_t, 3>(actualOutput,
                                       expectedOutput,
                                       outputHandle->GetShape(),
                                       outputTensorInfo.GetShape());
}
2591 
// Concatenates two QAsymmU8 3-d tensors ([2,6,3] and [1,6,3]) along the
// channel dimension into a [3,6,3] output. All tensors share identical
// quantization parameters, so the raw bytes pass straight through and the
// expected output is simply input1 followed by input2.
LayerTestResult<uint8_t, 3> ConcatUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    // Output shape is the two input shapes joined along the channel axis (2 + 1 = 3 channels).
    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QAsymmU8);

    // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
    const float scale = 0.13497836f;
    const int32_t offset = -7;

    outputTensorInfo.SetQuantizationScale(scale);
    outputTensorInfo.SetQuantizationOffset(offset);
    inputTensorInfo1.SetQuantizationScale(scale);
    inputTensorInfo1.SetQuantizationOffset(offset);
    inputTensorInfo2.SetQuantizationScale(scale);
    inputTensorInfo2.SetQuantizationOffset(offset);

    std::vector<uint8_t> actualOutput(outputTensorInfo.GetNumElements());

    // Expected result: input1's two channels followed by input2's single channel, unmodified.
    std::vector<uint8_t> expectedOutput =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36,

        37, 38, 39,
        40, 41, 42,
        43, 44, 45,
        46, 47, 48,
        49, 50, 51,
        52, 53, 54
    };

    std::vector<uint8_t> input1 =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36
    };

    std::vector<uint8_t> input2 =
    {
        37, 38, 39,
        40, 41, 42,
        43, 44, 45,
        46, 47, 48,
        49, 50, 51,
        52, 53, 54
    };

    // View origins place input1 at channel 0 and input2 at channel 2 of the output.
    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    // If the backend supports sub-tensors, the input handles are created as
    // views directly into the output tensor; otherwise they get their own buffers.
    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);


    // Wire the tensors and view origins into the Concat workload descriptor.
    ConcatQueueDescriptor data;
    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    // Allocate before copying the input data in.
    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<uint8_t, 3>(actualOutput,
                                       expectedOutput,
                                       outputHandle->GetShape(),
                                       outputTensorInfo.GetShape());
}
2729 
// Concatenates two QSymmS16 3-d tensors ([2,6,3] and [1,6,3]) along the
// channel dimension into a [3,6,3] output. All tensors share identical
// quantization parameters, so values pass straight through and the expected
// output is simply input1 followed by input2.
LayerTestResult<uint16_t, 3> ConcatUint16Test(
        IWorkloadFactory& workloadFactory,
        const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    // Output shape is the two input shapes joined along the channel axis (2 + 1 = 3 channels).
    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QSymmS16);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QSymmS16);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QSymmS16);

    // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
    const float scale = 0.13497836f;
    const int32_t offset = -7;

    outputTensorInfo.SetQuantizationScale(scale);
    outputTensorInfo.SetQuantizationOffset(offset);
    inputTensorInfo1.SetQuantizationScale(scale);
    inputTensorInfo1.SetQuantizationOffset(offset);
    inputTensorInfo2.SetQuantizationScale(scale);
    inputTensorInfo2.SetQuantizationOffset(offset);

    std::vector<uint16_t> actualOutput(outputTensorInfo.GetNumElements());

    // Expected result: input1's two channels followed by input2's single channel, unmodified.
    std::vector<uint16_t> expectedOutput =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36,

        37, 38, 39,
        40, 41, 42,
        43, 44, 45,
        46, 47, 48,
        49, 50, 51,
        52, 53, 54
    };

    std::vector<uint16_t> input1 =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36,
    };

    std::vector<uint16_t> input2 =
    {
        37, 38, 39,
        40, 41, 42,
        43, 44, 45,
        46, 47, 48,
        49, 50, 51,
        52, 53, 54,
    };

    // View origins place input1 at channel 0 and input2 at channel 2 of the output.
    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);


    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    // If the backend supports sub-tensors, the input handles are created as
    // views directly into the output tensor; otherwise they get their own buffers.
    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);


    // Wire the tensors and view origins into the Concat workload descriptor.
    ConcatQueueDescriptor data;
    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Concat, data, info);

    // Allocate before copying the input data in.
    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), input1.data());
    CopyDataToITensorHandle(inputHandle2.get(), input2.data());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<uint16_t, 3>(actualOutput,
                                       expectedOutput,
                                       outputHandle->GetShape(),
                                       outputTensorInfo.GetShape());
}
2868 
Concat1dUint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2869 LayerTestResult<uint8_t, 1> Concat1dUint8Test(
2870     IWorkloadFactory& workloadFactory,
2871     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2872     const armnn::ITensorHandleFactory& tensorHandleFactory)
2873 {
2874     return Concat1dTestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2875 }
2876 
Concat2dDim0Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2877 LayerTestResult<uint8_t, 2> Concat2dDim0Uint8Test(
2878     IWorkloadFactory& workloadFactory,
2879     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2880     const armnn::ITensorHandleFactory& tensorHandleFactory)
2881 {
2882     return Concat2dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2883 }
2884 
Concat2dDim1Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2885 LayerTestResult<uint8_t, 2> Concat2dDim1Uint8Test(
2886     IWorkloadFactory& workloadFactory,
2887     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2888     const armnn::ITensorHandleFactory& tensorHandleFactory)
2889 {
2890     return Concat2dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2891 }
2892 
Concat2dDim0DiffInputDimsUint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2893 LayerTestResult<uint8_t, 2> Concat2dDim0DiffInputDimsUint8Test(
2894     IWorkloadFactory& workloadFactory,
2895     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2896     const armnn::ITensorHandleFactory& tensorHandleFactory)
2897 {
2898     return Concat2dDim0DiffInputDimsTestImpl<DataType::QAsymmU8>(
2899         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2900 }
2901 
Concat2dDim1DiffInputDimsUint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2902 LayerTestResult<uint8_t, 2> Concat2dDim1DiffInputDimsUint8Test(
2903     IWorkloadFactory& workloadFactory,
2904     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2905     const armnn::ITensorHandleFactory& tensorHandleFactory)
2906 {
2907     return Concat2dDim1DiffInputDimsTestImpl<DataType::QAsymmU8>(
2908         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2909 }
2910 
Concat3dDim0Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2911 LayerTestResult<uint8_t, 3> Concat3dDim0Uint8Test(
2912     IWorkloadFactory& workloadFactory,
2913     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2914     const armnn::ITensorHandleFactory& tensorHandleFactory)
2915 {
2916     return Concat3dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2917 }
2918 
Concat3dDim1Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2919 LayerTestResult<uint8_t, 3> Concat3dDim1Uint8Test(
2920     IWorkloadFactory& workloadFactory,
2921     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2922     const armnn::ITensorHandleFactory& tensorHandleFactory)
2923 {
2924     return Concat3dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2925 }
2926 
Concat3dDim2Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor)2927 LayerTestResult<uint8_t, 3> Concat3dDim2Uint8Test(
2928     IWorkloadFactory& workloadFactory,
2929     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2930     const armnn::ITensorHandleFactory& tensorHandleFactory,
2931     bool useSubtensor)
2932 {
2933     return Concat3dDim2TestImpl<DataType::QAsymmU8>(
2934         workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.5f, -1);
2935 }
2936 
Concat3dDim0DiffInputDimsUint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2937 LayerTestResult<uint8_t, 3> Concat3dDim0DiffInputDimsUint8Test(
2938     IWorkloadFactory& workloadFactory,
2939     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2940     const armnn::ITensorHandleFactory& tensorHandleFactory)
2941 {
2942     return Concat3dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2943 }
2944 
Concat3dDim1DiffInputDimsUint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2945 LayerTestResult<uint8_t, 3> Concat3dDim1DiffInputDimsUint8Test(
2946     IWorkloadFactory& workloadFactory,
2947     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2948     const armnn::ITensorHandleFactory& tensorHandleFactory)
2949 {
2950     return Concat3dDim1DiffInputDimsTestImpl<DataType::QAsymmU8>(
2951         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2952 }
2953 
Concat3dDim2DiffInputDimsUint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor)2954 LayerTestResult<uint8_t, 3> Concat3dDim2DiffInputDimsUint8Test(
2955     IWorkloadFactory& workloadFactory,
2956     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2957     const armnn::ITensorHandleFactory& tensorHandleFactory,
2958     bool useSubtensor)
2959 {
2960     return Concat3dDim2DiffInputDimsTestImpl<DataType::QAsymmU8>(
2961         workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.5f, -1);
2962 }
2963 
Concat4dDim0Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2964 LayerTestResult<uint8_t, 4> Concat4dDim0Uint8Test(
2965     IWorkloadFactory& workloadFactory,
2966     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2967     const armnn::ITensorHandleFactory& tensorHandleFactory)
2968 {
2969     return Concat4dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2970 }
2971 
Concat4dDim1Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2972 LayerTestResult<uint8_t, 4> Concat4dDim1Uint8Test(
2973     IWorkloadFactory& workloadFactory,
2974     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2975     const armnn::ITensorHandleFactory& tensorHandleFactory)
2976 {
2977     return Concat4dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2978 }
2979 
Concat4dDim2Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2980 LayerTestResult<uint8_t, 4> Concat4dDim2Uint8Test(
2981     IWorkloadFactory& workloadFactory,
2982     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2983     const armnn::ITensorHandleFactory& tensorHandleFactory)
2984 {
2985     return Concat4dDim2TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2986 }
2987 
Concat4dDim3Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor)2988 LayerTestResult<uint8_t, 4> Concat4dDim3Uint8Test(
2989     IWorkloadFactory& workloadFactory,
2990     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2991     const armnn::ITensorHandleFactory& tensorHandleFactory, bool useSubtensor)
2992 {
2993     return Concat4dDim3TestImpl<DataType::QAsymmU8>(
2994         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1, useSubtensor);
2995 }
2996 
Concat4dDiffShapeDim0Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2997 LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim0Uint8Test(
2998     IWorkloadFactory& workloadFactory,
2999     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3000     const armnn::ITensorHandleFactory& tensorHandleFactory)
3001 {
3002     return Concat4dDiffShapeDim0TestImpl<DataType::QAsymmU8>(
3003         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
3004 }
3005 
Concat4dDiffShapeDim1Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)3006 LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim1Uint8Test(
3007     IWorkloadFactory& workloadFactory,
3008     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3009     const armnn::ITensorHandleFactory& tensorHandleFactory)
3010 {
3011     return Concat4dDiffShapeDim1TestImpl<DataType::QAsymmU8>(
3012         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
3013 }
3014 
Concat4dDiffShapeDim2Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)3015 LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim2Uint8Test(
3016     IWorkloadFactory& workloadFactory,
3017     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3018     const armnn::ITensorHandleFactory& tensorHandleFactory)
3019 {
3020     return Concat4dDiffShapeDim2TestImpl<DataType::QAsymmU8>(
3021         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
3022 }
3023 
Concat4dDiffShapeDim3Uint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,bool useSubtensor)3024 LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim3Uint8Test(
3025     IWorkloadFactory& workloadFactory,
3026     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3027     const armnn::ITensorHandleFactory& tensorHandleFactory,
3028     bool useSubtensor)
3029 {
3030     return Concat4dDiffShapeDim3TestImpl<DataType::QAsymmU8>(
3031         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1, useSubtensor);
3032 }
3033