xref: /aosp_15_r20/external/armnn/src/backends/neon/NeonWorkloadFactory.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NeonBackendId.hpp"
7 #include "NeonBackendModelContext.hpp"
8 #include "NeonTensorHandle.hpp"
9 #include "NeonWorkloadFactory.hpp"
10 
11 #include <Layer.hpp>
12 
13 #include <armnn/Utils.hpp>
14 #include <armnn/utility/IgnoreUnused.hpp>
15 #include <armnn/utility/NumericCast.hpp>
16 #include <armnn/utility/PolymorphicDowncast.hpp>
17 
18 #include <backendsCommon/MakeWorkloadHelper.hpp>
19 #include <armnn/backends/MemCopyWorkload.hpp>
20 #include <backendsCommon/MemImportWorkload.hpp>
21 #include <armnn/backends/TensorHandle.hpp>
22 
23 #include <neon/workloads/NeonWorkloadUtils.hpp>
24 #include <neon/workloads/NeonWorkloads.hpp>
25 
26 namespace armnn
27 {
28 
29 namespace
30 {
31 static const BackendId s_Id{NeonBackendId()};
32 }
33 
/// Queries whether the Neon backend supports the given layer.
/// Delegates to the shared IWorkloadFactory implementation, passing the Neon
/// backend id so the Neon layer-support rules are consulted.
/// @param layer                   Layer to check.
/// @param dataType                Optional data type the check is performed for.
/// @param outReasonIfUnsupported  Populated with a human-readable reason on failure.
/// @return true if the layer is supported on Neon.
bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer,
                                           Optional<DataType> dataType,
                                           std::string& outReasonIfUnsupported)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
40 
/// Overload of IsLayerSupported that additionally forwards backend model
/// options (e.g. FastMath, NumberOfThreads) to the support check.
/// @param layer                   Layer to check.
/// @param dataType                Optional data type the check is performed for.
/// @param outReasonIfUnsupported  Populated with a human-readable reason on failure.
/// @param modelOptions            Backend-specific model options to honour.
/// @return true if the layer is supported on Neon under the given options.
bool NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
                                           Optional<DataType> dataType,
                                           std::string& outReasonIfUnsupported,
                                           const ModelOptions& modelOptions)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
}
48 
/// Returns the backend id for this factory ("CpuAcc" via NeonBackendId).
/// The returned reference is to a file-local static, so it remains valid for
/// the lifetime of the program.
const BackendId& NeonWorkloadFactory::GetBackendId() const
{
    return s_Id;
}
53 
SetNumberOfThreads()54 void NeonWorkloadFactory::SetNumberOfThreads()
55 {
56     if (m_ModelContextPtr)
57     {
58         const unsigned int MIN_THREADS = 1;
59         const unsigned int MAX_THREADS = 64;
60 
61         // Set the number of threads to be used if the user has set NumberOfThreads param
62         // Only set if within limit or valid input
63         auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
64         auto numberOfThreads = modelOptions->GetNumberOfThreads();
65 
66         if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
67         {
68             arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
69         }
70     }
71 }
72 
/// Constructs a factory with the given memory manager and no backend-specific
/// model context. Applies any thread-count configuration immediately (a no-op
/// here since the context pointer is empty).
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
{
    SetNumberOfThreads();
}
78 
/// Constructs a factory with the given memory manager and backend-specific
/// model context. The context may carry Neon options (FastMath,
/// NumberOfThreads); the thread-count option is applied immediately.
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
                                         const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
    SetNumberOfThreads();
}
85 
CreateSubTensorHandle(ITensorHandle & parent,TensorShape const & subTensorShape,unsigned int const * subTensorOrigin) const86 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
87     TensorShape const& subTensorShape,
88     unsigned int const* subTensorOrigin) const
89 {
90     const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
91 
92     arm_compute::Coordinates coords;
93     coords.set_num_dimensions(subTensorShape.GetNumDimensions());
94     for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
95     {
96         // Arm compute indexes tensor coords in reverse order.
97         unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
98         coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
99     }
100 
101     const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
102     if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
103     {
104         return nullptr;
105     }
106 
107     return std::make_unique<NeonSubTensorHandle>(
108         PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
109 }
110 
CreateTensorHandle(const TensorInfo & tensorInfo,const bool IsMemoryManaged) const111 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
112                                                                        const bool IsMemoryManaged) const
113 {
114     auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
115     if (IsMemoryManaged)
116     {
117         tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
118     }
119     return tensorHandle;
120 }
121 
CreateTensorHandle(const TensorInfo & tensorInfo,DataLayout dataLayout,const bool IsMemoryManaged) const122 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
123                                                                        DataLayout dataLayout,
124                                                                        const bool IsMemoryManaged) const
125 {
126     auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
127     if (IsMemoryManaged)
128     {
129         tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
130     }
131     return tensorHandle;
132 }
133 
CreateWorkload(LayerType type,const QueueDescriptor & descriptor,const WorkloadInfo & info) const134 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateWorkload(LayerType type,
135                                                                const QueueDescriptor& descriptor,
136                                                                const WorkloadInfo& info) const
137 {
138     switch(type)
139     {
140         case LayerType::Activation :
141         {
142             auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
143             return std::make_unique<NeonActivationWorkload>(*activationQueueDescriptor, info);
144         }
145         case LayerType::Addition :
146         {
147             auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
148             return std::make_unique<NeonAdditionWorkload>(*additionQueueDescriptor, info);
149         }
150         case LayerType::ArgMinMax :
151         {
152             auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
153             return std::make_unique<NeonArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info);
154         }
155         case LayerType::BatchMatMul :
156         {
157             auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
158             return std::make_unique<NeonBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info);
159         }
160         case LayerType::BatchNormalization :
161         {
162             auto batchNormalizationQueueDescriptor
163                     = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
164             return std::make_unique<NeonBatchNormalizationWorkload>(*batchNormalizationQueueDescriptor, info);
165         }
166         case LayerType::BatchToSpaceNd :
167         {
168             auto batchToSpaceNdQueueDescriptor
169                     = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
170             return std::make_unique<NeonBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info);
171         }
172         case LayerType::Cast :
173         {
174             auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
175             return std::make_unique<NeonCastWorkload>(*castQueueDescriptor, info);
176         }
177         case LayerType::ChannelShuffle :
178         {
179             auto channelShuffleQueueDescriptor = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
180             return std::make_unique<NeonChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info);
181         }
182         case LayerType::Comparison :
183         {
184             auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
185             return std::make_unique<NeonComparisonWorkload>(*comparisonQueueDescriptor, info);
186         }
187         case LayerType::Concat :
188         {
189             auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
190             return std::make_unique<NeonConcatWorkload>(*concatQueueDescriptor, info);
191         }
192         case LayerType::Constant :
193         {
194             auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
195             return std::make_unique<NeonConstantWorkload>(*constantQueueDescriptor, info);
196         }
197         case LayerType::ConvertFp16ToFp32 :
198         {
199             auto convertFp16ToFp32QueueDescriptor
200                     = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
201             return std::make_unique<NeonConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor, info);
202         }
203         case LayerType::ConvertFp32ToFp16 :
204         {
205             auto convertFp32ToFp16QueueDescriptor
206                     = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
207             return std::make_unique<NeonConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor, info);
208         }
209         case LayerType::Convolution2d :
210         {
211             auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
212 
213             bool isFastMathEnabled = false;
214             if (m_ModelContextPtr)
215             {
216                 if (m_ModelContextPtr.get() != nullptr)
217                 {
218                     auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
219                     if (modelOptions)
220                     {
221                         isFastMathEnabled = modelOptions->IsFastMathEnabled();
222                     }
223                 }
224             }
225             return std::make_unique<NeonConvolution2dWorkload>(*convolution2dQueueDescriptor,
226                                                                info,
227                                                                m_MemoryManager->GetIntraLayerManager(),
228                                                                isFastMathEnabled);
229         }
230         case LayerType::Convolution3d :
231         {
232             auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
233 
234             bool isFastMathEnabled = false;
235             if (m_ModelContextPtr)
236             {
237                 if (m_ModelContextPtr.get() != nullptr)
238                 {
239                     auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
240                     if (modelOptions)
241                     {
242                         isFastMathEnabled = modelOptions->IsFastMathEnabled();
243                     }
244                 }
245             }
246             return std::make_unique<NeonConvolution3dWorkload>(*convolution3dQueueDescriptor,
247                                                                info,
248                                                                m_MemoryManager->GetIntraLayerManager(),
249                                                                isFastMathEnabled);
250         }
251         case LayerType::Debug :
252         {
253             auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
254             return MakeWorkloadHelper<NullWorkload, NullWorkload>(*debugQueueDescriptor, info);
255         }
256         case LayerType::DepthToSpace :
257         {
258             auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
259             return std::make_unique<NeonDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info);
260         }
261         case LayerType::DepthwiseConvolution2d :
262         {
263             auto depthwiseConvolution2dQueueDescriptor
264                     = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
265             return std::make_unique<NeonDepthwiseConvolutionWorkload>(*depthwiseConvolution2dQueueDescriptor, info);
266         }
267         case LayerType::Dequantize :
268         {
269             auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
270             return std::make_unique<NeonDequantizeWorkload>(*dequantizeQueueDescriptor, info);
271         }
272         case LayerType::DetectionPostProcess :
273         {
274             auto detectionPostProcessQueueDescriptor
275                     = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
276             return MakeWorkloadHelper<NullWorkload, NullWorkload>(*detectionPostProcessQueueDescriptor, info);
277         }
278         case LayerType::Division :
279         {
280             auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
281             return std::make_unique<NeonDivisionWorkload>(*divisionQueueDescriptor, info);
282         }
283         case LayerType::ElementwiseBinary :
284         {
285             auto elementwiseBinaryQueueDescriptor
286                     = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
287 
288             switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
289             {
290                 case BinaryOperation::Add:
291                 {
292                     AdditionQueueDescriptor additionQueueDescriptor;
293                     additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
294                     additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
295                     return std::make_unique<NeonAdditionWorkload>(additionQueueDescriptor, info);
296                 }
297                 case BinaryOperation::Div:
298                 {
299                     DivisionQueueDescriptor divisionQueueDescriptor;
300                     divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
301                     divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
302                     return std::make_unique<NeonDivisionWorkload>(divisionQueueDescriptor, info);
303                 }
304                 case BinaryOperation::Maximum:
305                 {
306                     MaximumQueueDescriptor maximumQueueDescriptor;
307                     maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
308                     maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
309                     return std::make_unique<NeonMaximumWorkload>(maximumQueueDescriptor, info);
310                 }
311                 case BinaryOperation::Minimum:
312                 {
313                     MinimumQueueDescriptor minimumQueueDescriptor;
314                     minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
315                     minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
316                     return std::make_unique<NeonMinimumWorkload>(minimumQueueDescriptor, info);
317                 }
318                 case BinaryOperation::Mul:
319                 {
320                     MultiplicationQueueDescriptor multiplicationQueueDescriptor;
321                     multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
322                     multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
323                     return std::make_unique<NeonMultiplicationWorkload>(multiplicationQueueDescriptor, info);
324                 }
325                 case BinaryOperation::Sub:
326                 {
327                     SubtractionQueueDescriptor subtractionQueueDescriptor;
328                     subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
329                     subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
330                     return std::make_unique<NeonSubtractionWorkload>(subtractionQueueDescriptor, info);
331                 }
332                 default:
333                     return nullptr;
334             }
335         }
336         case LayerType::ElementwiseUnary :
337         {
338             auto elementwiseUnaryQueueDescriptor
339                     = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
340 
341             switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation)
342             {
343                 case UnaryOperation::Abs:
344                 {
345                     AbsQueueDescriptor absQueueDescriptor;
346                     absQueueDescriptor.m_Inputs  = elementwiseUnaryQueueDescriptor->m_Inputs;
347                     absQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
348 
349                     return std::make_unique<NeonAbsWorkload>(absQueueDescriptor, info);
350                 }
351                 case UnaryOperation::Exp:
352                     return std::make_unique<NeonExpWorkload>(*elementwiseUnaryQueueDescriptor, info);
353                 case UnaryOperation::LogicalNot:
354                     return std::make_unique<NeonLogicalNotWorkload>(*elementwiseUnaryQueueDescriptor, info);
355                 case UnaryOperation::Log:
356                     return std::make_unique<NeonLogWorkload>(*elementwiseUnaryQueueDescriptor, info);
357                 case UnaryOperation::Neg:
358                     return std::make_unique<NeonNegWorkload>(*elementwiseUnaryQueueDescriptor, info);
359                 case UnaryOperation::Rsqrt:
360                 {
361                     RsqrtQueueDescriptor rsqrtQueueDescriptor;
362                     rsqrtQueueDescriptor.m_Inputs  = elementwiseUnaryQueueDescriptor->m_Inputs;
363                     rsqrtQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
364 
365                     return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor, info);
366                 }
367                 case UnaryOperation::Sin:
368                     return std::make_unique<NeonSinWorkload>(*elementwiseUnaryQueueDescriptor, info);
369                 case UnaryOperation::Sqrt:
370                     return std::make_unique<NeonSqrtWorkload>(*elementwiseUnaryQueueDescriptor, info);
371                 default:
372                     return nullptr;
373             }
374         }
375         case LayerType::Fill :
376         {
377             auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
378             return std::make_unique<NeonFillWorkload>(*fillQueueDescriptor, info);
379         }
380         case LayerType::Floor :
381         {
382             auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
383             return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(*floorQueueDescriptor, info);
384         }
385         case LayerType::FullyConnected :
386         {
387             auto fullyConnectedQueueDescriptor = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
388             return std::make_unique<NeonFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
389                                                                 info,
390                                                                 m_MemoryManager->GetIntraLayerManager());
391         }
392         case LayerType::Gather :
393         {
394             auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
395             return std::make_unique<NeonGatherWorkload>(*gatherQueueDescriptor, info);
396         }
397         case LayerType::GatherNd :
398         {
399             auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
400             return std::make_unique<NeonGatherNdWorkload>(*gatherNdQueueDescriptor, info);
401         }
402         case LayerType::Input :
403         {
404             auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
405             return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
406         }
407         case LayerType::InstanceNormalization :
408         {
409             auto instanceNormalizationQueueDescriptor
410                     = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
411             return std::make_unique<NeonInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor, info);
412         }
413         case LayerType::L2Normalization :
414         {
415             auto l2NormalizationQueueDescriptor
416                     = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
417             return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>
418                     (*l2NormalizationQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager());
419         }
420         case LayerType::LogSoftmax :
421         {
422             auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
423             return std::make_unique<NeonLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
424                                                             info,
425                                                             m_MemoryManager->GetIntraLayerManager());
426         }
427         case LayerType::LogicalBinary :
428         {
429             auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
430 
431             switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation)
432             {
433                 case LogicalBinaryOperation::LogicalAnd:
434                     return std::make_unique<NeonLogicalAndWorkload>(*logicalBinaryQueueDescriptor, info);
435                 case LogicalBinaryOperation::LogicalOr:
436                     return std::make_unique<NeonLogicalOrWorkload>(*logicalBinaryQueueDescriptor, info);
437                 default:
438                     return nullptr;
439             }
440         }
441         case LayerType::Lstm :
442         {
443             auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
444             return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(*lstmQueueDescriptor, info);
445         }
446         case LayerType::Maximum :
447         {
448             auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
449             return std::make_unique<NeonMaximumWorkload>(*maximumQueueDescriptor, info);
450         }
451         case LayerType::Mean :
452         {
453             auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
454             return std::make_unique<NeonMeanWorkload>(*meanQueueDescriptor, info);
455         }
456         case LayerType::MemCopy :
457         {
458             auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
459             if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0])
460             {
461                 throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
462             }
463             return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
464         }
465         case LayerType::MemImport :
466         {
467             auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
468             if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0])
469             {
470                 throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
471             }
472             return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
473         }
474         case LayerType::Minimum :
475         {
476             auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
477             return std::make_unique<NeonMinimumWorkload>(*minimumQueueDescriptor, info);
478         }
479         case LayerType::Multiplication :
480         {
481             auto multiplicationQueueDescriptor = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
482             return std::make_unique<NeonMultiplicationWorkload>(*multiplicationQueueDescriptor, info);
483         }
484         case LayerType::Normalization :
485         {
486             auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
487             return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>
488                     (*normalizationQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager());
489         }
490         case LayerType::Output :
491         {
492             auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
493             return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
494         }
495         case LayerType::Pad :
496         {
497             auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
498             return std::make_unique<NeonPadWorkload>(*padQueueDescriptor, info);
499         }
500         case LayerType::Permute :
501         {
502             auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
503             return std::make_unique<NeonPermuteWorkload>(*permuteQueueDescriptor, info);
504         }
505         case LayerType::Pooling2d :
506         {
507             auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
508             return std::make_unique<NeonPooling2dWorkload>(*pooling2dQueueDescriptor, info);
509         }
510         case LayerType::Pooling3d :
511         {
512             auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
513             return std::make_unique<NeonPooling3dWorkload>(*pooling3dQueueDescriptor, info);
514         }
515         case LayerType::PreCompiled :
516         {
517             auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
518             return MakeWorkloadHelper<NullWorkload, NullWorkload>(*preCompiledQueueDescriptor, info);
519         }
520         case LayerType::Prelu :
521         {
522             auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
523             return std::make_unique<NeonPreluWorkload>(*preluQueueDescriptor, info);
524         }
525         case LayerType::QLstm :
526         {
527             auto qLstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
528             return std::make_unique<NeonQLstmWorkload>(*qLstmQueueDescriptor, info);
529         }
530         case LayerType::Quantize :
531         {
532             auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
533             return std::make_unique<NeonQuantizeWorkload>(*quantizeQueueDescriptor, info);
534         }
535         case LayerType::QuantizedLstm :
536         {
537             auto quantizedLstmQueueDescriptor = PolymorphicDowncast<const QuantizedLstmQueueDescriptor*>(&descriptor);
538             return std::make_unique<NeonQuantizedLstmWorkload>(*quantizedLstmQueueDescriptor, info);
539         }
540         case LayerType::Rank :
541         {
542             auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
543             return std::make_unique<NeonRankWorkload>(*rankQueueDescriptor, info);
544         }
545         case LayerType::Reduce :
546         {
547             auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
548             return std::make_unique<NeonReduceWorkload>(*reduceQueueDescriptor, info);
549         }
550         case LayerType::Reshape :
551         {
552             auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
553             return std::make_unique<NeonReshapeWorkload>(*reshapeQueueDescriptor, info);
554         }
555         case LayerType::Resize :
556         {
557             auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
558             return std::make_unique<NeonResizeWorkload>(*resizeQueueDescriptor, info);
559         }
560         case LayerType::Slice :
561         {
562             auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
563             return std::make_unique<NeonSliceWorkload>(*sliceQueueDescriptor, info);
564         }
565         case LayerType::Softmax :
566         {
567             auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
568             return std::make_unique<NeonSoftmaxWorkload>(*softmaxQueueDescriptor,
569                                                          info,
570                                                          m_MemoryManager->GetIntraLayerManager());
571         }
572         case LayerType::SpaceToBatchNd :
573         {
574             auto spaceToBatchNdQueueDescriptor
575                     = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
576             return std::make_unique<NeonSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info);
577         }
578         case LayerType::SpaceToDepth :
579         {
580             auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
581             return std::make_unique<NeonSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info);
582         }
583         case LayerType::Splitter :
584         {
585             auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
586             return std::make_unique<NeonSplitterWorkload>(*splitterQueueDescriptor, info);
587         }
588         case LayerType::Stack :
589         {
590             auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
591             return std::make_unique<NeonStackWorkload>(*stackQueueDescriptor, info);
592         }
593         case LayerType::StridedSlice :
594         {
595             auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
596             return std::make_unique<NeonStridedSliceWorkload>(*stridedSliceQueueDescriptor, info);
597         }
598         case LayerType::Subtraction :
599         {
600             auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
601             return std::make_unique<NeonSubtractionWorkload>(*subtractionQueueDescriptor, info);
602         }
603         case LayerType::Transpose :
604         {
605             auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
606             return std::make_unique<NeonTransposeWorkload>(*transposeQueueDescriptor, info);
607         }
608         case LayerType::TransposeConvolution2d :
609         {
610             auto transposeConvolution2dQueueDescriptor
611                     = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
612             return std::make_unique<NeonTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
613                                                                         info,
614                                                                         m_MemoryManager->GetIntraLayerManager());
615         }
616         case LayerType::UnidirectionalSequenceLstm :
617         {
618             auto desc = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
619 
620             if ((info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Float32) &&
621                 (info.m_InputTensorInfos[1].GetDataType() == armnn::DataType::Float32) &&
622                 (info.m_InputTensorInfos[2].GetDataType() == armnn::DataType::Float32) &&
623                 (info.m_OutputTensorInfos[0].GetDataType() == armnn::DataType::Float32) &&
624                 (info.m_OutputTensorInfos[1].GetDataType() == armnn::DataType::Float32) &&
625                 (info.m_OutputTensorInfos[2].GetDataType() == armnn::DataType::Float32))
626             {
627                 return std::make_unique<NeonUnidirectionalSequenceLstmFloatWorkload>(*desc, info);
628             }
629             else
630             {
631                 return std::make_unique<NeonUnidirectionalSequenceLstmWorkload>(*desc, info);
632             }
633         }
634         default:
635             return nullptr;
636     }
637 }
638 
CreateActivation(const ActivationQueueDescriptor & descriptor,const WorkloadInfo & info) const639 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
640                                                                  const WorkloadInfo&              info) const
641 {
642     return std::make_unique<NeonActivationWorkload>(descriptor, info);
643 }
644 
CreateAddition(const AdditionQueueDescriptor & descriptor,const WorkloadInfo & info) const645 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
646                                                                       const WorkloadInfo&            info) const
647 {
648     return std::make_unique<NeonAdditionWorkload>(descriptor, info);
649 }
650 
CreateArgMinMax(const ArgMinMaxQueueDescriptor & descriptor,const WorkloadInfo & info) const651 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
652                                                                 const WorkloadInfo& info) const
653 {
654     return std::make_unique<NeonArgMinMaxWorkload>(descriptor, info);
655 }
656 
CreateBatchNormalization(const BatchNormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const657 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
658     const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
659 {
660     return std::make_unique<NeonBatchNormalizationWorkload>(descriptor, info);
661 }
662 
CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor & descriptor,const WorkloadInfo & info) const663 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
664                                                                      const WorkloadInfo& info) const
665 {
666     return std::make_unique<NeonBatchToSpaceNdWorkload>(descriptor, info);
667 }
668 
CreateCast(const CastQueueDescriptor & descriptor,const WorkloadInfo & info) const669 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
670                                                            const WorkloadInfo& info) const
671 {
672     return std::make_unique<NeonCastWorkload>(descriptor, info);
673 }
674 
CreateChannelShuffle(const ChannelShuffleQueueDescriptor & descriptor,const WorkloadInfo & info) const675 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateChannelShuffle(const ChannelShuffleQueueDescriptor& descriptor,
676                                                                      const WorkloadInfo& info) const
677 {
678     return std::make_unique<NeonChannelShuffleWorkload>(descriptor, info);
679 }
680 
CreateComparison(const ComparisonQueueDescriptor & descriptor,const WorkloadInfo & info) const681 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
682                                                                  const WorkloadInfo& info) const
683 {
684     return std::make_unique<NeonComparisonWorkload>(descriptor, info);
685 }
686 
CreateConcat(const ConcatQueueDescriptor & descriptor,const WorkloadInfo & info) const687 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
688                                                                     const WorkloadInfo&          info) const
689 {
690     return std::make_unique<NeonConcatWorkload>(descriptor, info);
691 }
692 
CreateConstant(const ConstantQueueDescriptor & descriptor,const WorkloadInfo & info) const693 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
694                                                                const WorkloadInfo& info) const
695 {
696     return std::make_unique<NeonConstantWorkload>(descriptor, info);
697 }
698 
CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor & descriptor,const WorkloadInfo & info) const699 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
700     const ConvertFp16ToFp32QueueDescriptor& descriptor,
701     const WorkloadInfo& info) const
702 {
703     return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info);
704 }
705 
CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor & descriptor,const WorkloadInfo & info) const706 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
707     const ConvertFp32ToFp16QueueDescriptor& descriptor,
708     const WorkloadInfo& info) const
709 {
710     return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info);
711 }
712 
CreateConvolution2d(const Convolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const713 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
714     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
715 {
716     bool isFastMathEnabled = false;
717     if (m_ModelContextPtr)
718     {
719         if (m_ModelContextPtr.get() != nullptr)
720         {
721             auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
722             if (modelOptions)
723             {
724                 isFastMathEnabled = modelOptions->IsFastMathEnabled();
725             }
726         }
727     }
728     return std::make_unique<NeonConvolution2dWorkload>(descriptor,
729                                                        info,
730                                                        m_MemoryManager->GetIntraLayerManager(),
731                                                        isFastMathEnabled);
732 }
733 
CreateConvolution3d(const Convolution3dQueueDescriptor & descriptor,const WorkloadInfo & info) const734 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution3d(
735         const Convolution3dQueueDescriptor& descriptor, const WorkloadInfo& info) const
736 {
737     bool isFastMathEnabled = false;
738     if (m_ModelContextPtr)
739     {
740         if (m_ModelContextPtr.get() != nullptr)
741         {
742             auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
743             if (modelOptions)
744             {
745                 isFastMathEnabled = modelOptions->IsFastMathEnabled();
746             }
747         }
748     }
749     return std::make_unique<NeonConvolution3dWorkload>(descriptor,
750                                                        info,
751                                                        m_MemoryManager->GetIntraLayerManager(),
752                                                        isFastMathEnabled);
753 }
754 
CreateDebug(const DebugQueueDescriptor & descriptor,const WorkloadInfo & info) const755 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
756                                                             const WorkloadInfo& info) const
757 {
758     return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
759 }
760 
CreateDepthToSpace(const DepthToSpaceQueueDescriptor & descriptor,const WorkloadInfo & info) const761 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
762                                                                    const WorkloadInfo& info) const
763 {
764     return std::make_unique<NeonDepthToSpaceWorkload>(descriptor, info);
765 }
766 
CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const767 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
768     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
769 {
770     return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor, info);
771 }
772 
CreateDequantize(const DequantizeQueueDescriptor & descriptor,const WorkloadInfo & info) const773 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
774                                                                  const WorkloadInfo& info) const
775 {
776     return std::make_unique<NeonDequantizeWorkload>(descriptor, info);
777 }
778 
CreateDetectionPostProcess(const armnn::DetectionPostProcessQueueDescriptor & descriptor,const armnn::WorkloadInfo & info) const779 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDetectionPostProcess(
780     const armnn::DetectionPostProcessQueueDescriptor& descriptor, const armnn::WorkloadInfo& info) const
781 {
782     return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
783 }
784 
CreateDivision(const DivisionQueueDescriptor & descriptor,const WorkloadInfo & info) const785 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
786     const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
787 {
788     return std::make_unique<NeonDivisionWorkload>(descriptor, info);
789 }
790 
CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor & descriptor,const WorkloadInfo & info) const791 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateElementwiseUnary(
792     const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) const
793 {
794     switch(descriptor.m_Parameters.m_Operation)
795     {
796         case UnaryOperation::Abs:
797         {
798             AbsQueueDescriptor absQueueDescriptor;
799             absQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
800             absQueueDescriptor.m_Outputs = descriptor.m_Outputs;
801 
802             return std::make_unique<NeonAbsWorkload>(absQueueDescriptor, info);
803         }
804         case UnaryOperation::Exp:
805             return std::make_unique<NeonExpWorkload>(descriptor, info);
806         case UnaryOperation::LogicalNot:
807             return std::make_unique<NeonLogicalNotWorkload>(descriptor, info);
808         case UnaryOperation::Log:
809             return std::make_unique<NeonLogWorkload>(descriptor, info);
810         case UnaryOperation::Neg:
811             return std::make_unique<NeonNegWorkload>(descriptor, info);
812         case UnaryOperation::Rsqrt:
813         {
814             RsqrtQueueDescriptor rsqrtQueueDescriptor;
815             rsqrtQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
816             rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;
817 
818             return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor, info);
819         }
820         case UnaryOperation::Sin:
821             return std::make_unique<NeonSinWorkload>(descriptor, info);
822         default:
823             return nullptr;
824     }
825 }
826 
CreateFill(const FillQueueDescriptor & descriptor,const WorkloadInfo & info) const827 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
828                                                            const WorkloadInfo& info) const
829 {
830     return std::make_unique<NeonFillWorkload>(descriptor, info);
831 }
832 
CreateFloor(const FloorQueueDescriptor & descriptor,const WorkloadInfo & info) const833 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
834                                                             const WorkloadInfo& info) const
835 {
836     return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(descriptor, info);
837 }
838 
CreateFullyConnected(const FullyConnectedQueueDescriptor & descriptor,const WorkloadInfo & info) const839 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
840     const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
841 {
842     return std::make_unique<NeonFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
843 }
844 
CreateGather(const armnn::GatherQueueDescriptor & descriptor,const armnn::WorkloadInfo & info) const845 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGather(const armnn::GatherQueueDescriptor& descriptor,
846                                                              const armnn::WorkloadInfo& info) const
847 {
848     return std::make_unique<NeonGatherWorkload>(descriptor, info);
849 }
850 
CreateInput(const InputQueueDescriptor & descriptor,const WorkloadInfo & info) const851 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
852                                                             const WorkloadInfo&        info) const
853 {
854     return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
855 }
856 
CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const857 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInstanceNormalization(
858     const InstanceNormalizationQueueDescriptor& descriptor,
859     const WorkloadInfo& info) const
860 {
861     return std::make_unique<NeonInstanceNormalizationWorkload>(descriptor, info);
862 }
863 
CreateL2Normalization(const L2NormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const864 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
865                                                                       const WorkloadInfo& info) const
866 {
867     return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
868                                                                               m_MemoryManager->GetIntraLayerManager());
869 }
870 
CreateLogSoftmax(const LogSoftmaxQueueDescriptor & descriptor,const WorkloadInfo & info) const871 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
872                                                                  const WorkloadInfo& info) const
873 {
874     return std::make_unique<NeonLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
875 }
876 
CreateLogicalBinary(const LogicalBinaryQueueDescriptor & descriptor,const WorkloadInfo & info) const877 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
878                                                                     const WorkloadInfo& info) const
879 {
880     switch(descriptor.m_Parameters.m_Operation)
881     {
882         case LogicalBinaryOperation::LogicalAnd:
883             return std::make_unique<NeonLogicalAndWorkload>(descriptor, info);
884         case LogicalBinaryOperation::LogicalOr:
885             return std::make_unique<NeonLogicalOrWorkload>(descriptor, info);
886         default:
887             return nullptr;
888     }
889 }
890 
CreateLstm(const LstmQueueDescriptor & descriptor,const WorkloadInfo & info) const891 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
892                                                            const WorkloadInfo& info) const
893 {
894     return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(descriptor, info);
895 }
896 
CreateMaximum(const MaximumQueueDescriptor & descriptor,const WorkloadInfo & info) const897 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
898                                                               const WorkloadInfo& info) const
899 {
900     return std::make_unique<NeonMaximumWorkload>(descriptor, info);
901 }
902 
CreateMean(const MeanQueueDescriptor & descriptor,const WorkloadInfo & info) const903 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
904                                                            const WorkloadInfo& info) const
905 {
906     return std::make_unique<NeonMeanWorkload>(descriptor, info);
907 }
908 
CreateMemCopy(const MemCopyQueueDescriptor & descriptor,const WorkloadInfo & info) const909 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
910                                                                      const WorkloadInfo&        info) const
911 {
912     if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
913     {
914         throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
915     }
916 
917     return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
918 }
919 
CreateMemImport(const MemImportQueueDescriptor & descriptor,const WorkloadInfo & info) const920 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
921                                                                        const WorkloadInfo&        info) const
922 {
923     if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
924     {
925         throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
926     }
927 
928     return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
929 }
930 
CreateMinimum(const MinimumQueueDescriptor & descriptor,const WorkloadInfo & info) const931 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
932                                                               const WorkloadInfo& info) const
933 {
934     return std::make_unique<NeonMinimumWorkload>(descriptor, info);
935 }
936 
CreateMultiplication(const MultiplicationQueueDescriptor & descriptor,const WorkloadInfo & info) const937 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
938     const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
939 {
940     return std::make_unique<NeonMultiplicationWorkload>(descriptor, info);
941 }
942 
CreateNormalization(const NormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const943 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
944     const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
945 {
946     return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
947                                                                             m_MemoryManager->GetIntraLayerManager());
948 }
949 
CreateOutput(const OutputQueueDescriptor & descriptor,const WorkloadInfo & info) const950 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
951                                                              const WorkloadInfo& info) const
952 {
953     return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
954 }
955 
CreatePad(const PadQueueDescriptor & descriptor,const WorkloadInfo & info) const956 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
957                                                           const WorkloadInfo& info) const
958 {
959     return std::make_unique<NeonPadWorkload>(descriptor, info);
960 }
961 
CreatePermute(const PermuteQueueDescriptor & descriptor,const WorkloadInfo & info) const962 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
963                                                                      const WorkloadInfo& info) const
964 {
965     return std::make_unique<NeonPermuteWorkload>(descriptor, info);
966 }
967 
CreatePooling2d(const Pooling2dQueueDescriptor & descriptor,const WorkloadInfo & info) const968 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
969                                                                        const WorkloadInfo& info) const
970 {
971     return std::make_unique<NeonPooling2dWorkload>(descriptor, info);
972 }
973 
CreatePreCompiled(const PreCompiledQueueDescriptor & descriptor,const WorkloadInfo & info) const974 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
975                                                                   const WorkloadInfo& info) const
976 {
977     return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
978 }
979 
CreatePrelu(const armnn::PreluQueueDescriptor & descriptor,const armnn::WorkloadInfo & info) const980 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePrelu(const armnn::PreluQueueDescriptor &descriptor,
981                                                                    const armnn::WorkloadInfo &info) const
982 {
983     return std::make_unique<NeonPreluWorkload>(descriptor, info);
984 }
985 
CreateQLstm(const QLstmQueueDescriptor & descriptor,const WorkloadInfo & info) const986 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
987                                                             const WorkloadInfo& info) const
988 {
989     return std::make_unique<NeonQLstmWorkload>(descriptor, info);
990 }
991 
CreateQuantize(const QuantizeQueueDescriptor & descriptor,const WorkloadInfo & info) const992 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
993                                                                       const WorkloadInfo& info) const
994 {
995     return std::make_unique<NeonQuantizeWorkload>(descriptor, info);
996 }
997 
CreateQuantizedLstm(const QuantizedLstmQueueDescriptor & descriptor,const WorkloadInfo & info) const998 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
999                                                                     const WorkloadInfo& info) const
1000 {
1001     return std::make_unique<NeonQuantizedLstmWorkload>(descriptor, info);
1002 }
1003 
CreateRank(const RankQueueDescriptor & descriptor,const WorkloadInfo & info) const1004 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
1005                                                            const WorkloadInfo& info) const
1006 {
1007     return std::make_unique<NeonRankWorkload>(descriptor, info);
1008 }
1009 
CreateReduce(const ReduceQueueDescriptor & descriptor,const WorkloadInfo & info) const1010 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
1011                                                              const WorkloadInfo& info) const
1012 {
1013     return std::make_unique<NeonReduceWorkload>(descriptor, info);
1014 }
1015 
CreateReshape(const ReshapeQueueDescriptor & descriptor,const WorkloadInfo & info) const1016 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
1017                                                               const WorkloadInfo& info) const
1018 {
1019     return std::make_unique<NeonReshapeWorkload>(descriptor, info);
1020 }
1021 
CreateResize(const ResizeQueueDescriptor & descriptor,const WorkloadInfo & info) const1022 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
1023                                                              const WorkloadInfo& info) const
1024 {
1025     return std::make_unique<NeonResizeWorkload>(descriptor, info);
1026 }
1027 
CreateSlice(const SliceQueueDescriptor & descriptor,const WorkloadInfo & info) const1028 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
1029                                                             const WorkloadInfo& info) const
1030 {
1031     return std::make_unique<NeonSliceWorkload>(descriptor, info);
1032 }
1033 
CreateSoftmax(const SoftmaxQueueDescriptor & descriptor,const WorkloadInfo & info) const1034 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
1035                                                               const WorkloadInfo& info) const
1036 {
1037     return std::make_unique<NeonSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
1038 }
1039 
CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor & descriptor,const WorkloadInfo & info) const1040 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
1041                                                                      const WorkloadInfo& info) const
1042 {
1043     return std::make_unique<NeonSpaceToBatchNdWorkload>(descriptor, info);
1044 }
1045 
CreateSpaceToDepth(const SpaceToDepthQueueDescriptor & descriptor,const WorkloadInfo & info) const1046 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
1047                                                                    const WorkloadInfo& info) const
1048 {
1049     return std::make_unique<NeonSpaceToDepthWorkload>(descriptor, info);
1050 }
1051 
CreateSplitter(const SplitterQueueDescriptor & descriptor,const WorkloadInfo & info) const1052 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
1053                                                                const WorkloadInfo&            info) const
1054 {
1055     return std::make_unique<NeonSplitterWorkload>(descriptor, info);
1056 }
1057 
CreateStack(const StackQueueDescriptor & descriptor,const WorkloadInfo & info) const1058 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
1059                                                             const WorkloadInfo& info) const
1060 {
1061     return std::make_unique<NeonStackWorkload>(descriptor, info);
1062 }
1063 
CreateStridedSlice(const StridedSliceQueueDescriptor & descriptor,const WorkloadInfo & info) const1064 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
1065                                                                    const WorkloadInfo& info) const
1066 {
1067     return std::make_unique<NeonStridedSliceWorkload>(descriptor, info);
1068 }
1069 
CreateSubtraction(const SubtractionQueueDescriptor & descriptor,const WorkloadInfo & info) const1070 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction(
1071     const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const
1072 {
1073     return std::make_unique<NeonSubtractionWorkload>(descriptor, info);
1074 }
1075 
CreateTranspose(const TransposeQueueDescriptor & descriptor,const WorkloadInfo & info) const1076 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
1077                                                                        const WorkloadInfo& info) const
1078 {
1079     return std::make_unique<NeonTransposeWorkload>(descriptor, info);
1080 }
1081 
CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const1082 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateTransposeConvolution2d(
1083     const TransposeConvolution2dQueueDescriptor &descriptor,
1084     const WorkloadInfo &info) const
1085 {
1086     return std::make_unique<NeonTransposeConvolution2dWorkload>(descriptor, info,
1087                                                                 m_MemoryManager->GetIntraLayerManager());
1088 }
1089 
1090 } // namespace armnn
1091