1 //
2 // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NeonBackendId.hpp"
7 #include "NeonBackendModelContext.hpp"
8 #include "NeonTensorHandle.hpp"
9 #include "NeonWorkloadFactory.hpp"
10
11 #include <Layer.hpp>
12
13 #include <armnn/Utils.hpp>
14 #include <armnn/utility/IgnoreUnused.hpp>
15 #include <armnn/utility/NumericCast.hpp>
16 #include <armnn/utility/PolymorphicDowncast.hpp>
17
18 #include <backendsCommon/MakeWorkloadHelper.hpp>
19 #include <armnn/backends/MemCopyWorkload.hpp>
20 #include <backendsCommon/MemImportWorkload.hpp>
21 #include <armnn/backends/TensorHandle.hpp>
22
23 #include <neon/workloads/NeonWorkloadUtils.hpp>
24 #include <neon/workloads/NeonWorkloads.hpp>
25
26 namespace armnn
27 {
28
29 namespace
30 {
31 static const BackendId s_Id{NeonBackendId()};
32 }
33
IsLayerSupported(const Layer & layer,Optional<DataType> dataType,std::string & outReasonIfUnsupported)34 bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer,
35 Optional<DataType> dataType,
36 std::string& outReasonIfUnsupported)
37 {
38 return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
39 }
40
IsLayerSupported(const IConnectableLayer & layer,Optional<DataType> dataType,std::string & outReasonIfUnsupported,const ModelOptions & modelOptions)41 bool NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
42 Optional<DataType> dataType,
43 std::string& outReasonIfUnsupported,
44 const ModelOptions& modelOptions)
45 {
46 return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
47 }
48
GetBackendId() const49 const BackendId& NeonWorkloadFactory::GetBackendId() const
50 {
51 return s_Id;
52 }
53
SetNumberOfThreads()54 void NeonWorkloadFactory::SetNumberOfThreads()
55 {
56 if (m_ModelContextPtr)
57 {
58 const unsigned int MIN_THREADS = 1;
59 const unsigned int MAX_THREADS = 64;
60
61 // Set the number of threads to be used if the user has set NumberOfThreads param
62 // Only set if within limit or valid input
63 auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
64 auto numberOfThreads = modelOptions->GetNumberOfThreads();
65
66 if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
67 {
68 arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
69 }
70 }
71 }
72
NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager> & memoryManager)73 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
74 : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
75 {
76 SetNumberOfThreads();
77 }
78
NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager> & memoryManager,const IBackendInternal::IBackendSpecificModelContextPtr & modelContextPtr)79 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
80 const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
81 : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
82 {
83 SetNumberOfThreads();
84 }
85
CreateSubTensorHandle(ITensorHandle & parent,TensorShape const & subTensorShape,unsigned int const * subTensorOrigin) const86 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
87 TensorShape const& subTensorShape,
88 unsigned int const* subTensorOrigin) const
89 {
90 const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
91
92 arm_compute::Coordinates coords;
93 coords.set_num_dimensions(subTensorShape.GetNumDimensions());
94 for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
95 {
96 // Arm compute indexes tensor coords in reverse order.
97 unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
98 coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
99 }
100
101 const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
102 if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
103 {
104 return nullptr;
105 }
106
107 return std::make_unique<NeonSubTensorHandle>(
108 PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
109 }
110
CreateTensorHandle(const TensorInfo & tensorInfo,const bool IsMemoryManaged) const111 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
112 const bool IsMemoryManaged) const
113 {
114 auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
115 if (IsMemoryManaged)
116 {
117 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
118 }
119 return tensorHandle;
120 }
121
CreateTensorHandle(const TensorInfo & tensorInfo,DataLayout dataLayout,const bool IsMemoryManaged) const122 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
123 DataLayout dataLayout,
124 const bool IsMemoryManaged) const
125 {
126 auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
127 if (IsMemoryManaged)
128 {
129 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
130 }
131 return tensorHandle;
132 }
133
CreateWorkload(LayerType type,const QueueDescriptor & descriptor,const WorkloadInfo & info) const134 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateWorkload(LayerType type,
135 const QueueDescriptor& descriptor,
136 const WorkloadInfo& info) const
137 {
138 switch(type)
139 {
140 case LayerType::Activation :
141 {
142 auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
143 return std::make_unique<NeonActivationWorkload>(*activationQueueDescriptor, info);
144 }
145 case LayerType::Addition :
146 {
147 auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
148 return std::make_unique<NeonAdditionWorkload>(*additionQueueDescriptor, info);
149 }
150 case LayerType::ArgMinMax :
151 {
152 auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
153 return std::make_unique<NeonArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info);
154 }
155 case LayerType::BatchMatMul :
156 {
157 auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
158 return std::make_unique<NeonBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info);
159 }
160 case LayerType::BatchNormalization :
161 {
162 auto batchNormalizationQueueDescriptor
163 = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
164 return std::make_unique<NeonBatchNormalizationWorkload>(*batchNormalizationQueueDescriptor, info);
165 }
166 case LayerType::BatchToSpaceNd :
167 {
168 auto batchToSpaceNdQueueDescriptor
169 = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
170 return std::make_unique<NeonBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info);
171 }
172 case LayerType::Cast :
173 {
174 auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
175 return std::make_unique<NeonCastWorkload>(*castQueueDescriptor, info);
176 }
177 case LayerType::ChannelShuffle :
178 {
179 auto channelShuffleQueueDescriptor = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
180 return std::make_unique<NeonChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info);
181 }
182 case LayerType::Comparison :
183 {
184 auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
185 return std::make_unique<NeonComparisonWorkload>(*comparisonQueueDescriptor, info);
186 }
187 case LayerType::Concat :
188 {
189 auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
190 return std::make_unique<NeonConcatWorkload>(*concatQueueDescriptor, info);
191 }
192 case LayerType::Constant :
193 {
194 auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
195 return std::make_unique<NeonConstantWorkload>(*constantQueueDescriptor, info);
196 }
197 case LayerType::ConvertFp16ToFp32 :
198 {
199 auto convertFp16ToFp32QueueDescriptor
200 = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
201 return std::make_unique<NeonConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor, info);
202 }
203 case LayerType::ConvertFp32ToFp16 :
204 {
205 auto convertFp32ToFp16QueueDescriptor
206 = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
207 return std::make_unique<NeonConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor, info);
208 }
209 case LayerType::Convolution2d :
210 {
211 auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
212
213 bool isFastMathEnabled = false;
214 if (m_ModelContextPtr)
215 {
216 if (m_ModelContextPtr.get() != nullptr)
217 {
218 auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
219 if (modelOptions)
220 {
221 isFastMathEnabled = modelOptions->IsFastMathEnabled();
222 }
223 }
224 }
225 return std::make_unique<NeonConvolution2dWorkload>(*convolution2dQueueDescriptor,
226 info,
227 m_MemoryManager->GetIntraLayerManager(),
228 isFastMathEnabled);
229 }
230 case LayerType::Convolution3d :
231 {
232 auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
233
234 bool isFastMathEnabled = false;
235 if (m_ModelContextPtr)
236 {
237 if (m_ModelContextPtr.get() != nullptr)
238 {
239 auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
240 if (modelOptions)
241 {
242 isFastMathEnabled = modelOptions->IsFastMathEnabled();
243 }
244 }
245 }
246 return std::make_unique<NeonConvolution3dWorkload>(*convolution3dQueueDescriptor,
247 info,
248 m_MemoryManager->GetIntraLayerManager(),
249 isFastMathEnabled);
250 }
251 case LayerType::Debug :
252 {
253 auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
254 return MakeWorkloadHelper<NullWorkload, NullWorkload>(*debugQueueDescriptor, info);
255 }
256 case LayerType::DepthToSpace :
257 {
258 auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
259 return std::make_unique<NeonDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info);
260 }
261 case LayerType::DepthwiseConvolution2d :
262 {
263 auto depthwiseConvolution2dQueueDescriptor
264 = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
265 return std::make_unique<NeonDepthwiseConvolutionWorkload>(*depthwiseConvolution2dQueueDescriptor, info);
266 }
267 case LayerType::Dequantize :
268 {
269 auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
270 return std::make_unique<NeonDequantizeWorkload>(*dequantizeQueueDescriptor, info);
271 }
272 case LayerType::DetectionPostProcess :
273 {
274 auto detectionPostProcessQueueDescriptor
275 = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
276 return MakeWorkloadHelper<NullWorkload, NullWorkload>(*detectionPostProcessQueueDescriptor, info);
277 }
278 case LayerType::Division :
279 {
280 auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
281 return std::make_unique<NeonDivisionWorkload>(*divisionQueueDescriptor, info);
282 }
283 case LayerType::ElementwiseBinary :
284 {
285 auto elementwiseBinaryQueueDescriptor
286 = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
287
288 switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
289 {
290 case BinaryOperation::Add:
291 {
292 AdditionQueueDescriptor additionQueueDescriptor;
293 additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
294 additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
295 return std::make_unique<NeonAdditionWorkload>(additionQueueDescriptor, info);
296 }
297 case BinaryOperation::Div:
298 {
299 DivisionQueueDescriptor divisionQueueDescriptor;
300 divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
301 divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
302 return std::make_unique<NeonDivisionWorkload>(divisionQueueDescriptor, info);
303 }
304 case BinaryOperation::Maximum:
305 {
306 MaximumQueueDescriptor maximumQueueDescriptor;
307 maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
308 maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
309 return std::make_unique<NeonMaximumWorkload>(maximumQueueDescriptor, info);
310 }
311 case BinaryOperation::Minimum:
312 {
313 MinimumQueueDescriptor minimumQueueDescriptor;
314 minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
315 minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
316 return std::make_unique<NeonMinimumWorkload>(minimumQueueDescriptor, info);
317 }
318 case BinaryOperation::Mul:
319 {
320 MultiplicationQueueDescriptor multiplicationQueueDescriptor;
321 multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
322 multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
323 return std::make_unique<NeonMultiplicationWorkload>(multiplicationQueueDescriptor, info);
324 }
325 case BinaryOperation::Sub:
326 {
327 SubtractionQueueDescriptor subtractionQueueDescriptor;
328 subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
329 subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
330 return std::make_unique<NeonSubtractionWorkload>(subtractionQueueDescriptor, info);
331 }
332 default:
333 return nullptr;
334 }
335 }
336 case LayerType::ElementwiseUnary :
337 {
338 auto elementwiseUnaryQueueDescriptor
339 = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
340
341 switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation)
342 {
343 case UnaryOperation::Abs:
344 {
345 AbsQueueDescriptor absQueueDescriptor;
346 absQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
347 absQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
348
349 return std::make_unique<NeonAbsWorkload>(absQueueDescriptor, info);
350 }
351 case UnaryOperation::Exp:
352 return std::make_unique<NeonExpWorkload>(*elementwiseUnaryQueueDescriptor, info);
353 case UnaryOperation::LogicalNot:
354 return std::make_unique<NeonLogicalNotWorkload>(*elementwiseUnaryQueueDescriptor, info);
355 case UnaryOperation::Log:
356 return std::make_unique<NeonLogWorkload>(*elementwiseUnaryQueueDescriptor, info);
357 case UnaryOperation::Neg:
358 return std::make_unique<NeonNegWorkload>(*elementwiseUnaryQueueDescriptor, info);
359 case UnaryOperation::Rsqrt:
360 {
361 RsqrtQueueDescriptor rsqrtQueueDescriptor;
362 rsqrtQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
363 rsqrtQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
364
365 return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor, info);
366 }
367 case UnaryOperation::Sin:
368 return std::make_unique<NeonSinWorkload>(*elementwiseUnaryQueueDescriptor, info);
369 case UnaryOperation::Sqrt:
370 return std::make_unique<NeonSqrtWorkload>(*elementwiseUnaryQueueDescriptor, info);
371 default:
372 return nullptr;
373 }
374 }
375 case LayerType::Fill :
376 {
377 auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
378 return std::make_unique<NeonFillWorkload>(*fillQueueDescriptor, info);
379 }
380 case LayerType::Floor :
381 {
382 auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
383 return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(*floorQueueDescriptor, info);
384 }
385 case LayerType::FullyConnected :
386 {
387 auto fullyConnectedQueueDescriptor = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
388 return std::make_unique<NeonFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
389 info,
390 m_MemoryManager->GetIntraLayerManager());
391 }
392 case LayerType::Gather :
393 {
394 auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
395 return std::make_unique<NeonGatherWorkload>(*gatherQueueDescriptor, info);
396 }
397 case LayerType::GatherNd :
398 {
399 auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
400 return std::make_unique<NeonGatherNdWorkload>(*gatherNdQueueDescriptor, info);
401 }
402 case LayerType::Input :
403 {
404 auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
405 return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
406 }
407 case LayerType::InstanceNormalization :
408 {
409 auto instanceNormalizationQueueDescriptor
410 = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
411 return std::make_unique<NeonInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor, info);
412 }
413 case LayerType::L2Normalization :
414 {
415 auto l2NormalizationQueueDescriptor
416 = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
417 return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>
418 (*l2NormalizationQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager());
419 }
420 case LayerType::LogSoftmax :
421 {
422 auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
423 return std::make_unique<NeonLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
424 info,
425 m_MemoryManager->GetIntraLayerManager());
426 }
427 case LayerType::LogicalBinary :
428 {
429 auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
430
431 switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation)
432 {
433 case LogicalBinaryOperation::LogicalAnd:
434 return std::make_unique<NeonLogicalAndWorkload>(*logicalBinaryQueueDescriptor, info);
435 case LogicalBinaryOperation::LogicalOr:
436 return std::make_unique<NeonLogicalOrWorkload>(*logicalBinaryQueueDescriptor, info);
437 default:
438 return nullptr;
439 }
440 }
441 case LayerType::Lstm :
442 {
443 auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
444 return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(*lstmQueueDescriptor, info);
445 }
446 case LayerType::Maximum :
447 {
448 auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
449 return std::make_unique<NeonMaximumWorkload>(*maximumQueueDescriptor, info);
450 }
451 case LayerType::Mean :
452 {
453 auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
454 return std::make_unique<NeonMeanWorkload>(*meanQueueDescriptor, info);
455 }
456 case LayerType::MemCopy :
457 {
458 auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
459 if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0])
460 {
461 throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
462 }
463 return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
464 }
465 case LayerType::MemImport :
466 {
467 auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
468 if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0])
469 {
470 throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
471 }
472 return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
473 }
474 case LayerType::Minimum :
475 {
476 auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
477 return std::make_unique<NeonMinimumWorkload>(*minimumQueueDescriptor, info);
478 }
479 case LayerType::Multiplication :
480 {
481 auto multiplicationQueueDescriptor = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
482 return std::make_unique<NeonMultiplicationWorkload>(*multiplicationQueueDescriptor, info);
483 }
484 case LayerType::Normalization :
485 {
486 auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
487 return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>
488 (*normalizationQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager());
489 }
490 case LayerType::Output :
491 {
492 auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
493 return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
494 }
495 case LayerType::Pad :
496 {
497 auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
498 return std::make_unique<NeonPadWorkload>(*padQueueDescriptor, info);
499 }
500 case LayerType::Permute :
501 {
502 auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
503 return std::make_unique<NeonPermuteWorkload>(*permuteQueueDescriptor, info);
504 }
505 case LayerType::Pooling2d :
506 {
507 auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
508 return std::make_unique<NeonPooling2dWorkload>(*pooling2dQueueDescriptor, info);
509 }
510 case LayerType::Pooling3d :
511 {
512 auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
513 return std::make_unique<NeonPooling3dWorkload>(*pooling3dQueueDescriptor, info);
514 }
515 case LayerType::PreCompiled :
516 {
517 auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
518 return MakeWorkloadHelper<NullWorkload, NullWorkload>(*preCompiledQueueDescriptor, info);
519 }
520 case LayerType::Prelu :
521 {
522 auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
523 return std::make_unique<NeonPreluWorkload>(*preluQueueDescriptor, info);
524 }
525 case LayerType::QLstm :
526 {
527 auto qLstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
528 return std::make_unique<NeonQLstmWorkload>(*qLstmQueueDescriptor, info);
529 }
530 case LayerType::Quantize :
531 {
532 auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
533 return std::make_unique<NeonQuantizeWorkload>(*quantizeQueueDescriptor, info);
534 }
535 case LayerType::QuantizedLstm :
536 {
537 auto quantizedLstmQueueDescriptor = PolymorphicDowncast<const QuantizedLstmQueueDescriptor*>(&descriptor);
538 return std::make_unique<NeonQuantizedLstmWorkload>(*quantizedLstmQueueDescriptor, info);
539 }
540 case LayerType::Rank :
541 {
542 auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
543 return std::make_unique<NeonRankWorkload>(*rankQueueDescriptor, info);
544 }
545 case LayerType::Reduce :
546 {
547 auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
548 return std::make_unique<NeonReduceWorkload>(*reduceQueueDescriptor, info);
549 }
550 case LayerType::Reshape :
551 {
552 auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
553 return std::make_unique<NeonReshapeWorkload>(*reshapeQueueDescriptor, info);
554 }
555 case LayerType::Resize :
556 {
557 auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
558 return std::make_unique<NeonResizeWorkload>(*resizeQueueDescriptor, info);
559 }
560 case LayerType::Slice :
561 {
562 auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
563 return std::make_unique<NeonSliceWorkload>(*sliceQueueDescriptor, info);
564 }
565 case LayerType::Softmax :
566 {
567 auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
568 return std::make_unique<NeonSoftmaxWorkload>(*softmaxQueueDescriptor,
569 info,
570 m_MemoryManager->GetIntraLayerManager());
571 }
572 case LayerType::SpaceToBatchNd :
573 {
574 auto spaceToBatchNdQueueDescriptor
575 = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
576 return std::make_unique<NeonSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info);
577 }
578 case LayerType::SpaceToDepth :
579 {
580 auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
581 return std::make_unique<NeonSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info);
582 }
583 case LayerType::Splitter :
584 {
585 auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
586 return std::make_unique<NeonSplitterWorkload>(*splitterQueueDescriptor, info);
587 }
588 case LayerType::Stack :
589 {
590 auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
591 return std::make_unique<NeonStackWorkload>(*stackQueueDescriptor, info);
592 }
593 case LayerType::StridedSlice :
594 {
595 auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
596 return std::make_unique<NeonStridedSliceWorkload>(*stridedSliceQueueDescriptor, info);
597 }
598 case LayerType::Subtraction :
599 {
600 auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
601 return std::make_unique<NeonSubtractionWorkload>(*subtractionQueueDescriptor, info);
602 }
603 case LayerType::Transpose :
604 {
605 auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
606 return std::make_unique<NeonTransposeWorkload>(*transposeQueueDescriptor, info);
607 }
608 case LayerType::TransposeConvolution2d :
609 {
610 auto transposeConvolution2dQueueDescriptor
611 = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
612 return std::make_unique<NeonTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
613 info,
614 m_MemoryManager->GetIntraLayerManager());
615 }
616 case LayerType::UnidirectionalSequenceLstm :
617 {
618 auto desc = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
619
620 if ((info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Float32) &&
621 (info.m_InputTensorInfos[1].GetDataType() == armnn::DataType::Float32) &&
622 (info.m_InputTensorInfos[2].GetDataType() == armnn::DataType::Float32) &&
623 (info.m_OutputTensorInfos[0].GetDataType() == armnn::DataType::Float32) &&
624 (info.m_OutputTensorInfos[1].GetDataType() == armnn::DataType::Float32) &&
625 (info.m_OutputTensorInfos[2].GetDataType() == armnn::DataType::Float32))
626 {
627 return std::make_unique<NeonUnidirectionalSequenceLstmFloatWorkload>(*desc, info);
628 }
629 else
630 {
631 return std::make_unique<NeonUnidirectionalSequenceLstmWorkload>(*desc, info);
632 }
633 }
634 default:
635 return nullptr;
636 }
637 }
638
CreateActivation(const ActivationQueueDescriptor & descriptor,const WorkloadInfo & info) const639 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
640 const WorkloadInfo& info) const
641 {
642 return std::make_unique<NeonActivationWorkload>(descriptor, info);
643 }
644
CreateAddition(const AdditionQueueDescriptor & descriptor,const WorkloadInfo & info) const645 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
646 const WorkloadInfo& info) const
647 {
648 return std::make_unique<NeonAdditionWorkload>(descriptor, info);
649 }
650
CreateArgMinMax(const ArgMinMaxQueueDescriptor & descriptor,const WorkloadInfo & info) const651 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
652 const WorkloadInfo& info) const
653 {
654 return std::make_unique<NeonArgMinMaxWorkload>(descriptor, info);
655 }
656
CreateBatchNormalization(const BatchNormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const657 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
658 const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
659 {
660 return std::make_unique<NeonBatchNormalizationWorkload>(descriptor, info);
661 }
662
CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor & descriptor,const WorkloadInfo & info) const663 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
664 const WorkloadInfo& info) const
665 {
666 return std::make_unique<NeonBatchToSpaceNdWorkload>(descriptor, info);
667 }
668
CreateCast(const CastQueueDescriptor & descriptor,const WorkloadInfo & info) const669 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
670 const WorkloadInfo& info) const
671 {
672 return std::make_unique<NeonCastWorkload>(descriptor, info);
673 }
674
CreateChannelShuffle(const ChannelShuffleQueueDescriptor & descriptor,const WorkloadInfo & info) const675 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateChannelShuffle(const ChannelShuffleQueueDescriptor& descriptor,
676 const WorkloadInfo& info) const
677 {
678 return std::make_unique<NeonChannelShuffleWorkload>(descriptor, info);
679 }
680
CreateComparison(const ComparisonQueueDescriptor & descriptor,const WorkloadInfo & info) const681 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
682 const WorkloadInfo& info) const
683 {
684 return std::make_unique<NeonComparisonWorkload>(descriptor, info);
685 }
686
CreateConcat(const ConcatQueueDescriptor & descriptor,const WorkloadInfo & info) const687 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
688 const WorkloadInfo& info) const
689 {
690 return std::make_unique<NeonConcatWorkload>(descriptor, info);
691 }
692
CreateConstant(const ConstantQueueDescriptor & descriptor,const WorkloadInfo & info) const693 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
694 const WorkloadInfo& info) const
695 {
696 return std::make_unique<NeonConstantWorkload>(descriptor, info);
697 }
698
CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor & descriptor,const WorkloadInfo & info) const699 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
700 const ConvertFp16ToFp32QueueDescriptor& descriptor,
701 const WorkloadInfo& info) const
702 {
703 return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info);
704 }
705
CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor & descriptor,const WorkloadInfo & info) const706 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
707 const ConvertFp32ToFp16QueueDescriptor& descriptor,
708 const WorkloadInfo& info) const
709 {
710 return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info);
711 }
712
CreateConvolution2d(const Convolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const713 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
714 const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
715 {
716 bool isFastMathEnabled = false;
717 if (m_ModelContextPtr)
718 {
719 if (m_ModelContextPtr.get() != nullptr)
720 {
721 auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
722 if (modelOptions)
723 {
724 isFastMathEnabled = modelOptions->IsFastMathEnabled();
725 }
726 }
727 }
728 return std::make_unique<NeonConvolution2dWorkload>(descriptor,
729 info,
730 m_MemoryManager->GetIntraLayerManager(),
731 isFastMathEnabled);
732 }
733
CreateConvolution3d(const Convolution3dQueueDescriptor & descriptor,const WorkloadInfo & info) const734 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution3d(
735 const Convolution3dQueueDescriptor& descriptor, const WorkloadInfo& info) const
736 {
737 bool isFastMathEnabled = false;
738 if (m_ModelContextPtr)
739 {
740 if (m_ModelContextPtr.get() != nullptr)
741 {
742 auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
743 if (modelOptions)
744 {
745 isFastMathEnabled = modelOptions->IsFastMathEnabled();
746 }
747 }
748 }
749 return std::make_unique<NeonConvolution3dWorkload>(descriptor,
750 info,
751 m_MemoryManager->GetIntraLayerManager(),
752 isFastMathEnabled);
753 }
754
CreateDebug(const DebugQueueDescriptor & descriptor,const WorkloadInfo & info) const755 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
756 const WorkloadInfo& info) const
757 {
758 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
759 }
760
CreateDepthToSpace(const DepthToSpaceQueueDescriptor & descriptor,const WorkloadInfo & info) const761 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
762 const WorkloadInfo& info) const
763 {
764 return std::make_unique<NeonDepthToSpaceWorkload>(descriptor, info);
765 }
766
CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const767 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
768 const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
769 {
770 return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor, info);
771 }
772
CreateDequantize(const DequantizeQueueDescriptor & descriptor,const WorkloadInfo & info) const773 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
774 const WorkloadInfo& info) const
775 {
776 return std::make_unique<NeonDequantizeWorkload>(descriptor, info);
777 }
778
CreateDetectionPostProcess(const armnn::DetectionPostProcessQueueDescriptor & descriptor,const armnn::WorkloadInfo & info) const779 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDetectionPostProcess(
780 const armnn::DetectionPostProcessQueueDescriptor& descriptor, const armnn::WorkloadInfo& info) const
781 {
782 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
783 }
784
CreateDivision(const DivisionQueueDescriptor & descriptor,const WorkloadInfo & info) const785 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
786 const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
787 {
788 return std::make_unique<NeonDivisionWorkload>(descriptor, info);
789 }
790
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateElementwiseUnary(
    const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
    // Dispatch to a dedicated Neon workload based on the unary operation.
    // Abs and Rsqrt have their own descriptor types, so the generic
    // descriptor's input/output handles are re-packed before construction.
    // Unsupported operations fall through to nullptr.
    switch(descriptor.m_Parameters.m_Operation)
    {
        case UnaryOperation::Abs:
        {
            // Repackage into an AbsQueueDescriptor; only the tensor handles carry over.
            AbsQueueDescriptor absQueueDescriptor;
            absQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
            absQueueDescriptor.m_Outputs = descriptor.m_Outputs;

            return std::make_unique<NeonAbsWorkload>(absQueueDescriptor, info);
        }
        case UnaryOperation::Exp:
            return std::make_unique<NeonExpWorkload>(descriptor, info);
        case UnaryOperation::LogicalNot:
            return std::make_unique<NeonLogicalNotWorkload>(descriptor, info);
        case UnaryOperation::Log:
            return std::make_unique<NeonLogWorkload>(descriptor, info);
        case UnaryOperation::Neg:
            return std::make_unique<NeonNegWorkload>(descriptor, info);
        case UnaryOperation::Rsqrt:
        {
            // Repackage into an RsqrtQueueDescriptor; only the tensor handles carry over.
            RsqrtQueueDescriptor rsqrtQueueDescriptor;
            rsqrtQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
            rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;

            return std::make_unique<NeonRsqrtWorkload>(rsqrtQueueDescriptor, info);
        }
        case UnaryOperation::Sin:
            return std::make_unique<NeonSinWorkload>(descriptor, info);
        default:
            // Operation not supported by the Neon backend.
            return nullptr;
    }
}
826
CreateFill(const FillQueueDescriptor & descriptor,const WorkloadInfo & info) const827 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
828 const WorkloadInfo& info) const
829 {
830 return std::make_unique<NeonFillWorkload>(descriptor, info);
831 }
832
CreateFloor(const FloorQueueDescriptor & descriptor,const WorkloadInfo & info) const833 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
834 const WorkloadInfo& info) const
835 {
836 return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(descriptor, info);
837 }
838
CreateFullyConnected(const FullyConnectedQueueDescriptor & descriptor,const WorkloadInfo & info) const839 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
840 const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
841 {
842 return std::make_unique<NeonFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
843 }
844
CreateGather(const armnn::GatherQueueDescriptor & descriptor,const armnn::WorkloadInfo & info) const845 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateGather(const armnn::GatherQueueDescriptor& descriptor,
846 const armnn::WorkloadInfo& info) const
847 {
848 return std::make_unique<NeonGatherWorkload>(descriptor, info);
849 }
850
CreateInput(const InputQueueDescriptor & descriptor,const WorkloadInfo & info) const851 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
852 const WorkloadInfo& info) const
853 {
854 return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
855 }
856
CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const857 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInstanceNormalization(
858 const InstanceNormalizationQueueDescriptor& descriptor,
859 const WorkloadInfo& info) const
860 {
861 return std::make_unique<NeonInstanceNormalizationWorkload>(descriptor, info);
862 }
863
CreateL2Normalization(const L2NormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const864 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
865 const WorkloadInfo& info) const
866 {
867 return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
868 m_MemoryManager->GetIntraLayerManager());
869 }
870
CreateLogSoftmax(const LogSoftmaxQueueDescriptor & descriptor,const WorkloadInfo & info) const871 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
872 const WorkloadInfo& info) const
873 {
874 return std::make_unique<NeonLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
875 }
876
CreateLogicalBinary(const LogicalBinaryQueueDescriptor & descriptor,const WorkloadInfo & info) const877 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
878 const WorkloadInfo& info) const
879 {
880 switch(descriptor.m_Parameters.m_Operation)
881 {
882 case LogicalBinaryOperation::LogicalAnd:
883 return std::make_unique<NeonLogicalAndWorkload>(descriptor, info);
884 case LogicalBinaryOperation::LogicalOr:
885 return std::make_unique<NeonLogicalOrWorkload>(descriptor, info);
886 default:
887 return nullptr;
888 }
889 }
890
CreateLstm(const LstmQueueDescriptor & descriptor,const WorkloadInfo & info) const891 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
892 const WorkloadInfo& info) const
893 {
894 return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(descriptor, info);
895 }
896
CreateMaximum(const MaximumQueueDescriptor & descriptor,const WorkloadInfo & info) const897 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
898 const WorkloadInfo& info) const
899 {
900 return std::make_unique<NeonMaximumWorkload>(descriptor, info);
901 }
902
CreateMean(const MeanQueueDescriptor & descriptor,const WorkloadInfo & info) const903 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
904 const WorkloadInfo& info) const
905 {
906 return std::make_unique<NeonMeanWorkload>(descriptor, info);
907 }
908
CreateMemCopy(const MemCopyQueueDescriptor & descriptor,const WorkloadInfo & info) const909 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
910 const WorkloadInfo& info) const
911 {
912 if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
913 {
914 throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
915 }
916
917 return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
918 }
919
CreateMemImport(const MemImportQueueDescriptor & descriptor,const WorkloadInfo & info) const920 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
921 const WorkloadInfo& info) const
922 {
923 if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
924 {
925 throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
926 }
927
928 return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
929 }
930
CreateMinimum(const MinimumQueueDescriptor & descriptor,const WorkloadInfo & info) const931 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
932 const WorkloadInfo& info) const
933 {
934 return std::make_unique<NeonMinimumWorkload>(descriptor, info);
935 }
936
CreateMultiplication(const MultiplicationQueueDescriptor & descriptor,const WorkloadInfo & info) const937 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
938 const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
939 {
940 return std::make_unique<NeonMultiplicationWorkload>(descriptor, info);
941 }
942
CreateNormalization(const NormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const943 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
944 const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
945 {
946 return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
947 m_MemoryManager->GetIntraLayerManager());
948 }
949
CreateOutput(const OutputQueueDescriptor & descriptor,const WorkloadInfo & info) const950 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
951 const WorkloadInfo& info) const
952 {
953 return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
954 }
955
CreatePad(const PadQueueDescriptor & descriptor,const WorkloadInfo & info) const956 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
957 const WorkloadInfo& info) const
958 {
959 return std::make_unique<NeonPadWorkload>(descriptor, info);
960 }
961
CreatePermute(const PermuteQueueDescriptor & descriptor,const WorkloadInfo & info) const962 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
963 const WorkloadInfo& info) const
964 {
965 return std::make_unique<NeonPermuteWorkload>(descriptor, info);
966 }
967
CreatePooling2d(const Pooling2dQueueDescriptor & descriptor,const WorkloadInfo & info) const968 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
969 const WorkloadInfo& info) const
970 {
971 return std::make_unique<NeonPooling2dWorkload>(descriptor, info);
972 }
973
CreatePreCompiled(const PreCompiledQueueDescriptor & descriptor,const WorkloadInfo & info) const974 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
975 const WorkloadInfo& info) const
976 {
977 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
978 }
979
CreatePrelu(const armnn::PreluQueueDescriptor & descriptor,const armnn::WorkloadInfo & info) const980 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePrelu(const armnn::PreluQueueDescriptor &descriptor,
981 const armnn::WorkloadInfo &info) const
982 {
983 return std::make_unique<NeonPreluWorkload>(descriptor, info);
984 }
985
CreateQLstm(const QLstmQueueDescriptor & descriptor,const WorkloadInfo & info) const986 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
987 const WorkloadInfo& info) const
988 {
989 return std::make_unique<NeonQLstmWorkload>(descriptor, info);
990 }
991
CreateQuantize(const QuantizeQueueDescriptor & descriptor,const WorkloadInfo & info) const992 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
993 const WorkloadInfo& info) const
994 {
995 return std::make_unique<NeonQuantizeWorkload>(descriptor, info);
996 }
997
CreateQuantizedLstm(const QuantizedLstmQueueDescriptor & descriptor,const WorkloadInfo & info) const998 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
999 const WorkloadInfo& info) const
1000 {
1001 return std::make_unique<NeonQuantizedLstmWorkload>(descriptor, info);
1002 }
1003
CreateRank(const RankQueueDescriptor & descriptor,const WorkloadInfo & info) const1004 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
1005 const WorkloadInfo& info) const
1006 {
1007 return std::make_unique<NeonRankWorkload>(descriptor, info);
1008 }
1009
CreateReduce(const ReduceQueueDescriptor & descriptor,const WorkloadInfo & info) const1010 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
1011 const WorkloadInfo& info) const
1012 {
1013 return std::make_unique<NeonReduceWorkload>(descriptor, info);
1014 }
1015
CreateReshape(const ReshapeQueueDescriptor & descriptor,const WorkloadInfo & info) const1016 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
1017 const WorkloadInfo& info) const
1018 {
1019 return std::make_unique<NeonReshapeWorkload>(descriptor, info);
1020 }
1021
CreateResize(const ResizeQueueDescriptor & descriptor,const WorkloadInfo & info) const1022 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
1023 const WorkloadInfo& info) const
1024 {
1025 return std::make_unique<NeonResizeWorkload>(descriptor, info);
1026 }
1027
CreateSlice(const SliceQueueDescriptor & descriptor,const WorkloadInfo & info) const1028 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
1029 const WorkloadInfo& info) const
1030 {
1031 return std::make_unique<NeonSliceWorkload>(descriptor, info);
1032 }
1033
CreateSoftmax(const SoftmaxQueueDescriptor & descriptor,const WorkloadInfo & info) const1034 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
1035 const WorkloadInfo& info) const
1036 {
1037 return std::make_unique<NeonSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
1038 }
1039
CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor & descriptor,const WorkloadInfo & info) const1040 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
1041 const WorkloadInfo& info) const
1042 {
1043 return std::make_unique<NeonSpaceToBatchNdWorkload>(descriptor, info);
1044 }
1045
CreateSpaceToDepth(const SpaceToDepthQueueDescriptor & descriptor,const WorkloadInfo & info) const1046 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
1047 const WorkloadInfo& info) const
1048 {
1049 return std::make_unique<NeonSpaceToDepthWorkload>(descriptor, info);
1050 }
1051
CreateSplitter(const SplitterQueueDescriptor & descriptor,const WorkloadInfo & info) const1052 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
1053 const WorkloadInfo& info) const
1054 {
1055 return std::make_unique<NeonSplitterWorkload>(descriptor, info);
1056 }
1057
CreateStack(const StackQueueDescriptor & descriptor,const WorkloadInfo & info) const1058 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
1059 const WorkloadInfo& info) const
1060 {
1061 return std::make_unique<NeonStackWorkload>(descriptor, info);
1062 }
1063
CreateStridedSlice(const StridedSliceQueueDescriptor & descriptor,const WorkloadInfo & info) const1064 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
1065 const WorkloadInfo& info) const
1066 {
1067 return std::make_unique<NeonStridedSliceWorkload>(descriptor, info);
1068 }
1069
CreateSubtraction(const SubtractionQueueDescriptor & descriptor,const WorkloadInfo & info) const1070 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction(
1071 const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const
1072 {
1073 return std::make_unique<NeonSubtractionWorkload>(descriptor, info);
1074 }
1075
CreateTranspose(const TransposeQueueDescriptor & descriptor,const WorkloadInfo & info) const1076 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
1077 const WorkloadInfo& info) const
1078 {
1079 return std::make_unique<NeonTransposeWorkload>(descriptor, info);
1080 }
1081
CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const1082 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateTransposeConvolution2d(
1083 const TransposeConvolution2dQueueDescriptor &descriptor,
1084 const WorkloadInfo &info) const
1085 {
1086 return std::make_unique<NeonTransposeConvolution2dWorkload>(descriptor, info,
1087 m_MemoryManager->GetIntraLayerManager());
1088 }
1089
1090 } // namespace armnn
1091