//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonGatherNdWorkload.hpp"
#include "NeonWorkloadUtils.hpp"
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include "backendsCommon/WorkloadUtils.hpp"

namespace armnn
{
arm_compute::Status NeonGatherNdWorkloadValidate(const TensorInfo& paramsInfo,
                                                 const TensorInfo& indicesInfo,
                                                 const TensorInfo& outputInfo)
{
    // Calculate ND, K, W, C.
    std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
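    // Roughly: ND is the length of each index tuple (the last dimension of indices), W is the number of
    // index tuples, K is the flattened size of the first ND dimensions of params, and C is the size of
    // each gathered slice; the Mul/ReduceSum/Gather/Reshape decomposition below works on { W, ND } indices
    // and { K, C } params.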

    /// Validate Mul
    // Indices with shape { W, ND }
    armnn::TensorInfo indices_W_ND_Info = indicesInfo;
    indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
    const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);

    // Flattened coefficients with shape { ND }
    armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
    flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
    const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);

    // Output of Mul with shape { W, ND }
    const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);

    auto statusMul = arm_compute::NEPixelWiseMultiplication::validate(&aclIndicesInfo,
                                                                      &aclFlattenedCoeffInfo,
                                                                      &aclOutputMulInfo,
                                                                      1.0f,
                                                                      arm_compute::ConvertPolicy::WRAP,
                                                                      arm_compute::RoundingPolicy::TO_ZERO,
                                                                      arm_compute::ActivationLayerInfo());

    /// Validate ReduceSum
    // Flattened indices with shape { W }
    armnn::TensorInfo flattenedIndices_Info = indicesInfo;
    flattenedIndices_Info.SetShape({ keyIndices["W"] });
    const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);

    const std::vector<unsigned int> armnnReduceAxes(1, 1);
    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
                                                                          indices_W_ND_Info.GetNumDimensions(),
                                                                          armnnReduceAxes);

    auto statusReduceSum = arm_compute::NEReductionOperation::validate(&aclOutputMulInfo,
                                                                       &aclFlattenedIndicesInfo,
                                                                       static_cast<unsigned int>(coords[0]),
                                                                       arm_compute::ReductionOperation::SUM,
                                                                       false);

    /// Validate Gather
    // Params with shape { K, C }
    armnn::TensorInfo params_K_C_Info = paramsInfo;
    params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
    const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);

    // Output of gather with shape { W, C }
    armnn::TensorInfo outputGather_Info = outputInfo;
    outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
    const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);

    auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
    auto statusGather =
            arm_compute::NEGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);

    /// Validate Reshape
    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);

    auto statusReshape = arm_compute::NEReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);

    /// Return OK if all the layers are valid
    auto okCode = arm_compute::ErrorCode::OK;
    if (statusMul.error_code()       == okCode &&
        statusReduceSum.error_code() == okCode &&
        statusGather.error_code()    == okCode &&
        statusReshape.error_code()   == okCode)
    {
        return arm_compute::Status(arm_compute::ErrorCode::OK,
                                   "All GatherND layers validate status OK.");
    }
    else
    {
        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                   "GatherND layer validate status failed.");
    }
}

NeonGatherNdWorkload::NeonGatherNdWorkload(const GatherNdQueueDescriptor& descriptor,
                                           const WorkloadInfo& info)
        : NeonBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)
{
    m_Data.ValidateInputsOutputs("NeonGatherNdWorkload", 2, 1);

    TensorInfo paramsInfo  = info.m_InputTensorInfos[0];
    TensorInfo indicesInfo = info.m_InputTensorInfos[1];
    TensorInfo outputInfo  = info.m_OutputTensorInfos[0];

    arm_compute::ITensor& input   = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ITensor& indices = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
    arm_compute::ITensor& output  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    // Calculate ND, K, W, C.
    std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);

    /// Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff.
    /// This could be done using MatMul instead of multiplication followed by reduce sum operation,
    /// but GeMM does not support s32 at the moment.
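    /// Conceptually, for each index tuple w:
    ///     m_FlattenedIndices[w] = sum over d in [0, ND) of indices[w][d] * m_FlattenedCoeff[d]
    /// i.e. a dot product of each { ND }-sized index row with the row-major strides of params.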

    // Prepare the tensor to store the output of the reduce_sum operation
    armnn::TensorInfo flattenedIndices_Info = indicesInfo;
    flattenedIndices_Info.SetShape({ keyIndices["W"] });
    BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);

    // Reshape indices into { W, ND }
    indices.info()->set_tensor_shape(BuildArmComputeTensorShape({ keyIndices["W"], keyIndices["ND"] }));

    // Calculate the m_FlattenedCoeff
    TensorShape paramsShape = paramsInfo.GetShape();
    std::vector<int32_t> flattenedCoeff(keyIndices["ND"], 1);
    for (unsigned int i = 1; i < keyIndices["ND"]; ++i)
    {
        flattenedCoeff[i - 1] = static_cast<int32_t>(paramsShape[i]);
    }
    for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i)
    {
        flattenedCoeff[i - 1] *= flattenedCoeff[i];
    }
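    // For example, assuming params shape { 5, 4, 3 } and ND == 3, the two loops above produce
    // flattenedCoeff == { 12, 3, 1 }, i.e. the row-major strides of the first ND dimensions of params.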
    armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
    flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
    BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);
    ARMNN_ASSERT_MSG(indicesInfo.GetDataType() == DataType::Signed32,
                     "flattenedCoeff must be same data type as m_FlattenedCoeff");
    CopyArmComputeITensorData<int32_t>(flattenedCoeff.data(), m_FlattenedCoeff);

    // Prepare the tensor to store the output of the multiplication
    armnn::TensorInfo outputMul_Info = indicesInfo;
    outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
    BuildArmComputeTensor(m_OutputMul, outputMul_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputMul);

    // Multiply
    m_MulLayer.configure(&indices,
                         &m_FlattenedCoeff,
                         &m_OutputMul,
                         1.0f,
                         arm_compute::ConvertPolicy::WRAP,
                         arm_compute::RoundingPolicy::TO_ZERO,
                         arm_compute::ActivationLayerInfo());

    // Reduce Sum
    const std::vector<unsigned int> armnnReduceAxes(1, 1);
    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_OutputMul.info()->num_dimensions(),
                                                                          outputMul_Info.GetNumDimensions(),
                                                                          armnnReduceAxes);
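    // ArmNN axis 1 here is the ND dimension of the { W, ND } product; summing over it collapses each row
    // of per-dimension offsets into a single linear index, so m_FlattenedIndices ends up with shape { W }.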
    m_ReduceSumLayer.configure(&m_OutputMul,
                               &m_FlattenedIndices,
                               static_cast<unsigned int>(coords[0]),
                               arm_compute::ReductionOperation::SUM,
                               false);

    /// Call Gather with adequate shapes
    // Reshape params into { K, C }
    paramsInfo.SetShape({ keyIndices["K"], keyIndices["C"] });
    input.info()->set_tensor_shape(BuildArmComputeTensorShape(paramsInfo.GetShape()));
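    // With params viewed as { K, C }, each flattened index in [0, K) selects one complete { C }-sized row,
    // which is what the gather along axis 0 below performs.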

    // Reshape output to have the shape given by gather { W, C }
    // (the original outputInfo has the shape given by gatherNd)
    armnn::TensorInfo outputGather_Info = outputInfo;
    outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
    BuildArmComputeTensor(m_OutputGather, outputGather_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputGather);

    m_GatherLayer.configure(&input, &m_FlattenedIndices, &m_OutputGather, ComputeAclAxis(0, paramsInfo));

    // Reshape output to the original output shape
    m_ReshapeLayer.configure(&m_OutputGather, &output);
}

void NeonGatherNdWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonGatherNdWorkload_Execute", this->GetGuid());
    m_MulLayer.run();
    m_ReduceSumLayer.run();
    m_GatherLayer.run();
    m_ReshapeLayer.run();
}
} //namespace armnn