xref: /aosp_15_r20/external/armnn/src/backends/cl/workloads/ClGatherNdWorkload.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ClGatherNdWorkload.hpp"
7 #include "ClWorkloadUtils.hpp"
8 #include "backendsCommon/WorkloadUtils.hpp"
9 #include <aclCommon/ArmComputeUtils.hpp>
10 #include <cl/ClTensorHandle.hpp>
11 
12 using namespace armnn::armcomputetensorutils;
13 
14 namespace armnn
15 {
ClGatherNdWorkloadValidate(const TensorInfo & paramsInfo,const TensorInfo & indicesInfo,const TensorInfo & outputInfo)16 arm_compute::Status ClGatherNdWorkloadValidate(const TensorInfo& paramsInfo,
17                                                const TensorInfo& indicesInfo,
18                                                const TensorInfo& outputInfo)
19 {
20     // Calculate ND, K, W, C.
21     std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
22 
23     /// Validate Mul
24     // Indices with shape { W, ND }
25     armnn::TensorInfo indices_W_ND_Info = indicesInfo;
26     indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
27     const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
28 
29     // Flattened coefficients with shape { ND }
30     armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
31     flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
32     const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
33 
34     // Output of Mul with shape { W, ND }
35     const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
36 
37     auto statusMul = arm_compute::CLPixelWiseMultiplication::validate(&aclIndicesInfo,
38                                                                       &aclFlattenedCoeffInfo,
39                                                                       &aclOutputMulInfo,
40                                                                       1.0f,
41                                                                       arm_compute::ConvertPolicy::WRAP,
42                                                                       arm_compute::RoundingPolicy::TO_ZERO,
43                                                                       arm_compute::ActivationLayerInfo());
44 
45     /// Validate ReduceSum
46     // Flattened indices with shape { W }
47     armnn::TensorInfo flattenedIndices_Info = indicesInfo;
48     flattenedIndices_Info.SetShape({ keyIndices["W"] });
49     const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
50 
51     const std::vector<unsigned int> armnnReduceAxes(1, 1);
52     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
53                                                                           indices_W_ND_Info.GetNumDimensions(),
54                                                                           armnnReduceAxes);
55 
56     auto statusReduceSum = arm_compute::CLReductionOperation::validate(&aclOutputMulInfo,
57                                                                        &aclFlattenedIndicesInfo,
58                                                                        static_cast<unsigned int>(coords[0]),
59                                                                        arm_compute::ReductionOperation::SUM,
60                                                                        false);
61 
62     /// Validate Gather
63     // Params with shape { K, C }
64     armnn::TensorInfo params_K_C_Info =  paramsInfo;
65     params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
66     const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
67 
68     // Output of gather with shape { W, C }
69     armnn::TensorInfo outputGather_Info = outputInfo;
70     outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
71     const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
72 
73     auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
74     auto statusGather =
75             arm_compute::CLGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
76 
77     /// Validate Reshape
78     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
79 
80     auto statusReshape = arm_compute::CLReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
81 
82     /// Return OK if all the layers are valid
83     auto okCode = arm_compute::ErrorCode::OK;
84     if (statusMul.error_code()       == okCode &&
85         statusReduceSum.error_code() == okCode &&
86         statusGather.error_code()    == okCode &&
87         statusReshape.error_code()   == okCode)
88     {
89         return arm_compute::Status(arm_compute::ErrorCode::OK,
90                                    "All GatherND layers validate status OK.");
91     }
92     else
93     {
94         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
95                                    "GatherND layer validate status failed.");
96     }
97 }
98 
ClGatherNdWorkload(const GatherNdQueueDescriptor & descriptor,const WorkloadInfo & info,const arm_compute::CLCompileContext & clCompileContext)99 ClGatherNdWorkload::ClGatherNdWorkload(const GatherNdQueueDescriptor& descriptor,
100                                        const WorkloadInfo& info,
101                                        const arm_compute::CLCompileContext& clCompileContext)
102         : ClBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)
103 {
104     m_Data.ValidateInputsOutputs("ClGatherNdWorkload", 2, 1);
105 
106     TensorInfo paramsInfo  = info.m_InputTensorInfos[0];
107     TensorInfo indicesInfo = info.m_InputTensorInfos[1];
108     TensorInfo outputInfo  = info.m_OutputTensorInfos[0];
109 
110     arm_compute::ICLTensor& input   = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
111     arm_compute::ICLTensor& indices = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
112     arm_compute::ICLTensor& output  = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
113 
114     // Calculate ND, K, W, C.
115     std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
116 
117     /// Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff.
118     /// This could be done using MatMul instead of multiplication followed by reduce sum operation,
119     /// but GeMM does not support s32 at the moment.
120 
121     // Prepare the tensor to store the output of the reduce_sum operation
122     armnn::TensorInfo flattenedIndices_Info = indicesInfo;
123     flattenedIndices_Info.SetShape({ keyIndices["W"] });
124     BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);
125     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);
126 
127     // Reshape indices into { W, ND }
128     indices.info()->set_tensor_shape(BuildArmComputeTensorShape({ keyIndices["W"], keyIndices["ND"] }));
129 
130     // Calculate the m_FlattenedCoeff
131     TensorShape paramsShape = paramsInfo.GetShape();
132     std::vector<int32_t> flattenedCoeff(keyIndices["ND"], 1);
133     for (unsigned int i = 1; i < keyIndices["ND"]; ++i)
134     {
135         flattenedCoeff[i - 1] = static_cast<int32_t>(paramsShape[i]);
136     }
137     for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i)
138     {
139         flattenedCoeff[i - 1] *= flattenedCoeff[i];
140     }
141     armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
142     flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
143     BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);
144     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);
145     ARMNN_ASSERT_MSG(indicesInfo.GetDataType() == DataType::Signed32,
146                      "flattenedCoeff must be same data type as m_FlattenedCoeff");
147     CopyArmComputeClTensorData<int32_t>(m_FlattenedCoeff, flattenedCoeff.data());
148 
149     // Prepare the tensor to store the output of the multiplication
150     armnn::TensorInfo outputMul_Info = indicesInfo;
151     outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
152     BuildArmComputeTensor(m_OutputMul, outputMul_Info);
153     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputMul);
154 
155     // Multiply
156     m_MulLayer.configure(clCompileContext,
157                          &indices,
158                          &m_FlattenedCoeff,
159                          &m_OutputMul,
160                          1.0f,
161                          arm_compute::ConvertPolicy::WRAP,
162                          arm_compute::RoundingPolicy::TO_ZERO,
163                          arm_compute::ActivationLayerInfo());
164 
165     // Reduce Sum
166     const std::vector<unsigned int> armnnReduceAxes(1, 1);
167     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_OutputMul.info()->num_dimensions(),
168                                                                           outputMul_Info.GetNumDimensions(),
169                                                                           armnnReduceAxes);
170     m_ReduceSumLayer.configure(clCompileContext,
171                                &m_OutputMul,
172                                &m_FlattenedIndices,
173                                static_cast<unsigned int>(coords[0]),
174                                arm_compute::ReductionOperation::SUM,
175                                false);
176 
177     /// Call Gather with adequate shapes
178     // Reshape params into { K, C }
179     paramsInfo.SetShape({ keyIndices["K"], keyIndices["C"] });
180     input.info()->set_tensor_shape(BuildArmComputeTensorShape(paramsInfo.GetShape()));
181 
182     // Reshape output to have the shape given by gather { W, C }
183     // (the original outputInfo has the shape given by gatherNd)
184     armnn::TensorInfo outputGather_Info = outputInfo;
185     outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
186     BuildArmComputeTensor(m_OutputGather, outputGather_Info);
187     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputGather);
188     {
189         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClGatherNdWorkload_configure");
190         auto aclAxis = ComputeAclAxis(0, paramsInfo);
191         m_GatherLayer.configure(clCompileContext, &input, &m_FlattenedIndices, &m_OutputGather, aclAxis);
192     }
193 
194     // Reshape output to the original output shape
195     m_ReshapeLayer.configure(clCompileContext, &m_OutputGather, &output);
196 };
197 
Execute() const198 void ClGatherNdWorkload::Execute() const
199 {
200     ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClGatherNdWorkload_Execute", this->GetGuid());
201     RunClFunction(m_MulLayer, CHECK_LOCATION());
202     RunClFunction(m_ReduceSumLayer, CHECK_LOCATION());
203     RunClFunction(m_GatherLayer, CHECK_LOCATION());
204     RunClFunction(m_ReshapeLayer, CHECK_LOCATION());
205 }
206 } // namespace armnn
207