//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <BFloat16.hpp>
#include <Half.hpp>

#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <cl/OpenClTimer.hpp>
#include <armnn/backends/TensorHandle.hpp>

#include <armnn/Utils.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/IFunction.h>

#include <sstream>

#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \
    ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \
                                                  armnn::EmptyOptional(), \
                                                  name, \
                                                  armnn::OpenClTimer(), \
                                                  armnn::WallClockTimer())

#define ARMNN_SCOPED_PROFILING_EVENT_CL_GUID(name, guid) \
    ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \
                                                  guid, \
                                                  name, \
                                                  armnn::OpenClTimer(), \
                                                  armnn::WallClockTimer())

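// A minimal usage sketch of the profiling macros (illustrative only;
// "ClExampleWorkload_Execute" is a placeholder event name, and GetGuid() is
// assumed to be available on the calling workload):
//
//     void ClExampleWorkload::Execute() const
//     {
//         ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClExampleWorkload_Execute", this->GetGuid());
//         // ... run the ACL function ...
//     }
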
namespace armnn
{

inline std::string GetConvolutionMethodString(arm_compute::ConvolutionMethod& convolutionMethod)
{
    switch (convolutionMethod)
    {
        case arm_compute::ConvolutionMethod::FFT:
            return "FFT";
        case arm_compute::ConvolutionMethod::DIRECT:
            return "Direct";
        case arm_compute::ConvolutionMethod::GEMM:
            return "GEMM";
        case arm_compute::ConvolutionMethod::WINOGRAD:
            return "Winograd";
        default:
            return "Unknown";
    }
}

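// A minimal usage sketch (illustrative only; assumes a ConvolutionMethod value
// previously queried from ACL):
//
//     arm_compute::ConvolutionMethod method = arm_compute::ConvolutionMethod::WINOGRAD;
//     std::string methodName = GetConvolutionMethodString(method); // "Winograd"
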
template <typename T>
void CopyArmComputeClTensorData(arm_compute::CLTensor& dstTensor, const T* srcData)
{
    {
        ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
        // Blocking map: ensures the CL buffer is host-visible before the copy starts.
        dstTensor.map(true);
    }

    {
        ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
        armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor);
    }

    dstTensor.unmap();
}

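// A minimal usage sketch (illustrative only; assumes clTensor has already been
// configured and allocated, and that hostData holds one value per tensor element):
//
//     std::vector<float> hostData = {1.0f, 2.0f, 3.0f, 4.0f};
//     CopyArmComputeClTensorData(clTensor, hostData.data());
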
inline auto SetClStridedSliceData(const std::vector<int>& m_begin,
                                  const std::vector<int>& m_end,
                                  const std::vector<int>& m_stride)
{
    arm_compute::Coordinates starts;
    arm_compute::Coordinates ends;
    arm_compute::Coordinates strides;

    unsigned int num_dims = static_cast<unsigned int>(m_begin.size());

    for (unsigned int i = 0; i < num_dims; i++)
    {
        // Arm NN and ACL store tensor dimensions in opposite order, so reverse the indices.
        unsigned int revertedIndex = num_dims - i - 1;

        starts.set(i, static_cast<int>(m_begin[revertedIndex]));
        ends.set(i, static_cast<int>(m_end[revertedIndex]));
        strides.set(i, static_cast<int>(m_stride[revertedIndex]));
    }

    return std::make_tuple(starts, ends, strides);
}

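// A worked example (illustrative values): begin = {1, 0}, end = {3, 2},
// stride = {1, 1} on a 2-D tensor. After the dimension reversal above this
// yields starts = (0, 1), ends = (2, 3), strides = (1, 1):
//
//     auto [starts, ends, strides] = SetClStridedSliceData({1, 0}, {3, 2}, {1, 1});
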
inline auto SetClSliceData(const std::vector<unsigned int>& m_begin,
                           const std::vector<unsigned int>& m_size)
{
    // This function translates the given size vector into the end vector
    // expected by the ACL CLSlice workload.
    arm_compute::Coordinates starts;
    arm_compute::Coordinates ends;

    unsigned int num_dims = static_cast<unsigned int>(m_begin.size());

    // For strided slices, we have the relationship size = (end - begin) / stride.
    // For slice, we assume stride to be a vector of all ones, yielding the formula
    // size = (end - begin); therefore we know end = begin + size.
    for (unsigned int i = 0; i < num_dims; i++)
    {
        // Arm NN and ACL store tensor dimensions in opposite order, so reverse the indices.
        unsigned int revertedIndex = num_dims - i - 1;

        starts.set(i, static_cast<int>(m_begin[revertedIndex]));
        ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
    }

    return std::make_tuple(starts, ends);
}

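// A worked example (illustrative values): begin = {1, 2}, size = {2, 3}
// gives end = begin + size = {3, 5} per dimension; after the dimension
// reversal this yields starts = (2, 1) and ends = (5, 3):
//
//     auto [starts, ends] = SetClSliceData({1, 2}, {2, 3});
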
inline void InitializeArmComputeClTensorData(arm_compute::CLTensor& clTensor,
                                             const ConstTensorHandle* handle)
{
    ARMNN_ASSERT(handle);

    armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
    switch(handle->GetTensorInfo().GetDataType())
    {
        case DataType::Float16:
            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>());
            break;
        case DataType::Float32:
            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
            break;
        case DataType::QAsymmU8:
            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
            break;
        case DataType::QAsymmS8:
        case DataType::QSymmS8:
            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int8_t>());
            break;
        case DataType::QSymmS16:
            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int16_t>());
            break;
        case DataType::Signed32:
            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
            break;
        case DataType::BFloat16:
            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::BFloat16>());
            break;
        default:
            // Throw exception; assertion not called in release build.
            throw Exception("Unexpected tensor type during InitializeArmComputeClTensorData().");
    }
}

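// A minimal usage sketch (illustrative only; m_WeightsTensor is a placeholder
// member holding the layer's weights as a ConstTensorHandle):
//
//     arm_compute::CLTensor weights;
//     armcomputetensorutils::BuildArmComputeTensor(weights, m_WeightsTensor->GetTensorInfo());
//     InitializeArmComputeClTensorData(weights, m_WeightsTensor.get());
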
inline RuntimeException WrapClError(const cl::Error& clError, const CheckLocation& location)
{
    std::stringstream message;
    message << "CL error: " << clError.what() << ". Error code: " << clError.err();

    return RuntimeException(message.str(), location);
}

inline void RunClFunction(arm_compute::IFunction& function, const CheckLocation& location)
{
    try
    {
        function.run();
    }
    catch (cl::Error& error)
    {
        throw WrapClError(error, location);
    }
}

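// A minimal usage sketch (illustrative only; m_SliceFunction is a placeholder
// for a configured ACL function member, e.g. arm_compute::CLSlice):
//
//     void ClSliceWorkload::Execute() const
//     {
//         ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSliceWorkload_Execute", this->GetGuid());
//         RunClFunction(m_SliceFunction, CHECK_LOCATION());
//     }
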
template <typename DataType, typename PayloadType>
DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data)
{
    ITensorHandle* tensorHandle = data.m_Outputs[idx];
    // Maps the handle for host access; the caller must Unmap() when done.
    return reinterpret_cast<DataType*>(tensorHandle->Map());
}
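
// A minimal usage sketch (illustrative only; assumes 'data' is the workload's
// queue descriptor and output 0 holds float data; the matching Unmap() call
// remains the caller's responsibility):
//
//     float* outputData = GetOutputTensorData<float>(0, data);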

} // namespace armnn