xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/depthwise_conv_3x3.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_DEPTHWISE_CONV_3X3_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_DEPTHWISE_CONV_3X3_H_
18 
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
25 #include "tensorflow/lite/delegates/gpu/common/operations.h"
26 #include "tensorflow/lite/delegates/gpu/common/shape.h"
27 #include "tensorflow/lite/delegates/gpu/common/status.h"
28 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
30 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
31 #include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
32 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
33 #include "tensorflow/lite/delegates/gpu/common/types.h"
34 
35 namespace tflite {
36 namespace gpu {
37 
38 class DepthwiseConv3x3 : public GPUOperation {
39  public:
40   DepthwiseConv3x3() = default;
41   void GetPossibleKernelWorkGroups(
42       TuningType tuning_type, const GpuInfo& gpu_info,
43       const KernelInfo& kernel_info,
44       std::vector<int3>* work_groups) const override;
45   int3 GetGridSize() const override;
46 
47   // Move only
48   DepthwiseConv3x3(DepthwiseConv3x3&& operation);
49   DepthwiseConv3x3& operator=(DepthwiseConv3x3&& operation);
50   DepthwiseConv3x3(const DepthwiseConv3x3&) = delete;
51   DepthwiseConv3x3& operator=(const DepthwiseConv3x3&) = delete;
52 
53  private:
54   explicit DepthwiseConv3x3(const OperationDef& definition,
55                             bool weights_are_buffer, bool local_mem_uploads,
56                             const GpuInfo& gpu_info);
57   template <DataType T>
58   void UploadWeightsAndBiases(const tflite::gpu::Tensor<OHWI, T>& weights,
59                               const tflite::gpu::Tensor<Linear, T>& biases,
60                               bool weights_are_buffer);
61 
62   friend DepthwiseConv3x3 CreateDepthwiseConv3x3(
63       const GpuInfo& gpu_info, const OperationDef& definition,
64       const DepthwiseConvolution2DAttributes& attr);
65 
66   template <DataType S, typename T>
67   void RearrangeWeightsAndBiasesData(
68       const tflite::gpu::Tensor<OHWI, S>& weights,
69       const tflite::gpu::Tensor<Linear, S>& biases, absl::Span<T> dst);
70 
71   std::string GenerateDepthwiseConvCode(const GpuInfo& gpu_info,
72                                         const OperationDef& op_def,
73                                         bool weights_are_buffer,
74                                         bool local_mem_uploads);
75 
76   bool local_mem_uploads_;
77 };
78 
79 template <DataType T>
UploadWeightsAndBiases(const tflite::gpu::Tensor<OHWI,T> & weights,const tflite::gpu::Tensor<Linear,T> & biases,bool weights_are_buffer)80 void DepthwiseConv3x3::UploadWeightsAndBiases(
81     const tflite::gpu::Tensor<OHWI, T>& weights,
82     const tflite::gpu::Tensor<Linear, T>& biases, bool weights_are_buffer) {
83   const int src_depth = DivideRoundUp(weights.shape.i, 4);
84   int texture_width = 10;  // 3x3 kernel + 1 bias
85   int texture_height = src_depth;
86   const int elements_count = texture_width * texture_height;
87   const bool fp32_weights = definition_.precision == CalculationsPrecision::F32;
88   const int float4_size = fp32_weights ? 16 : 8;
89 
90   std::vector<uint8_t> data(float4_size * elements_count);
91   if (fp32_weights) {
92     float4* ptr = reinterpret_cast<float4*>(data.data());
93     RearrangeWeightsAndBiasesData(weights, biases,
94                                   absl::MakeSpan(ptr, elements_count));
95   } else {
96     half4* ptr = reinterpret_cast<half4*>(data.data());
97     RearrangeWeightsAndBiasesData(weights, biases,
98                                   absl::MakeSpan(ptr, elements_count));
99   }
100 
101   if (weights_are_buffer) {
102     BufferDescriptor desc;
103     desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
104     desc.element_size = 4;
105     desc.size = float4_size * elements_count;
106     desc.data = std::move(data);
107     args_.AddObject("weights",
108                     std::make_unique<BufferDescriptor>(std::move(desc)));
109   } else {
110     TensorDescriptor desc = CreateConstantHWVec4TensorDescriptor(
111         fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16,
112         TensorStorageType::TEXTURE_2D, texture_width, texture_height,
113         data.data());
114     args_.AddObject("weights", std::make_unique<TensorDescriptor>(desc));
115   }
116 }
117 
118 template <DataType S, typename T>
RearrangeWeightsAndBiasesData(const tflite::gpu::Tensor<OHWI,S> & weights,const tflite::gpu::Tensor<Linear,S> & biases,absl::Span<T> dst)119 void DepthwiseConv3x3::RearrangeWeightsAndBiasesData(
120     const tflite::gpu::Tensor<OHWI, S>& weights,
121     const tflite::gpu::Tensor<Linear, S>& biases, absl::Span<T> dst) {
122   const int src_depth = DivideRoundUp(weights.shape.i, 4);
123 
124   int counter = 0;
125   for (int s = 0; s < src_depth; ++s) {
126     for (int y = 0; y < 3; ++y) {
127       for (int x = 0; x < 3; ++x) {
128         T filter_val;
129         for (int i = 0; i < 4; ++i) {
130           const int s_ch = s * 4 + i;
131           if (s_ch < weights.shape.i) {
132             const int f_index = weights.shape.LinearIndex({0, y, x, s_ch});
133             filter_val[i] = weights.data[f_index];
134           } else {
135             filter_val[i] = 0.0f;
136           }
137         }
138         dst[counter++] = filter_val;
139       }
140     }
141 
142     T bias_val;
143     for (int i = 0; i < 4; ++i) {
144       const int dst_ch = s * 4 + i;
145       bias_val[i] = dst_ch >= biases.shape.v ? 0.0f : biases.data[dst_ch];
146     }
147     dst[counter++] = bias_val;
148   }
149 }
150 
// Declaration only; the definition is not in this header.
// NOTE(review): presumably returns true when `attr` describes a depthwise
// convolution that DepthwiseConv3x3 can execute on the GPU described by
// `gpu_info` - confirm the exact conditions against the definition.
151 bool IsDepthwiseConv3x3Supported(const GpuInfo& gpu_info,
152                                  const DepthwiseConvolution2DAttributes& attr);
153 
// Factory for DepthwiseConv3x3. It is declared a friend of the class, which
// gives it access to the private constructor and weight-upload helpers.
// The result is returned by value; DepthwiseConv3x3 deletes its copy
// operations, so callers receive it by move or copy elision.
// NOTE(review): callers are presumably expected to check
// IsDepthwiseConv3x3Supported() first - confirm against the definition.
154 DepthwiseConv3x3 CreateDepthwiseConv3x3(
155     const GpuInfo& gpu_info, const OperationDef& definition,
156     const DepthwiseConvolution2DAttributes& attr);
157 
158 }  // namespace gpu
159 }  // namespace tflite
160 
161 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_DEPTHWISE_CONV_3X3_H_
162