xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/conv_constants.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONV_CONSTANTS_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONV_CONSTANTS_H_
18 
#include <algorithm>
#include <memory>
#include <utility>

#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
30 
31 namespace tflite {
32 namespace gpu {
33 
34 template <DataType S, typename T>
RearrangeWeightsForConvConstants(const tflite::gpu::Tensor<OHWI,S> & weights,absl::Span<T> dst)35 void RearrangeWeightsForConvConstants(
36     const tflite::gpu::Tensor<OHWI, S>& weights, absl::Span<T> dst) {
37   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
38   const int src_depth = DivideRoundUp(weights.shape.i, 4);
39   const int kernel_x = weights.shape.w;
40   const int kernel_y = weights.shape.h;
41 
42   int counter = 0;
43   for (int s = 0; s < src_depth; ++s) {
44     for (int y = 0; y < kernel_y; ++y) {
45       for (int x = 0; x < kernel_x; ++x) {
46         for (int d = 0; d < dst_depth; ++d) {
47           const int channels_count = std::min(4, weights.shape.i - s * 4);
48           T filters[4];
49           for (int i = 0; i < 4; ++i) {
50             for (int j = 0; j < channels_count; ++j) {
51               const int s_ch = s * 4 + j;
52               const int d_ch = d * 4 + i;
53               if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
54                 const int f_index =
55                     weights.shape.LinearIndex({d_ch, y, x, s_ch});
56                 filters[j][i] = weights.data[f_index];
57               } else {
58                 filters[j][i] = 0.0f;
59               }
60             }
61           }
62           for (int i = 0; i < channels_count; ++i) {
63             dst[counter++] = filters[i];
64           }
65         }
66       }
67     }
68   }
69 }
70 
71 template <DataType S, typename T>
RearrangeWeightsForConvConstantsDot(const tflite::gpu::Tensor<OHWI,S> & weights,absl::Span<T> dst)72 void RearrangeWeightsForConvConstantsDot(
73     const tflite::gpu::Tensor<OHWI, S>& weights, absl::Span<T> dst) {
74   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
75   const int src_depth = DivideRoundUp(weights.shape.i, 4);
76   const int kernel_x = weights.shape.w;
77   const int kernel_y = weights.shape.h;
78 
79   int counter = 0;
80   for (int s = 0; s < src_depth; ++s) {
81     for (int y = 0; y < kernel_y; ++y) {
82       for (int x = 0; x < kernel_x; ++x) {
83         for (int d = 0; d < dst_depth; ++d) {
84           const int channels_count = std::min(4, weights.shape.o - d * 4);
85           T filters[4];
86           for (int j = 0; j < channels_count; ++j) {
87             for (int i = 0; i < 4; ++i) {
88               const int s_ch = s * 4 + i;
89               const int d_ch = d * 4 + j;
90               if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
91                 const int f_index =
92                     weights.shape.LinearIndex({d_ch, y, x, s_ch});
93                 filters[j][i] = weights.data[f_index];
94               } else {
95                 filters[j][i] = 0.0f;
96               }
97             }
98           }
99           for (int i = 0; i < channels_count; ++i) {
100             dst[counter++] = filters[i];
101           }
102         }
103       }
104     }
105   }
106 }
107 
108 template <DataType T>
UploadWeightsForConvConstants(const tflite::gpu::Tensor<OHWI,T> & weights,const GpuInfo & gpu_info,CalculationsPrecision precision,bool use_dot_conv,GPUOperation * op)109 void UploadWeightsForConvConstants(const tflite::gpu::Tensor<OHWI, T>& weights,
110                                    const GpuInfo& gpu_info,
111                                    CalculationsPrecision precision,
112                                    bool use_dot_conv, GPUOperation* op) {
113   const int src_depth = DivideRoundUp(weights.shape.i, 4);
114   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
115   const int kernel_x = weights.shape.w;
116   const int kernel_y = weights.shape.h;
117 
118   const bool f32_weights = precision == CalculationsPrecision::F32;
119   const int float_size = f32_weights ? 4 : 2;
120   const int aligned_ch_count = use_dot_conv ? weights.shape.o * src_depth * 4
121                                             : weights.shape.i * dst_depth * 4;
122   const int float_count = aligned_ch_count * kernel_x * kernel_y;
123 
124   BufferDescriptor desc;
125   desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
126   desc.element_size = 4;
127   if (gpu_info.IsApiOpenCl() || gpu_info.IsApiMetal()) {
128     desc.memory_type = MemoryType::CONSTANT;
129   } else {
130     desc.memory_type = MemoryType::GLOBAL;
131   }
132   desc.size = float_size * float_count;
133   desc.data.resize(desc.size);
134 
135   if (f32_weights) {
136     float4* ptr = reinterpret_cast<float4*>(desc.data.data());
137     if (use_dot_conv) {
138       RearrangeWeightsForConvConstantsDot(weights,
139                                           absl::MakeSpan(ptr, float_count / 4));
140     } else {
141       RearrangeWeightsForConvConstants(weights,
142                                        absl::MakeSpan(ptr, float_count / 4));
143     }
144   } else {
145     half4* ptr = reinterpret_cast<half4*>(desc.data.data());
146     if (use_dot_conv) {
147       RearrangeWeightsForConvConstantsDot(weights,
148                                           absl::MakeSpan(ptr, float_count / 4));
149     } else {
150       RearrangeWeightsForConvConstants(weights,
151                                        absl::MakeSpan(ptr, float_count / 4));
152     }
153   }
154 
155   op->args_.AddObject("weights",
156                       std::make_unique<BufferDescriptor>(std::move(desc)));
157 }
158 
159 bool IsConvConstantsSupported(const GpuInfo& gpu_info,
160                               const OperationDef& definition,
161                               const Convolution2DAttributes& attr);
162 
163 GPUOperation CreateConvConstants(const GpuInfo& gpu_info,
164                                  const OperationDef& definition,
165                                  const Convolution2DAttributes& attr);
166 
167 }  // namespace gpu
168 }  // namespace tflite
169 
170 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONV_CONSTANTS_H_
171