xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/winograd.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_WINOGRAD_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_WINOGRAD_H_
18 
19 #include <string>
20 
21 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
22 #include "tensorflow/lite/delegates/gpu/common/operations.h"
23 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
24 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
25 
26 namespace tflite {
27 namespace gpu {
28 
// See https://arxiv.org/pdf/1509.09308.pdf for an explanation of the basic
// principles. These kernels use different transformation matrices than the
// original work.
32 class Winograd4x4To36 : public GPUOperation {
33  public:
34   Winograd4x4To36() = default;
GetPossibleKernelWorkGroups(TuningType tuning_type,const GpuInfo & gpu_info,const KernelInfo & kernel_info,std::vector<int3> * work_groups)35   void GetPossibleKernelWorkGroups(
36       TuningType tuning_type, const GpuInfo& gpu_info,
37       const KernelInfo& kernel_info,
38       std::vector<int3>* work_groups) const override {
39     work_groups->push_back(work_group_size_);
40   }
41   int3 GetGridSize() const override;
42   absl::Status BindArguments(ArgumentsBinder* args) override;
43 
44   // Move only
45   Winograd4x4To36(Winograd4x4To36&& kernel) = default;
46   Winograd4x4To36& operator=(Winograd4x4To36&& kernel) = default;
47   Winograd4x4To36(const Winograd4x4To36&) = delete;
48   Winograd4x4To36& operator=(const Winograd4x4To36&) = delete;
49 
50  private:
Winograd4x4To36(const OperationDef & definition,const Padding2D & padding)51   Winograd4x4To36(const OperationDef& definition, const Padding2D& padding)
52       : GPUOperation(definition), padding_(padding) {}
53   friend Winograd4x4To36 CreateWinograd4x4To36(const OperationDef& definition,
54                                                const Padding2D& padding,
55                                                const GpuInfo& gpu_info);
56 
57   Padding2D padding_;
58 };
59 
60 Winograd4x4To36 CreateWinograd4x4To36(const OperationDef& definition,
61                                       const Padding2D& padding,
62                                       const GpuInfo& gpu_info);
63 
64 class Winograd4x4To36TileX6 : public GPUOperation {
65  public:
66   Winograd4x4To36TileX6() = default;
67   Winograd4x4To36TileX6(const OperationDef& definition,
68                         const Padding2D& padding, const GpuInfo& gpu_info);
69   absl::Status BindArguments(ArgumentsBinder* args) override;
70   int3 GetGridSize() const override;
71   void GetPossibleKernelWorkGroups(
72       TuningType tuning_type, const GpuInfo& gpu_info,
73       const KernelInfo& kernel_info,
74       std::vector<int3>* work_groups) const override;
75 
76   // Move only
77   Winograd4x4To36TileX6(Winograd4x4To36TileX6&& operation) = default;
78   Winograd4x4To36TileX6& operator=(Winograd4x4To36TileX6&& operation) = default;
79   Winograd4x4To36TileX6(const Winograd4x4To36TileX6&) = delete;
80   Winograd4x4To36TileX6& operator=(const Winograd4x4To36TileX6&) = delete;
81 
82  private:
83   friend Winograd4x4To36TileX6 CreateWinograd4x4To36TileX6(
84       const GpuInfo& gpu_info, const OperationDef& definition,
85       const Padding2D& padding);
86 
87   void UploadBt();
88 
89   std::string GetWinograd4x4To36TileX6Code(const OperationDef& op_def,
90                                            const GpuInfo& gpu_info);
91 
92   // Must be called after kernel compilation
93   int3 SelectBestWorkGroup(const KernelInfo& kernel_info) const;
94 
95   Padding2D padding_;
96 };
97 
98 Winograd4x4To36TileX6 CreateWinograd4x4To36TileX6(
99     const GpuInfo& gpu_info, const OperationDef& definition,
100     const Padding2D& padding);
101 
102 class Winograd36To4x4 : public GPUOperation {
103  public:
104   Winograd36To4x4() = default;
GetPossibleKernelWorkGroups(TuningType tuning_type,const GpuInfo & gpu_info,const KernelInfo & kernel_info,std::vector<int3> * work_groups)105   void GetPossibleKernelWorkGroups(
106       TuningType tuning_type, const GpuInfo& gpu_info,
107       const KernelInfo& kernel_info,
108       std::vector<int3>* work_groups) const override {
109     work_groups->push_back(work_group_size_);
110   }
111   int3 GetGridSize() const override;
112 
113   // Move only
114   Winograd36To4x4(Winograd36To4x4&& kernel) = default;
115   Winograd36To4x4& operator=(Winograd36To4x4&& kernel) = default;
116   Winograd36To4x4(const Winograd36To4x4&) = delete;
117   Winograd36To4x4& operator=(const Winograd36To4x4&) = delete;
118 
119  private:
Winograd36To4x4(const OperationDef & definition)120   explicit Winograd36To4x4(const OperationDef& definition)
121       : GPUOperation(definition) {}
122   friend Winograd36To4x4 CreateWinograd36To4x4(
123       const OperationDef& definition,
124       const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases);
125 };
126 
127 Winograd36To4x4 CreateWinograd36To4x4(
128     const OperationDef& definition,
129     const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases);
130 
131 class Winograd36To4x4Tile4x1 : public GPUOperation {
132  public:
133   Winograd36To4x4Tile4x1() = default;
134   Winograd36To4x4Tile4x1(const OperationDef& definition,
135                          const GpuInfo& gpu_info);
136   absl::Status BindArguments(ArgumentsBinder* args) override;
137   int3 GetGridSize() const override;
138   void GetPossibleKernelWorkGroups(
139       TuningType tuning_type, const GpuInfo& gpu_info,
140       const KernelInfo& kernel_info,
141       std::vector<int3>* work_groups) const override;
142 
143   // Move only
144   Winograd36To4x4Tile4x1(Winograd36To4x4Tile4x1&& operation) = default;
145   Winograd36To4x4Tile4x1& operator=(Winograd36To4x4Tile4x1&& operation) =
146       default;
147   Winograd36To4x4Tile4x1(const Winograd36To4x4Tile4x1&) = delete;
148   Winograd36To4x4Tile4x1& operator=(const Winograd36To4x4Tile4x1&) = delete;
149 
150  private:
151   friend Winograd36To4x4Tile4x1 CreateWinograd36To4x4Tile4x1(
152       const GpuInfo& gpu_info, const OperationDef& definition,
153       const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases);
154 
155   void UploadAt();
156 
157   std::string GetWinograd36To4x4Tile4x1Code(const OperationDef& op_def,
158                                             const GpuInfo& gpu_info);
159 
160   // Must be called after kernel compilation
161   int3 SelectBestWorkGroup(const KernelInfo& kernel_info) const;
162 };
163 
164 Winograd36To4x4Tile4x1 CreateWinograd36To4x4Tile4x1(
165     const GpuInfo& gpu_info, const OperationDef& definition,
166     const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases);
167 
168 }  // namespace gpu
169 }  // namespace tflite
170 
171 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_WINOGRAD_H_
172