/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/tasks/convolution_transposed_3x3.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/task/work_group_picking.h"

namespace tflite {
namespace gpu {

ConvolutionTransposed3x3::ConvolutionTransposed3x3(
    const OperationDef& definition, const GpuInfo& gpu_info, int2 padding)
    : GPUOperation(definition), padding_(padding) {
  work_group_size_ = int3(8, 4, 1);
  work_group_launch_order_ = int3(2, 0, 1);
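  // Pick a weights staging strategy per vendor: async local-memory copies on
  // PowerVR, cooperative local loads on Nvidia/Intel (and pre-Bionic Apple),
  // constant memory on AMD, and plain global reads everywhere else.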
  if (gpu_info.IsApple()) {
    if (gpu_info.apple_info.IsBionic()) {
      weights_upload_type_ = WeightsUploadType::GLOBAL_MEM;
    } else {
      weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS;
    }
  } else if (gpu_info.IsPowerVR()) {
    weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC;
  } else if (gpu_info.IsNvidia() || gpu_info.IsIntel()) {
    weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS;
  } else if (gpu_info.IsAMD()) {
    weights_upload_type_ = WeightsUploadType::CONSTANT_MEM;
  } else {
    weights_upload_type_ = WeightsUploadType::GLOBAL_MEM;
  }
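  // The layout must match the CONV macro emitted by the codegen below:
  // O4I4 pairs with the dot-product variant, I4O4 with the multiply-add one.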
  if (gpu_info.IsApple()) {
    weights_layout_ = WeightsLayout::kOICustomSpatialO4I4;
  } else {
    weights_layout_ = WeightsLayout::kOICustomSpatialI4O4;
  }
  code_ = GenerateConvolutionTransposedCode(gpu_info, definition_,
                                            weights_upload_type_, padding_,
                                            work_group_launch_order_);
  if (definition_.precision == CalculationsPrecision::F16 &&
      gpu_info.IsPowerVR()) {
    compiler_options_.push_back(CompilerOptions::kClFastRelaxedMath);
  }
}

std::string ConvolutionTransposed3x3::GenerateConvolutionTransposedCode(
    const GpuInfo& gpu_info, const OperationDef& op_def,
    ConvolutionTransposed3x3::WeightsUploadType weights_upload_type,
    int2 padding, int3 work_group_launch_order) {
  auto src_desc = op_def.src_tensors[0];
  AddSrcTensor("src_tensor", src_desc);
  AddDstTensor("dst_tensor", op_def.dst_tensors[0]);

  if (op_def.src_tensors.size() == 2) {
    // dynamic weights
    BufferDescriptor desc;
    desc.element_type = op_def.src_tensors[1].GetDataType();
    desc.element_size = 4;
    desc.memory_type =
        weights_upload_type ==
                ConvolutionTransposed3x3::WeightsUploadType::CONSTANT_MEM
            ? MemoryType::CONSTANT
            : MemoryType::GLOBAL;
    AddSrcBuffer("weights", desc);
  }

  args_.AddInt("filter_offset");
  args_.AddInt("padding_x");
  args_.AddInt("padding_y");

  const bool need_local_mem =
      weights_upload_type ==
          ConvolutionTransposed3x3::WeightsUploadType::LOCAL_MEM_BY_THREADS ||
      weights_upload_type ==
          ConvolutionTransposed3x3::WeightsUploadType::LOCAL_MEM_ASYNC;

  std::string c;
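  // CONV(R, SRC, F) accumulates a 4x4 matrix-vector product: one src pixel
  // (4 input channels) against 4 consecutive weight vectors in weights_cache.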
  if (GetWeightsDescription().IsI4O4()) {
    switch (op_def.precision) {
      case CalculationsPrecision::F32:
      case CalculationsPrecision::F16:
        c += "#define CONV(R, SRC, F) \\\n";
        c += "  R += SRC.x * weights_cache[F]; \\\n";
        c += "  R += SRC.y * weights_cache[F + 1]; \\\n";
        c += "  R += SRC.z * weights_cache[F + 2]; \\\n";
        c += "  R += SRC.w * weights_cache[F + 3];   \n";
        break;
      case CalculationsPrecision::F32_F16:
        c += "#define CONV(R, SRC, F) \\\n";
        c += "  R += TO_ACCUM_TYPE(SRC.x * weights_cache[F] + SRC.y * "
             "weights_cache[F + 1] + SRC.z * weights_cache[F + 2] + SRC.w * "
             "weights_cache[F + 3]);\n";
        break;
    }
  } else {
    // O4I4
    c += "#define CONV(R, SRC, F) \\\n";
    c += "  R.x += dot(SRC, weights_cache[F]); \\\n";
    c += "  R.y += dot(SRC, weights_cache[F + 1]); \\\n";
    c += "  R.z += dot(SRC, weights_cache[F + 2]); \\\n";
    c += "  R.w += dot(SRC, weights_cache[F + 3]);   \n";
  }

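  // A 32-thread work group on a 32-wide SIMD can use the cheaper
  // subgroup-scoped barrier instead of a full local-memory barrier.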
  const int wg_total_size =
      work_group_size_.x * work_group_size_.y * work_group_size_.z;
  const std::string barrier =
      wg_total_size == 32 && gpu_info.IsWaveSizeEqualTo32()
          ? "SIMD_LOCAL_MEM_BARRIER"
          : "LOCAL_MEM_BARRIER";
  const std::string weights_space =
      weights_upload_type ==
              ConvolutionTransposed3x3::WeightsUploadType::CONSTANT_MEM
          ? "__constant"
          : "__global";

  if (gpu_info.IsApiOpenCl()) {
    c += "__attribute__((reqd_work_group_size(8, 4, 1)))\n";
  }
  c += "MAIN_FUNCTION($0) {\n";
  int3 launch_remap;
  launch_remap[work_group_launch_order.x] = 0;
  launch_remap[work_group_launch_order.y] = 1;
  launch_remap[work_group_launch_order.z] = 2;
  auto GetGlobalID = [&](int id) {
    const std::string sid = std::to_string(id);
    if (work_group_launch_order[id] == id) {
      return "GLOBAL_ID_" + sid;
    } else {
      return "GROUP_ID_" + std::to_string(launch_remap[id]) + " * GROUP_SIZE_" +
             sid + " + LOCAL_ID_" + sid;
    }
  };
  if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) {
    c += "  int linear_id = " + GetGlobalID(0) + ";\n";
    c += "  int X = linear_id / args.dst_tensor.Batch();\n";
    c += "  int B = linear_id % args.dst_tensor.Batch();\n";
    c += "  args.src_tensor.SetBatchRef(B);\n";
    c += "  args.dst_tensor.SetBatchRef(B);\n";
  } else {
    c += "  int X = " + GetGlobalID(0) + ";\n";
  }
  c += "  int DST_X = X * 2;\n";
  c += "  int SRC_X = X + args.padding_x;\n";
  c += "  int Y = " + GetGlobalID(1) + ";\n";
  c += "  int DST_Y = Y * 2;\n";
  c += "  int SRC_Y = Y + args.padding_y;\n";
  c += "  int Z = " + GetGlobalID(2) + ";\n";
  if (!need_local_mem) {
    c += "  if (DST_X >= args.dst_tensor.Width() || DST_Y >= "
         "args.dst_tensor.Height() || Z >= args.dst_tensor.Slices()) return;\n";
  }
  c += "  ACCUM_FLT4 r0 = INIT_ACCUM_FLT4(0.0f);\n";
  c += "  ACCUM_FLT4 r1 = INIT_ACCUM_FLT4(0.0f);\n";
  c += "  ACCUM_FLT4 r2 = INIT_ACCUM_FLT4(0.0f);\n";
  c += "  ACCUM_FLT4 r3 = INIT_ACCUM_FLT4(0.0f);\n";
  c += "  int f_offset = Z * args.filter_offset;\n";
  if (need_local_mem) {
    c += "  __local FLT4 weights_cache[36];\n";
  }
  if (weights_upload_type ==
      ConvolutionTransposed3x3::WeightsUploadType::LOCAL_MEM_BY_THREADS) {
    c += "  int local_id = LOCAL_ID_1 * 8 + LOCAL_ID_0;\n";
  }
  if (!src_desc.SupportsZeroClamp(Axis::WIDTH, gpu_info)) {
    c += "  bool in_x0 = SRC_X >= 0 && SRC_X < args.src_tensor.Width();\n";
    c += "  bool in_x1 = SRC_X + 1 >= 0 && SRC_X + 1 < "
         "args.src_tensor.Width();\n";
  }
  if (!src_desc.SupportsZeroClamp(Axis::HEIGHT, gpu_info)) {
    c += "  bool in_y0 = SRC_Y >= 0 && SRC_Y < args.src_tensor.Height();\n";
    c += "  bool in_y1 = SRC_Y + 1 >= 0 && SRC_Y + 1 < "
         "args.src_tensor.Height();\n";
  }
  auto generate_check = [&](int x, int y) {
    std::string check;
    const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT};
    const std::vector<std::string> names{"in_x" + std::to_string(x),
                                         "in_y" + std::to_string(y)};
    for (int i = 0; i < axes.size(); ++i) {
      const auto& axis = axes[i];
      if (src_desc.HasAxis(axis) &&
          !src_desc.SupportsZeroClamp(axis, gpu_info)) {
        if (!check.empty()) {
          check += " && ";
        }
        check += names[i];
      }
    }
    return check;
  };
  if (src_desc.IsLinear()) {
    if (src_desc.ReturnsZeroForNegOneRead(gpu_info)) {
      c += "  int addr_0 = args.src_tensor.GetAddress(SRC_X, SRC_Y, 0);\n";
      c += "  int addr_1 = args.src_tensor.GetAddress(SRC_X + 1, SRC_Y, 0);\n";
      c += "  int addr_2 = args.src_tensor.GetAddress(SRC_X, SRC_Y + 1, 0);\n";
      c += "  int addr_3 = args.src_tensor.GetAddress(SRC_X+1, SRC_Y+1, 0);\n";
      c += "  addr_0 = select(-1, addr_0, (in_x0 && in_y0));\n";
      c += "  addr_1 = select(-1, addr_1, (in_x1 && in_y0));\n";
      c += "  addr_2 = select(-1, addr_2, (in_x0 && in_y1));\n";
      c += "  addr_3 = select(-1, addr_3, (in_x1 && in_y1));\n";
      c += "  int dz_0 = select(0, args.src_tensor.SliceStride(), (in_x0 && "
           "in_y0));\n";
      c += "  int dz_1 = select(0, args.src_tensor.SliceStride(), (in_x1 && "
           "in_y0));\n";
      c += "  int dz_2 = select(0, args.src_tensor.SliceStride(), (in_x0 && "
           "in_y1));\n";
      c += "  int dz_3 = select(0, args.src_tensor.SliceStride(), (in_x1 && "
           "in_y1));\n";
    } else {
      c += "  int xc0 = clamp(SRC_X, 0, args.src_tensor.Width() - 1);\n";
      c += "  int xc1 = clamp(SRC_X + 1, 0, args.src_tensor.Width() - 1);\n";
      c += "  int yc0 = clamp(SRC_Y, 0, args.src_tensor.Height() - 1);\n";
      c += "  int yc1 = clamp(SRC_Y + 1, 0, args.src_tensor.Height() - 1);\n";
      c += "  int addr_0 = args.src_tensor.GetAddress(xc0, yc0, 0);\n";
      c += "  int addr_1 = args.src_tensor.GetAddress(xc1, yc0, 0);\n";
      c += "  int addr_2 = args.src_tensor.GetAddress(xc0, yc1, 0);\n";
      c += "  int addr_3 = args.src_tensor.GetAddress(xc1, yc1, 0);\n";
      c += "  int dz = args.src_tensor.SliceStride();\n";
    }
  }
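  // read_src emits code reading one pixel of the 2x2 src neighborhood,
  // zero-masking out-of-bounds reads either via the negative-address trick
  // or an explicit in_x/in_y predicate.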
  auto read_src = [&](int x, int y) {
    if (src_desc.IsLinear()) {
      const std::string id = std::to_string(y * 2 + x);
      const std::string addr = "addr_" + std::to_string(y * 2 + x);
      if (src_desc.ReturnsZeroForNegOneRead(gpu_info)) {
        return "args.src_tensor.Read(" + addr + "); " + addr + " += dz_" + id +
               ";\n";
      } else {
        return "args.src_tensor.Read(" + addr + ") * INIT_FLT(in_x" +
               std::to_string(x) + " && in_y" + std::to_string(y) + "); " +
               addr + " += dz;\n";
      }
    } else {
      std::string check = generate_check(x, y);
      if (!check.empty()) {
        check = " * INIT_FLT(" + check + ")";
      }
      return "args.src_tensor.Read(SRC_X + " + std::to_string(x) +
             ", SRC_Y + " + std::to_string(y) + ", s)" + check + ";\n";
    }
  };
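  // Each of the 4 output pixels (r0..r3) of the 2x2 block uses a different
  // subset of the 9 filter taps, and which src pixel feeds each tap depends
  // on the padding parity; permutation[i] = {dst register, src pixel} for
  // tap i.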
  const int padding_x_rem = abs(padding.x) % 2;
  const int padding_y_rem = abs(padding.y) % 2;
  std::vector<std::pair<int, int>> permutation;
  if (padding_x_rem == 1 && padding_y_rem == 1) {
    permutation = {{0, 0}, {1, 0}, {1, 1}, {2, 0}, {2, 2},
                   {3, 0}, {3, 1}, {3, 2}, {3, 3}};
  } else if (padding_x_rem == 0 && padding_y_rem == 1) {
    permutation = {{0, 0}, {0, 1}, {1, 1}, {2, 0}, {2, 1},
                   {2, 2}, {2, 3}, {3, 1}, {3, 3}};
  } else if (padding_x_rem == 1 && padding_y_rem == 0) {
    permutation = {{0, 0}, {0, 2}, {1, 0}, {1, 1}, {1, 2},
                   {1, 3}, {2, 2}, {3, 2}, {3, 3}};
  } else {  // padding_x_rem == 0 && padding_y_rem == 0
    permutation = {{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 1},
                   {1, 3}, {2, 2}, {2, 3}, {3, 3}};
  }
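  // Per src slice: stage 36 weight vectors (9 taps x 4), read the 2x2
  // neighborhood, then apply all 9 taps.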
275   c += "  for (int s = 0; s < args.src_tensor.Slices(); ++s) {\n";
276   if (need_local_mem) {
277     c += "    " + barrier + ";\n";
278   }
279   if (weights_upload_type ==
280       ConvolutionTransposed3x3::WeightsUploadType::LOCAL_MEM_ASYNC) {
281     c += "    async_work_group_copy(weights_cache, "
282          "args.weights.GetPtr(f_offset), 36, "
283          "0);\n";
284   } else if (weights_upload_type ==
285              ConvolutionTransposed3x3::WeightsUploadType::
286                  LOCAL_MEM_BY_THREADS) {
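    // The 8x4 work group provides 32 threads: each stages one of the 36
    // FLT4 cache entries and the first four threads also load the tail.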
    c += "    weights_cache[local_id] = args.weights.Read(f_offset + "
         "local_id);\n";
    c += "    if (local_id < 4) {\n";
    c += "      weights_cache[local_id + 32] = args.weights.Read(f_offset + "
         "local_id + "
         "32);\n";
    c += "    };\n";
  } else {  // GLOBAL_MEM/CONSTANT_MEM
    c += "    " + weights_space +
         " FLT4* weights_cache = args.weights.GetPtr(f_offset);\n";
  }
  c += "    FLT4 src0 = " + read_src(0, 0);
  c += "    FLT4 src1 = " + read_src(1, 0);
  c += "    FLT4 src2 = " + read_src(0, 1);
  c += "    FLT4 src3 = " + read_src(1, 1);
  c += "    f_offset += 36;\n";
  if (need_local_mem) {
    c += "    " + barrier + ";\n";
  }
  for (int i = 0; i < 9; ++i) {
    const std::string r_name = "r" + std::to_string(permutation[i].first);
    const std::string s_name = "src" + std::to_string(permutation[i].second);
    const std::string w_name = std::to_string(i * 4);
    c += "    CONV(" + r_name + ", " + s_name + ", " + w_name + ");\n";
  }
  c += "  }\n";
  if (need_local_mem) {
    c += "  if (DST_X >= args.dst_tensor.Width() || DST_Y >= "
         "args.dst_tensor.Height() || Z >= args.dst_tensor.Slices()) return;\n";
  }
  c += "  FLT4 bias_val = args.biases.Read(Z);\n";
  for (int y = 0; y < 2; ++y) {
    for (int x = 0; x < 2; ++x) {
      const std::string s_x = std::to_string(x);
      const std::string s_y = std::to_string(y);
      const std::string id = std::to_string(y * 2 + x);
      const std::string x_c = "DST_X + " + s_x;
      const std::string y_c = "DST_Y + " + s_y;
      c += "  if (" + x_c + " < args.dst_tensor.Width() && " + y_c +
           " < args.dst_tensor.Height()) {\n";
      c += "    FLT4 res0 = TO_FLT4(r" + id + ") + bias_val;\n";
      c += "    args.dst_tensor.Write(res0, " + x_c + ", " + y_c + ", Z);\n";
      c += "  }\n";
    }
  }
  c += "}\n";
  return c;
}

absl::Status ConvolutionTransposed3x3::BindArguments(ArgumentsBinder* args) {
  RETURN_IF_ERROR(args->SetInt("filter_offset", 4 * 9 * src_[0]->Slices()));
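  // Map the transposed-conv padding to a src-space offset; the parity of the
  // original padding is handled separately via GetSpatialWeightsRemap and the
  // permutation tables in the generated code.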
  const int padding_x =
      padding_.x >= 1 ? (padding_.x - 1) / 2 : (padding_.x - 2) / 2;
  const int padding_y =
      padding_.y >= 1 ? (padding_.y - 1) / 2 : (padding_.y - 2) / 2;
  RETURN_IF_ERROR(args->SetInt("padding_x", padding_x));
  return args->SetInt("padding_y", padding_y);
}

void ConvolutionTransposed3x3::GetPossibleKernelWorkGroups(
    TuningType tuning_type, const GpuInfo& gpu_info,
    const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
  if (weights_upload_type_ == WeightsUploadType::LOCAL_MEM_ASYNC ||
      weights_upload_type_ == WeightsUploadType::LOCAL_MEM_BY_THREADS) {
    work_groups->push_back(work_group_size_);
    return;
  }
  GetPossibleWorkGroupsConv(tuning_type, gpu_info, kernel_info, grid_size_,
                            work_groups);
}

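// Each invocation produces a 2x2 output patch, so the grid covers half the
// dst width and height.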
int3 ConvolutionTransposed3x3::GetGridSize() const {
  const int grid_x = DivideRoundUp(dst_[0]->Width(), 2) * dst_[0]->Batch();
  const int grid_y = DivideRoundUp(dst_[0]->Height(), 2);
  const int grid_z = dst_[0]->Slices();
  return int3(grid_x, grid_y, grid_z);
}

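// Spatial order in which the 9 filter taps are stored for upload; it is keyed
// on the same padding parity as the permutation tables in the generated code.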
std::vector<int> ConvolutionTransposed3x3::GetSpatialWeightsRemap() const {
  const int padding_x_rem = abs(padding_.x) % 2;
  const int padding_y_rem = abs(padding_.y) % 2;

  if (padding_x_rem == 1 && padding_y_rem == 1) {
    return std::vector<int>{4, 5, 3, 7, 1, 8, 6, 2, 0};
  } else if (padding_x_rem == 0 && padding_y_rem == 1) {
    return std::vector<int>{5, 3, 4, 8, 6, 2, 0, 7, 1};
  } else if (padding_x_rem == 1 && padding_y_rem == 0) {
    return std::vector<int>{7, 1, 8, 6, 2, 0, 4, 5, 3};
  } else {  // padding_x_rem == 0 && padding_y_rem == 0
    return std::vector<int>{8, 6, 2, 0, 7, 1, 5, 3, 4};
  }
}

void ConvolutionTransposed3x3::UploadWeights(
    const tflite::gpu::Tensor<OHWI, DataType::FLOAT32>& weights) {
  const auto weights_desc = GetWeightsDescription();
  const int flt_count =
      GetTotalElementsCountForLayout(weights_desc, weights.shape);

  BufferDescriptor desc;
  desc.element_type = weights_desc.type;
  desc.element_size = 4;
  desc.memory_type =
      weights_upload_type_ ==
              ConvolutionTransposed3x3::WeightsUploadType::CONSTANT_MEM
          ? MemoryType::CONSTANT
          : MemoryType::GLOBAL;
  desc.size = flt_count * SizeOf(desc.element_type);
  desc.data.resize(desc.size);

  RearrangeWeights(weights, weights_desc, absl::MakeSpan(desc.data));

  args_.AddObject("weights",
                  std::make_unique<BufferDescriptor>(std::move(desc)));
}

bool IsConvolutionTransposed3x3Supported(
    const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr) {
  return attr.weights.shape.w == 3 && attr.weights.shape.h == 3 &&
         attr.stride.w == 2 && attr.stride.h == 2;
}

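// Typical creation path (a sketch, not part of this file; `def`, `attr`, and
// `gpu_info` are assumed to come from the delegate's operation selection):
//   if (IsConvolutionTransposed3x3Supported(def, attr)) {
//     ConvolutionTransposed3x3 op =
//         CreateConvolutionTransposed3x3(gpu_info, def, attr);
//   }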
ConvolutionTransposed3x3 CreateConvolutionTransposed3x3(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr) {
  const int2 padding = int2(attr.padding.prepended.w, attr.padding.prepended.h);
  ConvolutionTransposed3x3 result(definition, gpu_info, padding);
  result.UploadWeights(attr.weights);

  TensorDescriptor bias_tensor_desc = CreateConstantLinearTensorDescriptor(
      gpu_info, definition.src_tensors[0].GetDataType(), attr.bias);
  result.args_.AddObject("biases", std::make_unique<TensorDescriptor>(
                                       std::move(bias_tensor_desc)));
  return result;
}

ConvolutionTransposed3x3 CreateConvolutionTransposed3x3DynamicWeights(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr) {
  OperationDef new_def = definition;
  new_def.src_tensors = {
      definition.src_tensors[0]};  // leaving only src_tensor def, weights defs
                                   // will be added later
  const DataType weights_type = definition.GetDataType();
  // add 1 src_tensor(buffer) for weights
  new_def.src_tensors.push_back(
      {weights_type, TensorStorageType::BUFFER, Layout::HWC});

  const int2 padding = int2(attr.padding.prepended.w, attr.padding.prepended.h);
  ConvolutionTransposed3x3 result(new_def, gpu_info, padding);

  TensorDescriptor bias_tensor_desc = CreateConstantLinearTensorDescriptor(
      gpu_info, definition.src_tensors[0].GetDataType(), attr.bias);
  result.args_.AddObject("biases", std::make_unique<TensorDescriptor>(
                                       std::move(bias_tensor_desc)));
  return result;
}

}  // namespace gpu
}  // namespace tflite