xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h"
16 
17 #include <stdint.h>
18 
19 #include <cmath>
20 #include <limits>
21 
22 #include "tensorflow/lite/c/builtin_op_data.h"
23 #include "tensorflow/lite/delegates/hexagon/builders/op_builder.h"
24 #include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h"
25 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
26 #include "tensorflow/lite/kernels/kernel_util.h"
27 
28 namespace tflite {
29 namespace delegates {
30 namespace hexagon {
31 namespace {
32 
// Channel count per slice when splitting a depthwise convolution op along the
// channel axis (workaround for an accuracy issue with 5x5-filter / 2x2-stride
// depthwise convs).
// See Conv2dOpBuilder.should_split_dwconv_ for details.
constexpr int kDwConv5x5Filt2x2StrideChannelCount = 32;
36 
37 // Dilated Depthwise Convolution performs SpaceToBatchND & BatchToSpaceND before
38 // and after the op respectively.
39 // This helper computes the paddings param for SpaceToBatchND and crops param
40 // for BatchToSpaceND.
41 //
42 // Inspired by tf.nn.with_space_to_batch & tf.required_space_to_batch_paddings.
ComputeSpaceToBatchParams(int input_height,int input_width,int weights_height,int weights_width,const std::vector<int> & dilation_factors_h_w,const TfLitePadding padding_type,std::vector<int> * paddings,std::vector<int> * crops)43 void ComputeSpaceToBatchParams(int input_height, int input_width,
44                                int weights_height, int weights_width,
45                                const std::vector<int>& dilation_factors_h_w,
46                                const TfLitePadding padding_type,
47                                std::vector<int>* paddings,
48                                std::vector<int>* crops) {
49   // Base paddings depend on padding applied to the Depthwise Conv op.
50   // 4-element array: {top, bottom, left, right}.
51   std::vector<int> base_paddings(4, 0);
52   if (padding_type == kTfLitePaddingSame) {
53     const int dilated_weights_h =
54         dilation_factors_h_w[0] * (weights_height - 1) + 1;
55     const int dilated_weights_w =
56         dilation_factors_h_w[1] * (weights_width - 1) + 1;
57     base_paddings[0] = (dilated_weights_h - 1) / 2;
58     base_paddings[1] = dilated_weights_h - 1 - (dilated_weights_h - 1) / 2;
59     base_paddings[2] = (dilated_weights_w - 1) / 2;
60     base_paddings[3] = dilated_weights_w - 1 - (dilated_weights_w - 1) / 2;
61   }
62 
63   // paddings represents {pad_top, pad_bottom, pad_left, pad_right}.
64   paddings->resize(4, 0);
65   // crops represents {crop_top, crop_bottom, crop_left, crop_right}.
66   crops->resize(4, 0);
67 
68   // Logic for computing paddings & crops follows.
69   // Taken from tf.required_space_to_batch_paddings, but without array
70   // operations since we only deal with 2 dimensions.
71   int pad_start_h = base_paddings[0];
72   int pad_start_w = base_paddings[2];
73   int orig_pad_end_h = base_paddings[1];
74   int orig_pad_end_w = base_paddings[3];
75   int full_input_h = input_height + pad_start_h + orig_pad_end_h;
76   int full_input_w = input_width + pad_start_w + orig_pad_end_w;
77   int pad_end_extra_h =
78       (dilation_factors_h_w[0] - full_input_h % dilation_factors_h_w[0]) %
79       dilation_factors_h_w[0];
80   int pad_end_extra_w =
81       (dilation_factors_h_w[1] - full_input_w % dilation_factors_h_w[1]) %
82       dilation_factors_h_w[1];
83   int pad_end_h = orig_pad_end_h + pad_end_extra_h;
84   int pad_end_w = orig_pad_end_w + pad_end_extra_w;
85 
86   // Assign values.
87   (*paddings)[0] = pad_start_h;
88   (*paddings)[1] = pad_end_h;
89   (*paddings)[2] = pad_start_w;
90   (*paddings)[3] = pad_end_w;
91   (*crops)[0] = 0;
92   (*crops)[1] = pad_end_extra_h;
93   (*crops)[2] = 0;
94   (*crops)[3] = pad_end_extra_w;
95 }
96 
97 // Computes output dimensions for the SpaceToBatchND op used in the dilated
98 // Depthwise Conv case.
99 // space_to_batch_paddings should be in format {top, bottom, left, right}.
100 // These are computed from the documentation for SpaceToBatchND_8's output.
// Appends the output dimensions {batches, height, width, depth} of the
// SpaceToBatchND op (dilated Depthwise Conv case) to
// space_to_batch_output_dims.
// space_to_batch_paddings is in format {top, bottom, left, right}.
// These follow the documentation for SpaceToBatchND_8's output.
void PopulateSpaceToBatchOutputDims(
    int input_batch_size, int input_height_size, int input_width_size,
    int input_depth_size, const std::vector<int>& dilation_factors_h_w,
    const std::vector<int>& space_to_batch_paddings,
    std::vector<int>* space_to_batch_output_dims) {
  const int block_h = dilation_factors_h_w[0];
  const int block_w = dilation_factors_h_w[1];
  // Spatial extents after padding is applied.
  const int padded_height = space_to_batch_paddings[0] + input_height_size +
                            space_to_batch_paddings[1];
  const int padded_width = space_to_batch_paddings[2] + input_width_size +
                           space_to_batch_paddings[3];
  // SpaceToBatchND multiplies batches by the block size, divides the padded
  // spatial dims by it, and leaves depth untouched.
  space_to_batch_output_dims->insert(
      space_to_batch_output_dims->end(),
      {input_batch_size * block_h * block_w, padded_height / block_h,
       padded_width / block_w, input_depth_size});
}
122 
123 }  // namespace
124 
// Emits a dilated Depthwise Conv as the three-op chain
//   SpaceToBatchND_8 -> DepthwiseSupernode_8x8p32to8 (VALID) -> BatchToSpaceND_8
// since the Hexagon Supernode op itself does not take dilation factors.
// Writes the BatchToSpace outputs into output_tensor/output_min_tensor/
// output_max_tensor for the caller to consume.
void Conv2dOpBuilder::BuildDilatedDwConv(
    const TfLiteIntArray* inputs, const TfLiteTensor& data_tensor,
    const TfLiteTensor& output_data_tensor, OpBuilder* data_min_const,
    OpBuilder* data_max_const, OpBuilder* conv_output_min_const,
    OpBuilder* conv_output_max_const, OpBuilder* stride_node, int stride_height,
    const TfLitePadding padding_type, TensorID* output_tensor,
    TensorID* output_min_tensor, TensorID* output_max_tensor) {
  // NOTE(review): these are static presumably because AddConstNodeWithData
  // retains the shape pointer for the lifetime of the graph — confirm.
  static std::vector<int> dilation_factors_shape = {1, 1, 1, 2};
  static std::vector<int> paddings_shape = {1, 1, 2, 2};
  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, output_data_tensor.dims);
  // For dilated Depthwise Conv, we convert this node into SpaceToBatchND, and
  // then chain Supernode & BatchToSpaceND after it.
  int input_batch_size, input_height_size, input_width_size, input_depth_size;
  GetDims(&input_batch_size, &input_height_size, &input_width_size,
          &input_depth_size, data_tensor.dims);
  // Fills space_to_batch_paddings_ / batch_to_space_crops_ (members, so they
  // stay alive for the const nodes created below).
  ComputeSpaceToBatchParams(input_height_size, input_width_size,
                            weight_shape_[0], weight_shape_[1],
                            dilation_factors_h_w_, padding_type,
                            &space_to_batch_paddings_, &batch_to_space_crops_);
  // sizeof(stride_height) is simply sizeof(int): the const nodes below hold
  // int32 data.
  auto* dilation_factors_const = graph_builder_->AddConstNodeWithData(
      dilation_factors_shape.data(),
      reinterpret_cast<char*>(dilation_factors_h_w_.data()),
      dilation_factors_h_w_.size() * sizeof(stride_height));
  auto* paddings_const = graph_builder_->AddConstNodeWithData(
      paddings_shape.data(),
      reinterpret_cast<char*>(space_to_batch_paddings_.data()),
      space_to_batch_paddings_.size() * sizeof(stride_height));
  auto* crops_const = graph_builder_->AddConstNodeWithData(
      paddings_shape.data(),
      reinterpret_cast<char*>(batch_to_space_crops_.data()),
      batch_to_space_crops_.size() * sizeof(stride_height));

  // 1. SpaceToBatch.
  // Inputs: data, block sizes (= dilation factors), paddings, data min/max.
  SetOpType(OP_SpaceToBatchND_8);
  AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
  AddInput(TensorID(dilation_factors_const->GetID(), 0));
  AddInput(TensorID(paddings_const->GetID(), 0));
  AddInput(TensorID(data_min_const->GetID(), 0));
  AddInput(TensorID(data_max_const->GetID(), 0));
  std::vector<int> space_to_batch_output_dims;
  PopulateSpaceToBatchOutputDims(
      input_batch_size, input_height_size, input_width_size, input_depth_size,
      dilation_factors_h_w_, space_to_batch_paddings_,
      &space_to_batch_output_dims);
  TensorID space_to_batch_op_out =
      AddOutput(sizeof(uint8_t), 4, space_to_batch_output_dims);
  // min/max scalar outputs accompanying the quantized data output.
  AddOutput(sizeof(float), 4, kScalarShape);
  AddOutput(sizeof(float), 4, kScalarShape);

  // 2. Depthwise Conv.
  // Input order is fixed by the DepthwiseSupernode_8x8p32to8 op contract:
  // data, weights, data min/max, weights min/max, stride, bias,
  // bias min/max, output min/max [, per-channel scales].
  auto* conv_op = graph_builder_->AddNode(GetTFLiteNodeID());
  conv_op->SetOpType(OP_DepthwiseSupernode_8x8p32to8);
  conv_op->AddInput(space_to_batch_op_out);
  conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1]));
  conv_op->AddInput(TensorID(data_min_const->GetID(), 0));
  conv_op->AddInput(TensorID(data_max_const->GetID(), 0));
  conv_op->AddInput(TensorID(weights_min_node_->GetID(), 0));
  conv_op->AddInput(TensorID(weights_max_node_->GetID(), 0));
  conv_op->AddInput(TensorID(stride_node->GetID(), 0));
  conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2]));
  conv_op->AddInput(TensorID(bias_min_node_->GetID(), 0));
  conv_op->AddInput(TensorID(bias_max_node_->GetID(), 0));
  conv_op->AddInput(TensorID(conv_output_min_const->GetID(), 0));
  conv_op->AddInput(TensorID(conv_output_max_const->GetID(), 0));
  if (per_channel_quant_.channel_scales_node != nullptr) {
    conv_op->AddInput(
        TensorID(per_channel_quant_.channel_scales_node->GetID(), 0));
  }
  // The padding is handled by the SpaceToBatch/BatchToSpace ops surrounding
  // this node. Hence, this op's padding remains VALID only.
  // tf.nn.with_space_to_batch's docs state the following pattern:
  // """
  // batch_to_space_nd(
  //  op(space_to_batch_nd(input, adjusted_dilation_rate, adjusted_paddings),
  //     num_spatial_dims,
  //     "VALID")
  //  adjusted_dilation_rate,
  //  adjusted_crops)
  // """
  conv_op->SetPaddingType(NN_PAD_VALID);
  // These dimensions are probably a little excessive, but they upper-bound
  // the possible output from DepthwiseConv.
  // TODO(b/139955809): Find better bounds?
  TensorID conv_output = conv_op->AddOutput(
      sizeof(uint8_t), 4,
      {output_batch_size * dilation_factors_h_w_[0] * dilation_factors_h_w_[1],
       output_height_size, output_width_size, output_depth_size});
  conv_op->AddOutput(sizeof(float), 4, kScalarShape);
  conv_op->AddOutput(sizeof(float), 4, kScalarShape);

  // 3. BatchToSpace.
  // Inputs: data, block sizes, crops, min/max bounds.
  auto* batch_to_space_op = graph_builder_->AddNode(GetTFLiteNodeID());
  batch_to_space_op->SetOpType(OP_BatchToSpaceND_8);
  batch_to_space_op->AddInput(conv_output);
  batch_to_space_op->AddInput(TensorID(dilation_factors_const->GetID(), 0));
  batch_to_space_op->AddInput(TensorID(crops_const->GetID(), 0));
  batch_to_space_op->AddInput(TensorID(conv_output_min_const->GetID(), 0));
  batch_to_space_op->AddInput(TensorID(conv_output_max_const->GetID(), 0));
  *output_tensor =
      batch_to_space_op->AddOutput(sizeof(uint8_t), 4,
                                   {output_batch_size, output_height_size,
                                    output_width_size, output_depth_size});
  *output_min_tensor =
      batch_to_space_op->AddOutput(sizeof(float), 4, kScalarShape);
  *output_max_tensor =
      batch_to_space_op->AddOutput(sizeof(float), 4, kScalarShape);
}
236 
// Workaround for depthwise conv accuracy issues.
// See Conv2dOpBuilder.should_split_dwconv_ for details.
//
// Emits the op as Split(channel) -> N x DepthwiseSupernode -> Concat(channel),
// where each slice covers kDwConv5x5Filt2x2StrideChannelCount channels and
// uses its own pre-split weights/bias/scales nodes (weights_nodes_,
// bias_nodes_, per_channel_quant_.channel_scales_nodes).
void Conv2dOpBuilder::BuildSplittedDwConv(
    const TfLiteIntArray* inputs, const TfLiteTensor& data_tensor,
    const TfLiteTensor& output_data_tensor, OpBuilder* data_min_const,
    OpBuilder* data_max_const, OpBuilder* conv_output_min_const,
    OpBuilder* conv_output_max_const, OpBuilder* stride_node,
    const TfLitePadding padding_type, TensorID* output_tensor,
    TensorID* output_min_tensor, TensorID* output_max_tensor) {
  // Input dimensions.
  int input_batch_size, input_height_size, input_width_size, input_depth_size;
  GetDims(&input_batch_size, &input_height_size, &input_width_size,
          &input_depth_size, data_tensor.dims);
  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, output_data_tensor.dims);

  // This builder node itself becomes the Split op; the conv and concat ops are
  // added as extra nodes below.
  auto* split = this;
  split->SetOpType(OP_QuantizedSplit_8);
  // Split along the channel (depth) axis of NHWC.
  int32_t dim_channel = 3;
  auto* dimension_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&dim_channel), sizeof(dim_channel));
  split->AddInput(TensorID(dimension_const->GetID(), 0));
  split->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
  split->AddInput(TensorID(data_min_const->GetID(), 0));
  split->AddInput(TensorID(data_max_const->GetID(), 0));
  // One quantized output slice per split, each with the fixed per-slice
  // channel count.
  std::vector<TensorID> data_nodes;
  data_nodes.reserve(per_channel_quant_.splits);
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    data_nodes.emplace_back(
        split->AddOutput(sizeof(uint8_t), 4,
                         {input_batch_size, input_height_size, input_width_size,
                          kDwConv5x5Filt2x2StrideChannelCount}));
  }
  // Shared min/max for all slices (split does not change quantization).
  auto data_min = split->AddOutput(sizeof(float), 4, kScalarShape);
  auto data_max = split->AddOutput(sizeof(float), 4, kScalarShape);

  // One DepthwiseSupernode per slice. Input order is fixed by the op contract:
  // data, weights, data min/max, weights min/max, stride, bias,
  // bias min/max, output min/max, per-channel scales.
  std::vector<TensorID> dconv_outputs, dconv_min, dconv_max;
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    auto* dw_conv = graph_builder_->AddNode(GetTFLiteNodeID());
    dw_conv->SetOpType(OP_DepthwiseSupernode_8x8p32to8);
    if (padding_type == kTfLitePaddingSame) {
      dw_conv->SetPaddingType(NN_PAD_SAME);
    } else if (padding_type == kTfLitePaddingValid) {
      dw_conv->SetPaddingType(NN_PAD_VALID);
    }
    dw_conv->AddInput(data_nodes[i]);
    dw_conv->AddInput(TensorID(weights_nodes_[i]->GetID(), 0));
    dw_conv->AddInput(data_min);
    dw_conv->AddInput(data_max);
    dw_conv->AddInput(TensorID(weights_min_node_->GetID(), 0));
    dw_conv->AddInput(TensorID(weights_max_node_->GetID(), 0));
    dw_conv->AddInput(TensorID(stride_node->GetID(), 0));
    dw_conv->AddInput(TensorID(bias_nodes_[i]->GetID(), 0));
    dw_conv->AddInput(TensorID(bias_min_node_->GetID(), 0));
    dw_conv->AddInput(TensorID(bias_max_node_->GetID(), 0));
    dw_conv->AddInput(TensorID(conv_output_min_const->GetID(), 0));
    dw_conv->AddInput(TensorID(conv_output_max_const->GetID(), 0));
    dw_conv->AddInput(
        TensorID(per_channel_quant_.channel_scales_nodes[i]->GetID(), 0));
    dconv_outputs.push_back(dw_conv->AddOutput(
        sizeof(uint8_t), 4,
        {output_batch_size, output_height_size, output_width_size,
         kDwConv5x5Filt2x2StrideChannelCount}));
    dconv_min.push_back(dw_conv->AddOutput(sizeof(float), 4, kScalarShape));
    dconv_max.push_back(dw_conv->AddOutput(sizeof(float), 4, kScalarShape));
  }

  // Concat the per-slice results back along the channel axis. QuantizedConcat
  // takes all data tensors, then all mins, then all maxes, then the requested
  // output min/max.
  auto* concat = graph_builder_->AddNode(GetTFLiteNodeID());
  concat->SetOpType(OP_QuantizedConcat_8);
  concat->AddInput(TensorID(dimension_const->GetID(), 0));
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    concat->AddInput(dconv_outputs[i]);
  }
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    concat->AddInput(dconv_min[i]);
  }
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    concat->AddInput(dconv_max[i]);
  }
  concat->AddInput(TensorID(conv_output_min_const->GetID(), 0));
  concat->AddInput(TensorID(conv_output_max_const->GetID(), 0));
  *output_tensor = concat->AddOutput(sizeof(uint8_t), 4,
                                     {output_batch_size, output_height_size,
                                      output_width_size, output_depth_size});
  *output_min_tensor = concat->AddOutput(sizeof(float), 4, kScalarShape);
  *output_max_tensor = concat->AddOutput(sizeof(float), 4, kScalarShape);
}
327 
// Emits a plain (non-dilated, non-split) Conv/DepthwiseConv as a single
// Supernode op on this builder node. The op type itself was already chosen by
// the caller (op_node_.op_type); this only wires padding, inputs and outputs.
void Conv2dOpBuilder::BuildStandardConv(
    const TfLiteIntArray* inputs, const TfLiteTensor& output_data_tensor,
    OpBuilder* data_min_const, OpBuilder* data_max_const,
    OpBuilder* conv_output_min_const, OpBuilder* conv_output_max_const,
    OpBuilder* stride_node, const TfLitePadding padding_type,
    TensorID* output_tensor, TensorID* output_min_tensor,
    TensorID* output_max_tensor) {
  // Standard case.
  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, output_data_tensor.dims);
  // Padding type. (kTfLitePaddingUnknown intentionally sets nothing.)
  if (padding_type == kTfLitePaddingSame) {
    SetPaddingType(NN_PAD_SAME);
  } else if (padding_type == kTfLitePaddingValid) {
    SetPaddingType(NN_PAD_VALID);
  }
  // Inputs, in the order the Supernode op contract requires:
  // data, weights, data min/max, weights min/max, stride, bias,
  // bias min/max, output min/max [, per-channel scales].
  AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
  AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1]));
  AddInput(TensorID(data_min_const->GetID(), 0));
  AddInput(TensorID(data_max_const->GetID(), 0));
  AddInput(TensorID(weights_min_node_->GetID(), 0));
  AddInput(TensorID(weights_max_node_->GetID(), 0));
  AddInput(TensorID(stride_node->GetID(), 0));
  AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2]));
  AddInput(TensorID(bias_min_node_->GetID(), 0));
  AddInput(TensorID(bias_max_node_->GetID(), 0));
  AddInput(TensorID(conv_output_min_const->GetID(), 0));
  AddInput(TensorID(conv_output_max_const->GetID(), 0));
  if (per_channel_quant_.channel_scales_node != nullptr) {
    AddInput(TensorID(per_channel_quant_.channel_scales_node->GetID(), 0));
  }
  // Outputs: quantized data plus its scalar min/max bounds.
  *output_tensor = AddOutput(sizeof(uint8_t), 4,
                             {output_batch_size, output_height_size,
                              output_width_size, output_depth_size});
  *output_min_tensor = AddOutput(sizeof(float), 4, kScalarShape);
  *output_max_tensor = AddOutput(sizeof(float), 4, kScalarShape);
}
370 
// Builds the Hexagon subgraph for a TFLite CONV_2D / DEPTHWISE_CONV_2D node.
// inputs are {data, weights, bias}; outputs has a single tensor.
// Dispatches to one of three lowerings (dilated DW conv, channel-split DW
// conv, or a single Supernode) and appends a Requantize op when a fused
// activation narrowed the conv's output range relative to the TFLite tensor's
// quantization bounds.
TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs,
                                               const TfLiteIntArray* outputs,
                                               TfLiteContext* context) {
  // Input data tensor.
  const auto& data_tensor = context->tensors[inputs->data[0]];
  const auto& output_data_tensor = context->tensors[outputs->data[0]];
  int input_batch_size, input_height_size, input_width_size, input_depth_size;
  GetDims(&input_batch_size, &input_height_size, &input_width_size,
          &input_depth_size, data_tensor.dims);
  // Quantization bounds of the input, materialized as scalar const nodes.
  float data_min = 0;
  float data_max = 0;
  TF_LITE_ENSURE_STATUS(
      ComputeMinAndMaxQuantValues(data_tensor, &data_min, &data_max));
  auto* data_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&data_min), sizeof(data_min));
  auto* data_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&data_max), sizeof(data_max));

  // Gather information about the Convolution operations.
  TfLitePadding padding_type = kTfLitePaddingUnknown;
  TfLiteFusedActivation activation = kTfLiteActNone;
  int stride_height = 0;
  int stride_width = 0;
  bool is_dilated_depthwise_conv = false;
  int channel_multiplier = 1;
  if (op_node_.op_type == OP_Supernode_8x8p32to8) {
    // Regular convolution: builtin_data_ is TfLiteConvParams.
    const TfLiteConvParams* conv_params =
        reinterpret_cast<const TfLiteConvParams*>(builtin_data_);
    stride_height = conv_params->stride_height;
    stride_width = conv_params->stride_width;
    padding_type = conv_params->padding;
    activation = conv_params->activation;
  } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) {
    // Depthwise convolution: builtin_data_ is TfLiteDepthwiseConvParams.
    const TfLiteDepthwiseConvParams* conv_params =
        reinterpret_cast<const TfLiteDepthwiseConvParams*>(builtin_data_);
    stride_height = conv_params->stride_height;
    stride_width = conv_params->stride_width;
    padding_type = conv_params->padding;
    activation = conv_params->activation;
    channel_multiplier = conv_params->depth_multiplier;
    // We only support dilation for DepthwiseConv.
    if (conv_params->dilation_height_factor > 1 ||
        conv_params->dilation_width_factor > 1) {
      is_dilated_depthwise_conv = true;
      dilation_factors_h_w_.push_back(conv_params->dilation_height_factor);
      dilation_factors_h_w_.push_back(conv_params->dilation_width_factor);
    }
  }

  // Weights tensor
  TF_LITE_ENSURE_STATUS(
      InitializeWeightsNodes(inputs, outputs, context, input_depth_size));

  // Stride node. The stride is conveyed via the const node's *shape*
  // {1, stride_h, stride_w, 1}; the data payload appears unused, hence the
  // static dummy int (static so its address stays valid for the graph's
  // lifetime).
  static int dummy = 0;
  stride_shape_ = {1, stride_height, stride_width, 1};
  auto* stride_node = graph_builder_->AddConstNodeWithData(
      stride_shape_.data(), reinterpret_cast<char*>(&dummy), sizeof(dummy));

  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, context->tensors[outputs->data[0]].dims);
  // Output bounds.
  // TODO(b/129276536): Add support for other activations here. Current
  // implementation assumes None/Relu.
  float output_min = 0;
  float output_max = 0;
  TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues(
      context->tensors[outputs->data[0]], &output_min, &output_max));
  // These denote the bounds fed to Hexagon's Conv mechanism, which will be
  // different from the TFLite tensor bounds if there is a RELU activation.
  float conv_output_min = output_min;
  float conv_output_max = output_max;
  if (activation == kTfLiteActRelu6) {
    conv_output_min = 0;
    conv_output_max = 6;
  } else if (activation == kTfLiteActReluN1To1) {
    conv_output_min = -1;
    conv_output_max = 1;
  } else if (activation == kTfLiteActRelu) {
    conv_output_min = 0;
  }
  auto* conv_output_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&conv_output_min),
      sizeof(conv_output_min));
  auto* conv_output_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&conv_output_max),
      sizeof(conv_output_max));

  // Bias node.
  TF_LITE_ENSURE_STATUS(InitializeBiasNodes(inputs, outputs, context));

  // TODO(b/143759564): Simplify this method when depth_multiplier support needs
  // generalizing.
  if (channel_multiplier > 1 && input_depth_size == 1) {
    // Depthwise Conv with input_depth == 1 & channel_multiplier > 1 is
    // equivalent to Conv.
    SetOpType(OP_Supernode_8x8p32to8);
  } else if (channel_multiplier > 1) {
    TF_LITE_KERNEL_LOG(
        context, "depth_multiplier > 1 not supported with input_depth > 1");
    return kTfLiteError;
  }

  // Pick the lowering. All three fill the same three output TensorIDs.
  TensorID output_tensor, output_min_tensor, output_max_tensor;
  if (is_dilated_depthwise_conv) {
    BuildDilatedDwConv(inputs, data_tensor, output_data_tensor, data_min_const,
                       data_max_const, conv_output_min_const,
                       conv_output_max_const, stride_node, stride_height,
                       padding_type, &output_tensor, &output_min_tensor,
                       &output_max_tensor);
  } else if (should_split_dwconv_) {
    BuildSplittedDwConv(inputs, data_tensor, output_data_tensor, data_min_const,
                        data_max_const, conv_output_min_const,
                        conv_output_max_const, stride_node, padding_type,
                        &output_tensor, &output_min_tensor, &output_max_tensor);
  } else {
    BuildStandardConv(inputs, output_data_tensor, data_min_const,
                      data_max_const, conv_output_min_const,
                      conv_output_max_const, stride_node, padding_type,
                      &output_tensor, &output_min_tensor, &output_max_tensor);
  }

  // Requantize if activation was not None & the TFLite tensor's min/max is
  // different (diff > 1e-2) from the RELU bounds.
  const float min_bound_diff = std::abs(conv_output_min - output_min);
  const float max_bound_diff = std::abs(conv_output_max - output_max);
  if (activation != kTfLiteActNone &&
      (min_bound_diff > 0.01 || max_bound_diff > 0.01)) {
    auto* requantized_min_const = graph_builder_->AddConstNodeWithData(
        kScalarShape, reinterpret_cast<char*>(&output_min), sizeof(output_min));
    auto* requantized_max_const = graph_builder_->AddConstNodeWithData(
        kScalarShape, reinterpret_cast<char*>(&output_max), sizeof(output_max));
    auto* requantize_op = graph_builder_->AddNode(GetTFLiteNodeID());
    requantize_op->SetOpType(OP_Requantize_8to8);
    requantize_op->AddInput(output_tensor);
    requantize_op->AddInput(output_min_tensor);
    requantize_op->AddInput(output_max_tensor);
    requantize_op->AddInput(TensorID(requantized_min_const->GetID(), 0));
    requantize_op->AddInput(TensorID(requantized_max_const->GetID(), 0));
    node_output_ =
        requantize_op->AddOutput(sizeof(uint8_t), 4,
                                 {output_batch_size, output_height_size,
                                  output_width_size, output_depth_size});
    requantize_op->AddOutput(sizeof(float), 4, kScalarShape);
    requantize_op->AddOutput(sizeof(float), 4, kScalarShape);
  } else {
    node_output_ = output_tensor;
  }

  return kTfLiteOk;
}
525 
RegisterOutputs(const TfLiteIntArray * outputs,TfLiteContext * context)526 TfLiteStatus Conv2dOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs,
527                                               TfLiteContext* context) {
528   // Should be only 1 output.
529   graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
530                                   node_output_.second);
531   return kTfLiteOk;
532 }
533 
~Conv2dOpBuilder()534 Conv2dOpBuilder::~Conv2dOpBuilder() {}
535 
// Factory for Conv2dOpBuilder used by the delegate's op-builder registry.
// NOTE(review): returns an owning raw pointer — presumably the registry /
// GraphBuilder takes ownership and deletes it; confirm before changing.
OpBuilder* CreateConv2DBuilder(GraphBuilder* graph_builder, int op_type) {
  return new Conv2dOpBuilder(graph_builder, op_type);
}
539 
540 }  // namespace hexagon
541 }  // namespace delegates
542 }  // namespace tflite
543