1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h"
16
17 #include <stdint.h>
18
19 #include <cmath>
20 #include <limits>
21
22 #include "tensorflow/lite/c/builtin_op_data.h"
23 #include "tensorflow/lite/delegates/hexagon/builders/op_builder.h"
24 #include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h"
25 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
26 #include "tensorflow/lite/kernels/kernel_util.h"
27
28 namespace tflite {
29 namespace delegates {
30 namespace hexagon {
31 namespace {
32
// Channel count to split depthwise convolution op.
// BuildSplittedDwConv splits the input depth into groups of this many
// channels and runs one DepthwiseSupernode per group.
// See Conv2dOpBuilder.should_split_dwconv_ for details.
constexpr int kDwConv5x5Filt2x2StrideChannelCount = 32;
36
37 // Dilated Depthwise Convolution performs SpaceToBatchND & BatchToSpaceND before
38 // and after the op respectively.
39 // This helper computes the paddings param for SpaceToBatchND and crops param
40 // for BatchToSpaceND.
41 //
42 // Inspired by tf.nn.with_space_to_batch & tf.required_space_to_batch_paddings.
ComputeSpaceToBatchParams(int input_height,int input_width,int weights_height,int weights_width,const std::vector<int> & dilation_factors_h_w,const TfLitePadding padding_type,std::vector<int> * paddings,std::vector<int> * crops)43 void ComputeSpaceToBatchParams(int input_height, int input_width,
44 int weights_height, int weights_width,
45 const std::vector<int>& dilation_factors_h_w,
46 const TfLitePadding padding_type,
47 std::vector<int>* paddings,
48 std::vector<int>* crops) {
49 // Base paddings depend on padding applied to the Depthwise Conv op.
50 // 4-element array: {top, bottom, left, right}.
51 std::vector<int> base_paddings(4, 0);
52 if (padding_type == kTfLitePaddingSame) {
53 const int dilated_weights_h =
54 dilation_factors_h_w[0] * (weights_height - 1) + 1;
55 const int dilated_weights_w =
56 dilation_factors_h_w[1] * (weights_width - 1) + 1;
57 base_paddings[0] = (dilated_weights_h - 1) / 2;
58 base_paddings[1] = dilated_weights_h - 1 - (dilated_weights_h - 1) / 2;
59 base_paddings[2] = (dilated_weights_w - 1) / 2;
60 base_paddings[3] = dilated_weights_w - 1 - (dilated_weights_w - 1) / 2;
61 }
62
63 // paddings represents {pad_top, pad_bottom, pad_left, pad_right}.
64 paddings->resize(4, 0);
65 // crops represents {crop_top, crop_bottom, crop_left, crop_right}.
66 crops->resize(4, 0);
67
68 // Logic for computing paddings & crops follows.
69 // Taken from tf.required_space_to_batch_paddings, but without array
70 // operations since we only deal with 2 dimensions.
71 int pad_start_h = base_paddings[0];
72 int pad_start_w = base_paddings[2];
73 int orig_pad_end_h = base_paddings[1];
74 int orig_pad_end_w = base_paddings[3];
75 int full_input_h = input_height + pad_start_h + orig_pad_end_h;
76 int full_input_w = input_width + pad_start_w + orig_pad_end_w;
77 int pad_end_extra_h =
78 (dilation_factors_h_w[0] - full_input_h % dilation_factors_h_w[0]) %
79 dilation_factors_h_w[0];
80 int pad_end_extra_w =
81 (dilation_factors_h_w[1] - full_input_w % dilation_factors_h_w[1]) %
82 dilation_factors_h_w[1];
83 int pad_end_h = orig_pad_end_h + pad_end_extra_h;
84 int pad_end_w = orig_pad_end_w + pad_end_extra_w;
85
86 // Assign values.
87 (*paddings)[0] = pad_start_h;
88 (*paddings)[1] = pad_end_h;
89 (*paddings)[2] = pad_start_w;
90 (*paddings)[3] = pad_end_w;
91 (*crops)[0] = 0;
92 (*crops)[1] = pad_end_extra_h;
93 (*crops)[2] = 0;
94 (*crops)[3] = pad_end_extra_w;
95 }
96
97 // Computes output dimensions for the SpaceToBatchND op used in the dilated
98 // Depthwise Conv case.
99 // space_to_batch_paddings should be in format {top, bottom, left, right}.
100 // These are computed from the documentation for SpaceToBatchND_8's output.
// Appends the output dimensions {batches, height, width, depth} of the
// SpaceToBatchND op (used in the dilated Depthwise Conv case) to
// *space_to_batch_output_dims.
// space_to_batch_paddings is in {top, bottom, left, right} order.
// These follow the documentation for SpaceToBatchND_8's output.
void PopulateSpaceToBatchOutputDims(
    int input_batch_size, int input_height_size, int input_width_size,
    int input_depth_size, const std::vector<int>& dilation_factors_h_w,
    const std::vector<int>& space_to_batch_paddings,
    std::vector<int>* space_to_batch_output_dims) {
  const int block_h = dilation_factors_h_w[0];
  const int block_w = dilation_factors_h_w[1];
  // Padded spatial extents; by construction (see ComputeSpaceToBatchParams)
  // these are divisible by the corresponding block factor.
  const int padded_height = space_to_batch_paddings[0] + input_height_size +
                            space_to_batch_paddings[1];
  const int padded_width = space_to_batch_paddings[2] + input_width_size +
                           space_to_batch_paddings[3];
  // Batches grow by block_h * block_w; spatial dims shrink by the same
  // factors; depth is untouched.
  space_to_batch_output_dims->insert(
      space_to_batch_output_dims->end(),
      {input_batch_size * block_h * block_w, padded_height / block_h,
       padded_width / block_w, input_depth_size});
}
122
123 } // namespace
124
// Emits a dilated depthwise convolution as the three-op Hexagon sequence
// SpaceToBatchND_8 -> DepthwiseSupernode_8x8p32to8 (VALID) -> BatchToSpaceND_8,
// since the Hexagon depthwise supernode does not handle dilation directly.
// The final output/min/max tensor IDs are returned via the out-params.
void Conv2dOpBuilder::BuildDilatedDwConv(
    const TfLiteIntArray* inputs, const TfLiteTensor& data_tensor,
    const TfLiteTensor& output_data_tensor, OpBuilder* data_min_const,
    OpBuilder* data_max_const, OpBuilder* conv_output_min_const,
    OpBuilder* conv_output_max_const, OpBuilder* stride_node, int stride_height,
    const TfLitePadding padding_type, TensorID* output_tensor,
    TensorID* output_min_tensor, TensorID* output_max_tensor) {
  // Shapes of the const nodes holding the 2-element block-size (dilation
  // factors) and the 2x2 paddings/crops arrays, in Hexagon's 4-D layout.
  static std::vector<int> dilation_factors_shape = {1, 1, 1, 2};
  static std::vector<int> paddings_shape = {1, 1, 2, 2};
  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, output_data_tensor.dims);
  // For dilated Depthwise Conv, we convert this node into SpaceToBatchND, and
  // then chain Supernode & BatchToSpaceND after it.
  int input_batch_size, input_height_size, input_width_size, input_depth_size;
  GetDims(&input_batch_size, &input_height_size, &input_width_size,
          &input_depth_size, data_tensor.dims);
  ComputeSpaceToBatchParams(input_height_size, input_width_size,
                            weight_shape_[0], weight_shape_[1],
                            dilation_factors_h_w_, padding_type,
                            &space_to_batch_paddings_, &batch_to_space_crops_);
  // sizeof(stride_height) is simply sizeof(int): the element size of the
  // int payloads stored in the const nodes below.
  auto* dilation_factors_const = graph_builder_->AddConstNodeWithData(
      dilation_factors_shape.data(),
      reinterpret_cast<char*>(dilation_factors_h_w_.data()),
      dilation_factors_h_w_.size() * sizeof(stride_height));
  auto* paddings_const = graph_builder_->AddConstNodeWithData(
      paddings_shape.data(),
      reinterpret_cast<char*>(space_to_batch_paddings_.data()),
      space_to_batch_paddings_.size() * sizeof(stride_height));
  auto* crops_const = graph_builder_->AddConstNodeWithData(
      paddings_shape.data(),
      reinterpret_cast<char*>(batch_to_space_crops_.data()),
      batch_to_space_crops_.size() * sizeof(stride_height));

  // 1. SpaceToBatch: this builder node itself becomes the SpaceToBatchND op.
  SetOpType(OP_SpaceToBatchND_8);
  AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
  AddInput(TensorID(dilation_factors_const->GetID(), 0));
  AddInput(TensorID(paddings_const->GetID(), 0));
  AddInput(TensorID(data_min_const->GetID(), 0));
  AddInput(TensorID(data_max_const->GetID(), 0));
  std::vector<int> space_to_batch_output_dims;
  PopulateSpaceToBatchOutputDims(
      input_batch_size, input_height_size, input_width_size, input_depth_size,
      dilation_factors_h_w_, space_to_batch_paddings_,
      &space_to_batch_output_dims);
  TensorID space_to_batch_op_out =
      AddOutput(sizeof(uint8_t), 4, space_to_batch_output_dims);
  // Scalar min/max outputs that accompany the quantized tensor.
  AddOutput(sizeof(float), 4, kScalarShape);
  AddOutput(sizeof(float), 4, kScalarShape);

  // 2. Depthwise Conv, consuming the space-to-batch result.
  auto* conv_op = graph_builder_->AddNode(GetTFLiteNodeID());
  conv_op->SetOpType(OP_DepthwiseSupernode_8x8p32to8);
  conv_op->AddInput(space_to_batch_op_out);
  conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1]));
  conv_op->AddInput(TensorID(data_min_const->GetID(), 0));
  conv_op->AddInput(TensorID(data_max_const->GetID(), 0));
  conv_op->AddInput(TensorID(weights_min_node_->GetID(), 0));
  conv_op->AddInput(TensorID(weights_max_node_->GetID(), 0));
  conv_op->AddInput(TensorID(stride_node->GetID(), 0));
  conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2]));
  conv_op->AddInput(TensorID(bias_min_node_->GetID(), 0));
  conv_op->AddInput(TensorID(bias_max_node_->GetID(), 0));
  conv_op->AddInput(TensorID(conv_output_min_const->GetID(), 0));
  conv_op->AddInput(TensorID(conv_output_max_const->GetID(), 0));
  // Optional per-channel quantization scales.
  if (per_channel_quant_.channel_scales_node != nullptr) {
    conv_op->AddInput(
        TensorID(per_channel_quant_.channel_scales_node->GetID(), 0));
  }
  // The padding is handled by the SpaceToBatch/BatchToSpace ops surrounding
  // this node. Hence, this op's padding remains VALID only.
  // tf.nn.with_space_to_batch's docs state the following pattern:
  // """
  // batch_to_space_nd(
  //  op(space_to_batch_nd(input, adjusted_dilation_rate, adjusted_paddings),
  //     num_spatial_dims,
  //     "VALID")
  //  adjusted_dilation_rate,
  //  adjusted_crops)
  // """
  conv_op->SetPaddingType(NN_PAD_VALID);
  // These dimensions are probably a little excessive, but they upper-bound
  // the possible output from DepthwiseConv.
  // TODO(b/139955809): Find better bounds?
  TensorID conv_output = conv_op->AddOutput(
      sizeof(uint8_t), 4,
      {output_batch_size * dilation_factors_h_w_[0] * dilation_factors_h_w_[1],
       output_height_size, output_width_size, output_depth_size});
  conv_op->AddOutput(sizeof(float), 4, kScalarShape);
  conv_op->AddOutput(sizeof(float), 4, kScalarShape);

  // 3. BatchToSpace: folds the dilation batches back into spatial dims and
  // crops the extra divisibility padding added by step 1.
  auto* batch_to_space_op = graph_builder_->AddNode(GetTFLiteNodeID());
  batch_to_space_op->SetOpType(OP_BatchToSpaceND_8);
  batch_to_space_op->AddInput(conv_output);
  batch_to_space_op->AddInput(TensorID(dilation_factors_const->GetID(), 0));
  batch_to_space_op->AddInput(TensorID(crops_const->GetID(), 0));
  batch_to_space_op->AddInput(TensorID(conv_output_min_const->GetID(), 0));
  batch_to_space_op->AddInput(TensorID(conv_output_max_const->GetID(), 0));
  *output_tensor =
      batch_to_space_op->AddOutput(sizeof(uint8_t), 4,
                                   {output_batch_size, output_height_size,
                                    output_width_size, output_depth_size});
  *output_min_tensor =
      batch_to_space_op->AddOutput(sizeof(float), 4, kScalarShape);
  *output_max_tensor =
      batch_to_space_op->AddOutput(sizeof(float), 4, kScalarShape);
}
236
237 // Workaround for depthwise conv accuracy issues.
238 // See Conv2dOpBuilder.should_split_dwconv_ for details.
// Workaround for depthwise conv accuracy issues.
// See Conv2dOpBuilder.should_split_dwconv_ for details.
// Emits QuantizedSplit_8 (along channels, in groups of
// kDwConv5x5Filt2x2StrideChannelCount) -> one DepthwiseSupernode per group
// -> QuantizedConcat_8 back to the full depth. The concat's output/min/max
// tensor IDs are returned via the out-params.
void Conv2dOpBuilder::BuildSplittedDwConv(
    const TfLiteIntArray* inputs, const TfLiteTensor& data_tensor,
    const TfLiteTensor& output_data_tensor, OpBuilder* data_min_const,
    OpBuilder* data_max_const, OpBuilder* conv_output_min_const,
    OpBuilder* conv_output_max_const, OpBuilder* stride_node,
    const TfLitePadding padding_type, TensorID* output_tensor,
    TensorID* output_min_tensor, TensorID* output_max_tensor) {
  // Input dimensions.
  int input_batch_size, input_height_size, input_width_size, input_depth_size;
  GetDims(&input_batch_size, &input_height_size, &input_width_size,
          &input_depth_size, data_tensor.dims);
  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, output_data_tensor.dims);

  // This builder node itself becomes the split op.
  auto* split = this;
  split->SetOpType(OP_QuantizedSplit_8);
  // Split along the channel (depth) axis, i.e. dimension 3 of NHWC.
  int32_t dim_channel = 3;
  auto* dimension_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&dim_channel), sizeof(dim_channel));
  split->AddInput(TensorID(dimension_const->GetID(), 0));
  split->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
  split->AddInput(TensorID(data_min_const->GetID(), 0));
  split->AddInput(TensorID(data_max_const->GetID(), 0));
  // One output tensor per channel group; all groups have the fixed width of
  // kDwConv5x5Filt2x2StrideChannelCount channels.
  std::vector<TensorID> data_nodes;
  data_nodes.reserve(per_channel_quant_.splits);
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    data_nodes.emplace_back(
        split->AddOutput(sizeof(uint8_t), 4,
                         {input_batch_size, input_height_size, input_width_size,
                          kDwConv5x5Filt2x2StrideChannelCount}));
  }
  // Min/max are shared across all split outputs.
  auto data_min = split->AddOutput(sizeof(float), 4, kScalarShape);
  auto data_max = split->AddOutput(sizeof(float), 4, kScalarShape);

  // One depthwise conv per channel group, each with its own weights, bias
  // and per-channel scales slice (weights_nodes_[i], bias_nodes_[i],
  // channel_scales_nodes[i]).
  std::vector<TensorID> dconv_outputs, dconv_min, dconv_max;
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    auto* dw_conv = graph_builder_->AddNode(GetTFLiteNodeID());
    dw_conv->SetOpType(OP_DepthwiseSupernode_8x8p32to8);
    if (padding_type == kTfLitePaddingSame) {
      dw_conv->SetPaddingType(NN_PAD_SAME);
    } else if (padding_type == kTfLitePaddingValid) {
      dw_conv->SetPaddingType(NN_PAD_VALID);
    }
    dw_conv->AddInput(data_nodes[i]);
    dw_conv->AddInput(TensorID(weights_nodes_[i]->GetID(), 0));
    dw_conv->AddInput(data_min);
    dw_conv->AddInput(data_max);
    dw_conv->AddInput(TensorID(weights_min_node_->GetID(), 0));
    dw_conv->AddInput(TensorID(weights_max_node_->GetID(), 0));
    dw_conv->AddInput(TensorID(stride_node->GetID(), 0));
    dw_conv->AddInput(TensorID(bias_nodes_[i]->GetID(), 0));
    dw_conv->AddInput(TensorID(bias_min_node_->GetID(), 0));
    dw_conv->AddInput(TensorID(bias_max_node_->GetID(), 0));
    dw_conv->AddInput(TensorID(conv_output_min_const->GetID(), 0));
    dw_conv->AddInput(TensorID(conv_output_max_const->GetID(), 0));
    dw_conv->AddInput(
        TensorID(per_channel_quant_.channel_scales_nodes[i]->GetID(), 0));
    dconv_outputs.push_back(dw_conv->AddOutput(
        sizeof(uint8_t), 4,
        {output_batch_size, output_height_size, output_width_size,
         kDwConv5x5Filt2x2StrideChannelCount}));
    dconv_min.push_back(dw_conv->AddOutput(sizeof(float), 4, kScalarShape));
    dconv_max.push_back(dw_conv->AddOutput(sizeof(float), 4, kScalarShape));
  }

  // Concat the per-group results back along channels. QuantizedConcat_8
  // expects all data tensors, then all mins, then all maxes.
  auto* concat = graph_builder_->AddNode(GetTFLiteNodeID());
  concat->SetOpType(OP_QuantizedConcat_8);
  concat->AddInput(TensorID(dimension_const->GetID(), 0));
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    concat->AddInput(dconv_outputs[i]);
  }
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    concat->AddInput(dconv_min[i]);
  }
  for (auto i = 0; i < per_channel_quant_.splits; i++) {
    concat->AddInput(dconv_max[i]);
  }
  concat->AddInput(TensorID(conv_output_min_const->GetID(), 0));
  concat->AddInput(TensorID(conv_output_max_const->GetID(), 0));
  *output_tensor = concat->AddOutput(sizeof(uint8_t), 4,
                                     {output_batch_size, output_height_size,
                                      output_width_size, output_depth_size});
  *output_min_tensor = concat->AddOutput(sizeof(float), 4, kScalarShape);
  *output_max_tensor = concat->AddOutput(sizeof(float), 4, kScalarShape);
}
327
BuildStandardConv(const TfLiteIntArray * inputs,const TfLiteTensor & output_data_tensor,OpBuilder * data_min_const,OpBuilder * data_max_const,OpBuilder * conv_output_min_const,OpBuilder * conv_output_max_const,OpBuilder * stride_node,const TfLitePadding padding_type,TensorID * output_tensor,TensorID * output_min_tensor,TensorID * output_max_tensor)328 void Conv2dOpBuilder::BuildStandardConv(
329 const TfLiteIntArray* inputs, const TfLiteTensor& output_data_tensor,
330 OpBuilder* data_min_const, OpBuilder* data_max_const,
331 OpBuilder* conv_output_min_const, OpBuilder* conv_output_max_const,
332 OpBuilder* stride_node, const TfLitePadding padding_type,
333 TensorID* output_tensor, TensorID* output_min_tensor,
334 TensorID* output_max_tensor) {
335 // Standard case.
336 // Output dimensions.
337 int output_batch_size, output_height_size, output_width_size,
338 output_depth_size;
339 GetDims(&output_batch_size, &output_height_size, &output_width_size,
340 &output_depth_size, output_data_tensor.dims);
341 // Padding type.
342 if (padding_type == kTfLitePaddingSame) {
343 SetPaddingType(NN_PAD_SAME);
344 } else if (padding_type == kTfLitePaddingValid) {
345 SetPaddingType(NN_PAD_VALID);
346 }
347 // Inputs
348 AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
349 AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1]));
350 AddInput(TensorID(data_min_const->GetID(), 0));
351 AddInput(TensorID(data_max_const->GetID(), 0));
352 AddInput(TensorID(weights_min_node_->GetID(), 0));
353 AddInput(TensorID(weights_max_node_->GetID(), 0));
354 AddInput(TensorID(stride_node->GetID(), 0));
355 AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2]));
356 AddInput(TensorID(bias_min_node_->GetID(), 0));
357 AddInput(TensorID(bias_max_node_->GetID(), 0));
358 AddInput(TensorID(conv_output_min_const->GetID(), 0));
359 AddInput(TensorID(conv_output_max_const->GetID(), 0));
360 if (per_channel_quant_.channel_scales_node != nullptr) {
361 AddInput(TensorID(per_channel_quant_.channel_scales_node->GetID(), 0));
362 }
363 // Outputs
364 *output_tensor = AddOutput(sizeof(uint8_t), 4,
365 {output_batch_size, output_height_size,
366 output_width_size, output_depth_size});
367 *output_min_tensor = AddOutput(sizeof(float), 4, kScalarShape);
368 *output_max_tensor = AddOutput(sizeof(float), 4, kScalarShape);
369 }
370
// Translates one TFLite (Depthwise)Conv2D node into the Hexagon graph.
// Gathers quantization bounds, op params, weights/bias const nodes, then
// dispatches to one of three build paths (dilated DW conv, split DW conv, or
// standard conv) and finally appends a Requantize op if a fused activation
// narrowed the conv's output range relative to the TFLite tensor's range.
TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs,
                                               const TfLiteIntArray* outputs,
                                               TfLiteContext* context) {
  // Input data tensor.
  const auto& data_tensor = context->tensors[inputs->data[0]];
  const auto& output_data_tensor = context->tensors[outputs->data[0]];
  int input_batch_size, input_height_size, input_width_size, input_depth_size;
  GetDims(&input_batch_size, &input_height_size, &input_width_size,
          &input_depth_size, data_tensor.dims);
  // Dequantized bounds of the input tensor, stored as scalar const nodes.
  float data_min = 0;
  float data_max = 0;
  TF_LITE_ENSURE_STATUS(
      ComputeMinAndMaxQuantValues(data_tensor, &data_min, &data_max));
  auto* data_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&data_min), sizeof(data_min));
  auto* data_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&data_max), sizeof(data_max));

  // Gather information about the Convolution operations.
  TfLitePadding padding_type = kTfLitePaddingUnknown;
  TfLiteFusedActivation activation = kTfLiteActNone;
  int stride_height = 0;
  int stride_width = 0;
  bool is_dilated_depthwise_conv = false;
  int channel_multiplier = 1;
  // builtin_data_ holds either TfLiteConvParams or TfLiteDepthwiseConvParams
  // depending on which op this builder was created for.
  if (op_node_.op_type == OP_Supernode_8x8p32to8) {
    const TfLiteConvParams* conv_params =
        reinterpret_cast<const TfLiteConvParams*>(builtin_data_);
    stride_height = conv_params->stride_height;
    stride_width = conv_params->stride_width;
    padding_type = conv_params->padding;
    activation = conv_params->activation;
  } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) {
    const TfLiteDepthwiseConvParams* conv_params =
        reinterpret_cast<const TfLiteDepthwiseConvParams*>(builtin_data_);
    stride_height = conv_params->stride_height;
    stride_width = conv_params->stride_width;
    padding_type = conv_params->padding;
    activation = conv_params->activation;
    channel_multiplier = conv_params->depth_multiplier;
    // We only support dilation for DepthwiseConv.
    if (conv_params->dilation_height_factor > 1 ||
        conv_params->dilation_width_factor > 1) {
      is_dilated_depthwise_conv = true;
      dilation_factors_h_w_.push_back(conv_params->dilation_height_factor);
      dilation_factors_h_w_.push_back(conv_params->dilation_width_factor);
    }
  }

  // Weights tensor
  TF_LITE_ENSURE_STATUS(
      InitializeWeightsNodes(inputs, outputs, context, input_depth_size));

  // Stride node. The stride is encoded in the const node's *shape*
  // ({1, stride_h, stride_w, 1}); its data payload is unused, hence the
  // dummy value.
  static int dummy = 0;
  stride_shape_ = {1, stride_height, stride_width, 1};
  auto* stride_node = graph_builder_->AddConstNodeWithData(
      stride_shape_.data(), reinterpret_cast<char*>(&dummy), sizeof(dummy));

  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, context->tensors[outputs->data[0]].dims);
  // Output bounds.
  // TODO(b/129276536): Add support for other activations here. Current
  // implementation assumes None/Relu.
  float output_min = 0;
  float output_max = 0;
  TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues(
      context->tensors[outputs->data[0]], &output_min, &output_max));
  // These denote the bounds fed to Hexagon's Conv mechanism, which will be
  // different from the TFLite tensor bounds if there is a RELU activation.
  float conv_output_min = output_min;
  float conv_output_max = output_max;
  if (activation == kTfLiteActRelu6) {
    conv_output_min = 0;
    conv_output_max = 6;
  } else if (activation == kTfLiteActReluN1To1) {
    conv_output_min = -1;
    conv_output_max = 1;
  } else if (activation == kTfLiteActRelu) {
    // Plain RELU only clamps the lower bound; the upper bound stays at the
    // tensor's max.
    conv_output_min = 0;
  }
  auto* conv_output_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&conv_output_min),
      sizeof(conv_output_min));
  auto* conv_output_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&conv_output_max),
      sizeof(conv_output_max));

  // Bias node.
  TF_LITE_ENSURE_STATUS(InitializeBiasNodes(inputs, outputs, context));

  // TODO(b/143759564): Simplify this method when depth_multiplier support needs
  // generalizing.
  if (channel_multiplier > 1 && input_depth_size == 1) {
    // Depthwise Conv with input_depth == 1 & channel_multiplier > 1 is
    // equivalent to Conv.
    SetOpType(OP_Supernode_8x8p32to8);
  } else if (channel_multiplier > 1) {
    TF_LITE_KERNEL_LOG(
        context, "depth_multiplier > 1 not supported with input_depth > 1");
    return kTfLiteError;
  }

  // Dispatch to the appropriate build path; each returns the conv subgraph's
  // output/min/max tensors via the out-params.
  TensorID output_tensor, output_min_tensor, output_max_tensor;
  if (is_dilated_depthwise_conv) {
    BuildDilatedDwConv(inputs, data_tensor, output_data_tensor, data_min_const,
                       data_max_const, conv_output_min_const,
                       conv_output_max_const, stride_node, stride_height,
                       padding_type, &output_tensor, &output_min_tensor,
                       &output_max_tensor);
  } else if (should_split_dwconv_) {
    BuildSplittedDwConv(inputs, data_tensor, output_data_tensor, data_min_const,
                        data_max_const, conv_output_min_const,
                        conv_output_max_const, stride_node, padding_type,
                        &output_tensor, &output_min_tensor, &output_max_tensor);
  } else {
    BuildStandardConv(inputs, output_data_tensor, data_min_const,
                      data_max_const, conv_output_min_const,
                      conv_output_max_const, stride_node, padding_type,
                      &output_tensor, &output_min_tensor, &output_max_tensor);
  }

  // Requantize if activation was not None & the TFLite tensor's min/max is
  // different (diff > 1e-2) from the RELU bounds.
  const float min_bound_diff = std::abs(conv_output_min - output_min);
  const float max_bound_diff = std::abs(conv_output_max - output_max);
  if (activation != kTfLiteActNone &&
      (min_bound_diff > 0.01 || max_bound_diff > 0.01)) {
    auto* requantized_min_const = graph_builder_->AddConstNodeWithData(
        kScalarShape, reinterpret_cast<char*>(&output_min), sizeof(output_min));
    auto* requantized_max_const = graph_builder_->AddConstNodeWithData(
        kScalarShape, reinterpret_cast<char*>(&output_max), sizeof(output_max));
    auto* requantize_op = graph_builder_->AddNode(GetTFLiteNodeID());
    requantize_op->SetOpType(OP_Requantize_8to8);
    requantize_op->AddInput(output_tensor);
    requantize_op->AddInput(output_min_tensor);
    requantize_op->AddInput(output_max_tensor);
    requantize_op->AddInput(TensorID(requantized_min_const->GetID(), 0));
    requantize_op->AddInput(TensorID(requantized_max_const->GetID(), 0));
    node_output_ =
        requantize_op->AddOutput(sizeof(uint8_t), 4,
                                 {output_batch_size, output_height_size,
                                  output_width_size, output_depth_size});
    requantize_op->AddOutput(sizeof(float), 4, kScalarShape);
    requantize_op->AddOutput(sizeof(float), 4, kScalarShape);
  } else {
    // No requantize needed: the conv's output is the node's output.
    node_output_ = output_tensor;
  }

  return kTfLiteOk;
}
525
// Registers the mapping from the TFLite output tensor index to the Hexagon
// tensor produced by PopulateSubGraph (node_output_ = {node id, output idx}).
TfLiteStatus Conv2dOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs,
                                              TfLiteContext* context) {
  // Should be only 1 output.
  graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
                                  node_output_.second);
  return kTfLiteOk;
}
533
~Conv2dOpBuilder()534 Conv2dOpBuilder::~Conv2dOpBuilder() {}
535
// Factory for Conv2dOpBuilder used by the delegate's op-builder registry.
// NOTE(review): returns a raw owning pointer; presumably the GraphBuilder /
// registry that calls this takes ownership — confirm against the caller.
OpBuilder* CreateConv2DBuilder(GraphBuilder* graph_builder, int op_type) {
  return new Conv2dOpBuilder(graph_builder, op_type);
}
539
540 } // namespace hexagon
541 } // namespace delegates
542 } // namespace tflite
543