/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/quantization_utils.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <vector>

#include "absl/memory/memory.h"
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/model_utils.h"

namespace tflite {
namespace optimize {
namespace utils {

namespace {
const int8_t kMinQuantizedValue = -127;
const int8_t kMaxQuantizedValue = 127;

// The maximum number of dimensions supported in per-channel quantization.
constexpr int kPerChannelMaxDim = 4;
}  // namespace

TfLiteStatus NumElements(const TensorT& tensor, uint64_t* num_elements) {
  *num_elements = 1;
  for (const int64_t dim : tensor.shape) {
    if (dim <= 0 || *num_elements > UINT64_MAX / static_cast<uint64_t>(dim)) {
      return kTfLiteError;
    }
    *num_elements *= dim;
  }
  return kTfLiteOk;
}
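// Example usage (illustrative sketch, not part of the original source):
//   TensorT t;
//   t.shape = {2, 3, 4};
//   uint64_t n = 0;
//   NumElements(t, &n);  // n == 24.
// A non-positive dimension, or a product that would exceed UINT64_MAX
// (checked by dividing before each multiply), returns kTfLiteError instead.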

// Nudge min and max so that floating point 0 falls exactly on a quantized
// value, returning the nudged scale and zero_point.
//
// Although this code originates from FakeQuantization in quantized training,
// we may deviate from that implementation as we please since we do not fine
// tune the weights with quantized training.
void GetAsymmetricQuantizationParams(
    float min, float max, const int quant_min, const int quant_max,
    QuantizationParametersT* quantization_params) {
  const float quant_min_float = static_cast<float>(quant_min);
  const float quant_max_float = static_cast<float>(quant_max);
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(static_cast<float>(min), 0.0f);
  max = std::max(static_cast<float>(max), 0.0f);
  const float scale = (max - min) / (quant_max_float - quant_min_float);
  // Scale can be zero if min and max are exactly 0.0f.
  float zero_point_from_min = quant_min_float;
  if (scale != 0) {
    zero_point_from_min = quant_min_float - min / scale;
  }
  int64_t zero_point;
  if (zero_point_from_min < quant_min_float) {
    zero_point = static_cast<int64_t>(quant_min);
  } else if (zero_point_from_min > quant_max_float) {
    zero_point = static_cast<int64_t>(quant_max);
  } else {
    zero_point = static_cast<int64_t>(std::round(zero_point_from_min));
  }
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, zero_point);
}
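// Worked example (illustrative, not part of the original source): with
// min = -1.0f, max = 3.0f and the int8 range [-128, 127],
//   scale = (3.0 - (-1.0)) / (127 - (-128)) = 4.0 / 255 ~= 0.01569
//   zero_point_from_min = -128 - (-1.0 / 0.01569) ~= -64.25 -> zero_point = -64
// so the real value 0.0 maps exactly onto the quantized value -64.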

void GetSymmetricQuantizationParams(
    float min, float max, const int half_quant_range,
    QuantizationParametersT* quantization_params) {
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(min, 0.0f);
  max = std::max(max, 0.0f);
  const float scale = std::max(std::abs(max), std::abs(min)) / half_quant_range;
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, 0);
}
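// Worked example (illustrative, not part of the original source): with
// min = -2.5f, max = 1.0f and half_quant_range = 32767 (int16),
//   scale = max(|1.0|, |-2.5|) / 32767 = 2.5 / 32767 ~= 7.63e-5
// and the zero point is always 0, so the representable range is symmetric
// around 0 even though the recorded min/max are not.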

TfLiteStatus GetQuantizationParams(TensorT* tensor, TensorType activations_type,
                                   QuantizationParametersT* quantization_params,
                                   ErrorReporter* error_reporter) {
  if (activations_type == TensorType_INT8) {
    GetAsymmetricQuantizationParams(
        tensor->quantization->min[0], tensor->quantization->max[0],
        std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
        quantization_params);
  } else if (activations_type == TensorType_INT16) {
    const int half_quantized_range = 32767;
    GetSymmetricQuantizationParams(tensor->quantization->min[0],
                                   tensor->quantization->max[0],
                                   half_quantized_range, quantization_params);
  } else {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Unsupported activation type for quantize-activation: %d",
        activations_type);
    return kTfLiteError;
  }
  return kTfLiteOk;
}

// Sets the max and min quantization parameters for a single tensor given its
// values.
void FillSingleMinMax(const float* const input, const uint64_t input_size,
                      QuantizationParametersT* quantization_params) {
  const auto minmax = std::minmax_element(input, input + input_size);
  quantization_params->min.assign(1, *minmax.first);
  quantization_params->max.assign(1, *minmax.second);
}

TfLiteStatus FillPerChannelMinMax(const float* const input,
                                  const std::vector<int32_t>& dimension,
                                  int32_t channel_dim_index,
                                  QuantizationParametersT* quantization_params,
                                  ErrorReporter* error_reporter) {
  if (!quantization_params->min.empty() || !quantization_params->max.empty()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Min or max already present in tensor quantization params.");
    return kTfLiteError;
  }

  if (dimension.size() > kPerChannelMaxDim) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Expected tensor with less than %d dimensions, but got %d.",
        kPerChannelMaxDim + 1, dimension.size());
    return kTfLiteError;
  }
  if (channel_dim_index >= dimension.size()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Expected channel_dim_index to be less than %d, but got %d.",
        dimension.size(), channel_dim_index);
    return kTfLiteError;
  }

  const int32_t channel_dim_size = dimension[channel_dim_index];
  quantization_params->quantized_dimension = channel_dim_index;
  quantization_params->min = std::vector<float>(channel_dim_size);
  quantization_params->max = std::vector<float>(channel_dim_size);
  std::vector<bool> has_min_max_value(channel_dim_size, false);
  int indices[kPerChannelMaxDim];
  RuntimeShape unextended_tensor_dims(dimension.size(), dimension.data());
  RuntimeShape tensor_dims =
      RuntimeShape::ExtendedShape(kPerChannelMaxDim, unextended_tensor_dims);
  channel_dim_index +=
      kPerChannelMaxDim - unextended_tensor_dims.DimensionsCount();

  // Compute the min/max ranges per channel.
  for (indices[0] = 0; indices[0] < tensor_dims.Dims(0); indices[0]++) {
    for (indices[1] = 0; indices[1] < tensor_dims.Dims(1); indices[1]++) {
      for (indices[2] = 0; indices[2] < tensor_dims.Dims(2); indices[2]++) {
        for (indices[3] = 0; indices[3] < tensor_dims.Dims(3); indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          const float val = input[Offset(tensor_dims, indices)];
          if (has_min_max_value[channel_idx]) {
            if (quantization_params->min[channel_idx] > val) {
              quantization_params->min[channel_idx] = val;
            } else if (quantization_params->max[channel_idx] < val) {
              quantization_params->max[channel_idx] = val;
            }
          } else {
            quantization_params->min[channel_idx] = val;
            quantization_params->max[channel_idx] = val;
            has_min_max_value[channel_idx] = true;
          }
        }
      }
    }
  }
  return kTfLiteOk;
}
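// Shape-extension example (illustrative, not part of the original source): a
// weight tensor with shape {8, 3} and channel_dim_index = 0 is extended to the
// 4-D shape {1, 1, 8, 3} by RuntimeShape::ExtendedShape, and the channel index
// is shifted by 4 - 2 = 2, so the loop reads the channel from indices[2] and
// produces 8 per-channel min/max pairs.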

// Populates the scales vector based on max and min values of quant_params.
TfLiteStatus GetSymmetricScalesFromMaxMin(QuantizationParametersT* quant_params,
                                          std::vector<float>* scales,
                                          ErrorReporter* error_reporter) {
  // Check that max and min values are present and their sizes match.
  if (quant_params->min.empty() || quant_params->max.empty()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Max and min values are not populated.");
    return kTfLiteError;
  }
  if (quant_params->min.size() != quant_params->max.size()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Dimensions of max and min values do not match.");
    return kTfLiteError;
  }
  if (scales->size() != quant_params->min.size()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Provided scale vector has incorrect size.");
    return kTfLiteError;
  }

  // num_channels is calculated from min.size() to infer whether quantization
  // is per axis.
  int num_channels = quant_params->min.size();
  // Calculate scales per channel.
  for (int channel_idx = 0; channel_idx < num_channels; ++channel_idx) {
    const float half_range = std::max(std::abs(quant_params->min[channel_idx]),
                                      std::abs(quant_params->max[channel_idx]));
    scales->at(channel_idx) = half_range / kMaxQuantizedValue;
  }
  return kTfLiteOk;
}
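// Worked example (illustrative, not part of the original source): a channel
// with min = -0.4f and max = 0.2f has half_range = 0.4, so its symmetric int8
// scale is 0.4 / 127 ~= 3.15e-3; values are later quantized as
// round(value / scale) and clamped to [-127, 127].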

// Checks that the bias is quantized to within the middle half of the
// allowable bit range determined by the scales of the input and weight tensors.
// If this condition is not satisfied, the scale of the weights is increased in
// order to prevent overflow. The scale of the bias is not set here, only the
// min/max.
// The quant_params are the quantization parameters that correspond to the
// weight tensor.
TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
                                       const float* bias_data,
                                       const size_t bias_size,
                                       const float input_scale,
                                       ErrorReporter* error_reporter) {
  // TODO(dmolitor) Allow adjusting activation scale.
  // TODO(dmolitor) Tighten scale adjustment.
  // TODO(dmolitor) Test using a separate strategy for scales of 0.
  const int32_t kScale = std::numeric_limits<int32_t>::max();
  if (quant_params == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Missing max and min values for weight tensor.");
    return kTfLiteError;
  }
  // channel_dim_size is calculated from min.size() to infer whether
  // quantization is per axis.
  int channel_dim_size = quant_params->min.size();
  if (channel_dim_size == 0) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Missing weight scales. Unable to check compatibility with bias "
        "scale.");
    return kTfLiteError;
  }

  std::vector<float> weight_scales(channel_dim_size);
  TF_LITE_ENSURE_STATUS(GetSymmetricScalesFromMaxMin(
      quant_params, &weight_scales, error_reporter));

  // Per channel quantization
  if (channel_dim_size > 1) {
    for (int i = 0; i < channel_dim_size; ++i) {
      // Current scale is not compatible with bias. Adjust max/min values.
      if (std::abs(bias_data[i]) >=
          0.5 * input_scale * weight_scales[i] * kScale) {
        quant_params->max[i] = 2.0 * std::abs(bias_data[i]) / kScale *
                               (kMaxQuantizedValue / input_scale);
        quant_params->min[i] = -quant_params->max[i];
      }
    }
    // Per layer quantization
  } else if (channel_dim_size == 1) {
    const auto minmax = std::minmax_element(bias_data, bias_data + bias_size);
    const float bias_half_range =
        std::max(std::abs(*minmax.first), std::abs(*minmax.second));

    // Need to adjust weight min/max; not compatible with bias.
    if (bias_half_range / kScale >= 0.5 * input_scale * weight_scales[0]) {
      quant_params->min[0] =
          2.0 * bias_half_range / kScale * (kMinQuantizedValue / input_scale);
      quant_params->max[0] =
          2.0 * bias_half_range / kScale * (kMaxQuantizedValue / input_scale);
    }
  }
  return kTfLiteOk;
}
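// Overflow-check example (illustrative, not part of the original source): the
// bias is later quantized with scale input_scale * weight_scale into an int32.
// With input_scale = 0.05 and weight_scale = 1e-6, any bias element with
// |b| >= 0.5 * 0.05 * 1e-6 * INT32_MAX ~= 53.7 trips the check above, and the
// weight min/max are widened so the quantized bias stays well inside int32.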

// Per-channel quantizes a tensor at the given index, filling both the scales
// and the quantized values.
TfLiteStatus SymmetricPerChannelQuantization(TensorT* tensor,
                                             const float* const input,
                                             int32_t channel_dim_index,
                                             std::vector<float>* output_scales,
                                             std::vector<int8_t>* output_value,
                                             ErrorReporter* error_reporter) {
  if (tensor == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "Cannot quantize. Tensor is null.");
    return kTfLiteError;
  }
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];
  // Fill per channel max and min values if needed.
  if (tensor->quantization == nullptr) {
    tensor->quantization = std::make_unique<QuantizationParametersT>();
  }
  if (!HasMinMax(tensor)) {
    TF_LITE_ENSURE_STATUS(
        FillPerChannelMinMax(input, tensor->shape, channel_dim_index,
                             tensor->quantization.get(), error_reporter));
  }

  // Calculate scales per channel using max and min values from tensor.
  std::vector<float> scale_invs(channel_dim_size);
  const float half_scale = kMaxQuantizedValue;
  for (int channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) {
    const float half_range =
        std::max(std::abs(tensor->quantization->min[channel_idx]),
                 std::abs(tensor->quantization->max[channel_idx]));
    output_scales->at(channel_idx) = half_range / half_scale;
    if (half_range == 0) {
      scale_invs[channel_idx] = 0;
    } else {
      scale_invs[channel_idx] = half_scale / half_range;
    }
  }

  // Quantize the input values.
  SymmetricPerChannelQuantizeValues(input, scale_invs, tensor->shape,
                                    channel_dim_index, output_value);
  return kTfLiteOk;
}

std::vector<int16_t> SymmetricQuantizeFloatsToInt16(const float* data,
                                                    uint64_t num_elements,
                                                    float scaling_factor) {
  // Compute the inverse of scale.
  const float scaling_factor_inv =
      (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
  std::vector<int16_t> buffer(num_elements);
  const int32_t kScale = std::numeric_limits<int16_t>::max();

  for (size_t i = 0; i < num_elements; i++) {
    const int32_t quantized_value =
        static_cast<int32_t>(TfLiteRound(data[i] * scaling_factor_inv));
    buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
  }
  return buffer;
}
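// Worked example (illustrative, not part of the original source): with
// scaling_factor = 0.001f the inverse is 1000, so 0.5f quantizes to
// round(0.5 * 1000) = 500, while 40.0f would compute 40000 and is clamped to
// the int16 limit 32767; a scaling_factor of 0 maps every value to 0.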

TfLiteStatus SymmetricQuantizeFloatsToInt16(ModelT* model, TensorT* tensor,
                                            float scaling_factor,
                                            ErrorReporter* error_reporter) {
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer =
      SymmetricQuantizeFloatsToInt16(float_data, num_elements, scaling_factor);
  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(int16_t);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, TensorType_INT16, model, tensor,
                               error_reporter);
}

void SymmetricPerChannelQuantizeValues(const float* const input,
                                       const std::vector<float>& scales_inv,
                                       const std::vector<int32_t>& dimension,
                                       int32_t channel_dim_index,
                                       std::vector<int8_t>* output_value) {
  // Quantize the values.
  int indices[kPerChannelMaxDim];
  RuntimeShape unextended_tensor_dims(dimension.size(), dimension.data());
  RuntimeShape tensor_dims =
      RuntimeShape::ExtendedShape(kPerChannelMaxDim, unextended_tensor_dims);
  channel_dim_index +=
      kPerChannelMaxDim - unextended_tensor_dims.DimensionsCount();
  for (indices[0] = 0; indices[0] < tensor_dims.Dims(0); indices[0]++) {
    for (indices[1] = 0; indices[1] < tensor_dims.Dims(1); indices[1]++) {
      for (indices[2] = 0; indices[2] < tensor_dims.Dims(2); indices[2]++) {
        for (indices[3] = 0; indices[3] < tensor_dims.Dims(3); indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          int index = Offset(tensor_dims, indices);
          const float val = input[index];
          const int32_t quantized_value =
              static_cast<int32_t>(TfLiteRound(val * scales_inv[channel_idx]));
          output_value->at(index) = std::min<int8_t>(
              kMaxQuantizedValue,
              std::max<int8_t>(kMinQuantizedValue, quantized_value));
        }
      }
    }
  }
}

// Quantize the tensor using the max and min values recorded in its
// quantization parameters. Applies per-layer quantization.
TfLiteStatus SymmetricQuantizeTensorFromMinMax(ModelT* model, TensorT* tensor,
                                               ErrorReporter* error_reporter) {
  if (model == nullptr || tensor == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "Missing buffer.");
    return kTfLiteError;
  }

  if (!HasMinMax(tensor)) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Missing min or max values for quantization.");
    return kTfLiteError;
  }
  if (tensor->quantization->min.size() != 1 ||
      tensor->quantization->max.size() != 1) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Expected single entry in max and min.");
    return kTfLiteError;
  }

  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  // Quantize the tensor using the recorded min and max values.
  float scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(
      float_data, num_elements, quantized_buffer.data(),
      tensor->quantization->min[0], tensor->quantization->max[0],
      &scaling_factor);
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);
  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  float min_value, max_value, scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(float_data, num_elements,
                                        quantized_buffer.data(), &min_value,
                                        &max_value, &scaling_factor);

  if (tensor->quantization == nullptr) {
    tensor->quantization = std::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);

  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }

  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  // Copy the buffer data a byte at a time into a float vector to guard
  // against misalignment.
  std::vector<float> float_vector(num_elements);
  uint8_t* first = buffer->data.data();
  std::copy(first, first + buffer->data.size(),
            reinterpret_cast<uint8_t*>(float_vector.data()));

  // Transform float data to float16.
  std::vector<Eigen::half> quantized_buffer;
  quantized_buffer.resize(num_elements);
  constexpr float kMaxFloat16Value = 65504.f;
  constexpr float kMinFloat16Value = -65504.f;
  std::transform(float_vector.begin(), float_vector.end(),
                 quantized_buffer.begin(), [=](float a) {
                   float clamped = std::min(std::max(a, kMinFloat16Value),
                                            kMaxFloat16Value);
                   return static_cast<Eigen::half>(clamped);
                 });

  char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(
      half_buffer, half_buffer + sizeof(Eigen::half) * num_elements);

  // Update the tensor type.
  tensor->type = TensorType_FLOAT16;

  return kTfLiteOk;
}
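// Clamping note (illustrative, not part of the original source): float16 has a
// maximum finite magnitude of 65504, so a float32 weight of 1.0e5f is clamped
// to 65504.f before the cast, while values inside the range (for example
// 0.25f, which is exactly representable) convert with only normal float16
// rounding error.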

TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor,
                                   ErrorReporter* error_reporter) {
  if (tensor->quantization == nullptr) {
    tensor->quantization = std::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale.assign(scales.begin(), scales.end());
  if (zero_point.size() != scales.size()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Received zero_point of size %d and scales of size %d. "
        "These sizes should match.",
        zero_point.size(), scales.size());
    return kTfLiteError;
  }
  tensor->quantization->zero_point.assign(zero_point.begin(), zero_point.end());
  tensor->quantization->quantized_dimension = quantized_dimension;
  model->buffers[tensor->buffer]->data.assign(buffer_data,
                                              buffer_data + buffer_size);
  // Update the tensor type.
  tensor->type = output_type;
  return kTfLiteOk;
}

TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index,
                                               ErrorReporter* error_reporter) {
  if (tensor->shape.size() > kPerChannelMaxDim) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "SymmetricQuantizeTensorPerChannel requires tensor with less than %d "
        "dimensions, but got %d dimension(s).",
        kPerChannelMaxDim + 1, tensor->shape.size());
    return kTfLiteError;
  }

  // Get dimensions.
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];

  // Get input float data.
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_input_data =
      reinterpret_cast<const float*>(buffer->data.data());

  // Create containers for the output scales and output data.
  std::vector<float> scales(channel_dim_size);
  std::vector<int8_t> final_buffer(num_elements);

  // Quantize the input data with respect to channel_dim_index.
  TF_LITE_ENSURE_STATUS(SymmetricPerChannelQuantization(
      tensor, float_input_data, channel_dim_index, &scales, &final_buffer,
      error_reporter));

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  const size_t buffer_size = num_elements * sizeof(int8_t);
  std::vector<int64_t> zero_point(scales.size(), 0);
  return AddQuantizationParams(scales, zero_point, channel_dim_index,
                               uint8_buffer, buffer_size, TensorType_INT8,
                               model, tensor, error_reporter);
}

template <class BiasType>
std::vector<BiasType> SymmetricBiasQuantize(const float* data,
                                            uint64_t num_elements,
                                            const std::vector<float>& scales) {
  std::vector<BiasType> buffer(num_elements);
  const BiasType kScale = std::numeric_limits<BiasType>::max();
  float scaling_factor_inv_per_layer = (scales[0] == 0) ? 0 : 1.0 / scales[0];

  for (int32_t idx = 0; idx < num_elements; idx++) {
    float scaling_factor_inv =
        scales.size() == 1 ? scaling_factor_inv_per_layer
                           : ((scales[idx] == 0) ? 0 : 1.0 / scales[idx]);
    const BiasType quantized_value =
        tflite::SafeCast<BiasType>(TfLiteRound(data[idx] * scaling_factor_inv));
    buffer[idx] = std::min(kScale, std::max(-kScale, quantized_value));
  }
  return buffer;
}

template std::vector<std::int32_t> SymmetricBiasQuantize<std::int32_t>(
    const float* data, uint64_t num_elements, const std::vector<float>& scales);
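// Worked example (illustrative, not part of the original source): a bias of
// 1.25f with a single scale of 0.01f quantizes to round(1.25 / 0.01) = 125.
// When one scale per channel is supplied (input_scale * weight_scale[i]),
// bias element i uses scales[i]; results are clamped to the BiasType limits,
// e.g. +/-INT32_MAX for 32-bit biases.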

template <class BiasType>
TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
                                           float scaling_factor,
                                           ErrorReporter* error_reporter) {
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer = SymmetricBiasQuantize<BiasType>(float_data, num_elements,
                                                      {scaling_factor});

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(BiasType);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);

  auto output_type = std::is_same<BiasType, std::int32_t>::value
                         ? TensorType_INT32
                         : TensorType_INT64;
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, output_type, model, tensor,
                               error_reporter);
}

template TfLiteStatus SymmetricPerLayerBiasQuantize<std::int32_t>(
    ModelT* model, TensorT* tensor, float scaling_factor,
    ErrorReporter* error_reporter);

template TfLiteStatus SymmetricPerLayerBiasQuantize<std::int64_t>(
    ModelT* model, TensorT* tensor, float scaling_factor,
    ErrorReporter* error_reporter);

template <class BiasType>
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             ErrorReporter* error_reporter) {
  // Compute scales.
  std::vector<float> scales(number_of_dimension);
  for (int i = 0; i < number_of_dimension; i++) {
    scales[i] = input_scale * weight_scales[i];
  }

  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer =
      SymmetricBiasQuantize<BiasType>(float_data, num_elements, scales);

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(BiasType);
  std::vector<int64_t> zero_point(scales.size(), 0);

  auto output_type = std::is_same<BiasType, std::int32_t>::value
                         ? TensorType_INT32
                         : TensorType_INT64;
  return AddQuantizationParams(scales, zero_point, 0, uint8_buffer, buffer_size,
                               output_type, model, tensor, error_reporter);
}

template TfLiteStatus SymmetricPerChannelBiasQuantize<std::int64_t>(
    ModelT* model, TensorT* tensor, float input_scale,
    const float* weight_scales, int number_of_dimension,
    ErrorReporter* error_reporter);

template TfLiteStatus SymmetricPerChannelBiasQuantize<std::int32_t>(
    ModelT* model, TensorT* tensor, float input_scale,
    const float* weight_scales, int number_of_dimension,
    ErrorReporter* error_reporter);

TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index, ErrorReporter* error_reporter) {
  // TODO(suharshs): Currently we conflate quantizing weights and constants.
  // It's possible that the right thing to do is to asymmetrically quantize
  // the weight. Add support for this.
  if (per_channel) {
    return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index,
                                             error_reporter);
  } else if (HasMinMax(tensor) && (tensor->quantization->min.size() == 1) &&
             (tensor->quantization->max.size() == 1)) {
    // Quantize using recorded min/max values if per-tensor.
    return SymmetricQuantizeTensorFromMinMax(model, tensor, error_reporter);
  } else {
    // Quantize using min/max from buffer.
    return SymmetricQuantizeTensor(model, tensor);
  }
}

float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                        std::vector<int> input_index,
                        std::vector<int> intermediate_index,
                        std::vector<float> factors) {
  float scale = 1.0f;
  OperatorT* op = subgraph->operators[op_idx].get();
  for (int i = 0, end = input_index.size(); i < end; ++i) {
    const int index_local = input_index[i];
    const int index_global = op->inputs[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0, end = intermediate_index.size(); i < end; ++i) {
    const int index_local = intermediate_index[i];
    const int index_global = op->intermediates[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0, end = factors.size(); i < end; ++i) {
    scale *= factors[i];
  }
  return scale;
}
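// Worked example (illustrative, not part of the original source): if the two
// selected inputs of an op have scales 0.5 and 0.01, one selected intermediate
// has scale 2.0, and factors = {1.0 / 32768.0}, the returned effective scale
// is 0.5 * 0.01 * 2.0 / 32768 ~= 3.05e-7, i.e. the product of every selected
// tensor scale and every extra factor.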

TfLiteStatus QuantizeActivation(TensorT* tensor, TensorType activations_type,
                                ErrorReporter* error_reporter) {
  TF_LITE_ENSURE_STATUS(GetQuantizationParams(
      tensor, activations_type, tensor->quantization.get(), error_reporter));
  tensor->type = activations_type;
  return kTfLiteOk;
}

TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale) {
  const int32_t zero_point = 0;
  tensor->quantization = std::make_unique<QuantizationParametersT>();
  tensor->quantization->scale.push_back(scale);
  tensor->quantization->zero_point.push_back(zero_point);
  tensor->type = TensorType_INT16;
  return kTfLiteOk;
}

int GetPowerOfTwoScale(float min, float max) {
  const float range = std::max(std::abs(min), std::abs(max));
  int pot = 0;
  for (int i = 0; i < 10; i++) {
    // NOTE: use std::pow() for bitwise accuracy.
    if (std::pow(2, pot) < range) {  // NOLINT
      pot++;
    }
  }
  return pot;
}
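// Worked example (illustrative, not part of the original source): with
// min = -1.3f and max = 3.0f the range is 3.0, and the loop settles on
// pot = 2 because 2^2 = 4 is the smallest power of two (searched up to 2^10)
// that is not below the range.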

}  // namespace utils
}  // namespace optimize
}  // namespace tflite