/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_QUANTIZATION_UTILS_H_
#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_QUANTIZATION_UTILS_H_

#include <cstdint>
#include <vector>

#include "tensorflow/lite/context.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {
namespace optimize {
namespace utils {

// Returns the number of elements in the given tensor.
TfLiteStatus NumElements(const TensorT& tensor, uint64_t* num_elements);

// Populates the scale and zero point for quantization parameters.
//
// Nudges min and max so that floating point 0 falls exactly on a quantized
// value, returning the nudged scale and zero_point.
void GetAsymmetricQuantizationParams(
    float min, float max, const int quant_min, const int quant_max,
    QuantizationParametersT* quantization_params);
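
// Example usage (an illustrative sketch of the usual asymmetric-quantization
// math, not a contract of this implementation):
//
//   QuantizationParametersT params;
//   GetAsymmetricQuantizationParams(/*min=*/-6.0f, /*max=*/6.0f,
//                                   /*quant_min=*/-128, /*quant_max=*/127,
//                                   &params);
//   // Typically params.scale[0] ~= (max - min) / (quant_max - quant_min)
//   // ~= 0.047, and params.zero_point[0] is nudged so that float 0.0 maps
//   // exactly to an integer value.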

// Populates the single total max and min values for a tensor.
void FillSingleMinMax(const float* const input, const uint64_t input_size,
                      QuantizationParametersT* quantization_params);

// Populates the max and min values for per-channel quantization.
TfLiteStatus FillPerChannelMinMax(const float* const input,
                                  const std::vector<int>& dimension,
                                  int32_t channel_dim_index,
                                  QuantizationParametersT* quantization_params,
                                  ErrorReporter* error_reporter);
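
// Example usage (a sketch; `data` and `reporter` are assumed to exist, with
// `data` holding 2 * 2 * 3 floats):
//
//   QuantizationParametersT params;
//   TfLiteStatus status = FillPerChannelMinMax(
//       data, /*dimension=*/{2, 2, 3}, /*channel_dim_index=*/2, &params,
//       reporter);
//   // On success, params.min and params.max should each hold 3 entries,
//   // one per channel along dimension 2.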

// Per-channel quantizes a tensor at the given index and returns both scales
// and quantized values.
// Parameters:
// - tensor is the tensor to be quantized, needed to access associated
//   quantization parameters.
// - input is the float input data to be quantized.
// - channel_dim_index is the channel index within "dimension".
//   dimension[channel_dim_index] gives the number of channels.
// - output_scales is the output scales, the size of which equals the number
//   of channels.
// - output_value is the output data, the size of which equals the number of
//   input elements.
TfLiteStatus SymmetricPerChannelQuantization(TensorT* tensor,
                                             const float* const input,
                                             int32_t channel_dim_index,
                                             std::vector<float>* output_scales,
                                             std::vector<int8_t>* output_value,
                                             ErrorReporter* error_reporter);
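
// Example usage (a sketch; `tensor`, `float_data`, and `reporter` are assumed
// to exist, with channels along dimension 0):
//
//   std::vector<float> scales;
//   std::vector<int8_t> quantized;
//   TfLiteStatus status = SymmetricPerChannelQuantization(
//       tensor, float_data, /*channel_dim_index=*/0, &scales, &quantized,
//       reporter);
//   // scales.size() should equal tensor->shape[0] (the channel count) and
//   // quantized.size() should equal the total number of input elements.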

// Quantizes the values given an array of inverse scales, one per channel.
void SymmetricPerChannelQuantizeValues(const float* const input,
                                       const std::vector<float>& scales_inv,
                                       const std::vector<int32_t>& dimension,
                                       int32_t channel_dim_index,
                                       std::vector<int8_t>* output_value);
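
// A sketch of the expected mapping (assuming the usual symmetric scheme):
// each element is quantized as
//
//   q = round(x * scales_inv[channel])
//
// and then clamped to the symmetric int8 range; e.g. x = 0.5f with
// scales_inv[channel] = 100.0f gives q = 50.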

// Quantizes tensor using symmetric quantization with the min and max elements
// of the tensor.
TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor);

// Quantizes tensor to float16.
TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor);

// Adds quantization parameters and the quantized buffer data to the tensor.
TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor,
                                   ErrorReporter* error_reporter);

// Populates the scales vector based on the max and min values of quant_params.
TfLiteStatus GetSymmetricScalesFromMaxMin(QuantizationParametersT* quant_params,
                                          std::vector<float>* scales,
                                          ErrorReporter* error_reporter);

// Adjusts the scale of weights if it is incompatible with the bias scale and
// likely to cause overflow.
TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
                                       const float* bias_data,
                                       const size_t bias_size,
                                       const float input_scale,
                                       ErrorReporter* error_reporter);

// Quantizes the tensor using per-channel quantization.
TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index,
                                               ErrorReporter* error_reporter);

// Symmetrically quantizes floats to 16 bits.
TfLiteStatus SymmetricQuantizeFloatsToInt16(ModelT* model, TensorT* tensor,
                                            float scaling_factor,
                                            ErrorReporter* error_reporter);

std::vector<int16_t> SymmetricQuantizeFloatsToInt16(const float* data,
                                                    uint64_t num_elements,
                                                    float scaling_factor);
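
// Example usage (a sketch, assuming the common convention
// q = round(x / scaling_factor), clamped to the symmetric int16 range):
//
//   const float data[] = {-1.0f, 0.0f, 0.5f};
//   std::vector<int16_t> q = SymmetricQuantizeFloatsToInt16(
//       data, /*num_elements=*/3, /*scaling_factor=*/1.0f / 32768.0f);
//   // Under that convention, q would be roughly {-32767, 0, 16384} after
//   // clamping.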

// Symmetrically quantizes the bias for per-layer ops (e.g. FullyConnected).
template <typename BiasType>
TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
                                           float scaling_factor,
                                           ErrorReporter* error_reporter);

// Symmetrically quantizes the bias for ops like Conv and DepthwiseConv.
// The scale of the bias is weight_per_channel_scale[channel] * input_scale.
template <typename BiasType>
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             ErrorReporter* error_reporter);

// Symmetrically quantizes the given bias data using the given scales.
template <typename BiasType>
std::vector<BiasType> SymmetricBiasQuantize(const float* data,
                                            uint64_t num_elements,
                                            const std::vector<float>& scales);
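
// Example usage (a sketch, assuming `scales` holds the effective bias scales,
// i.e. input_scale * weight_scale per channel as described above, and that
// each value is quantized as q = round(bias / scale)):
//
//   const float bias[] = {0.5f, -0.24f};
//   std::vector<int32_t> q = SymmetricBiasQuantize<int32_t>(
//       bias, /*num_elements=*/2, /*scales=*/{0.01f, 0.02f});
//   // Under those assumptions, q would be {50, -12}.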

// Quantizes the weight tensor, either per-channel or per-tensor.
TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index, ErrorReporter* error_reporter);

// Gets the effective scale by combining the input scales, intermediate scales,
// and factors.
float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                        std::vector<int> input_index,
                        std::vector<int> intermediate_index,
                        std::vector<float> factors);
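
// Example usage (a sketch; `model` and `subgraph` are assumed to exist, and
// the op and tensor indices are hypothetical). Presumably the result is the
// product of the scales of the selected inputs and intermediates and the
// given factors:
//
//   float effective = GetEffectiveScale(model, subgraph, /*op_idx=*/3,
//                                       /*input_index=*/{0},
//                                       /*intermediate_index=*/{0},
//                                       /*factors=*/{1.0f / 32768.0f});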

// Returns quantization parameters depending on the activations type.
TfLiteStatus GetQuantizationParams(TensorT* tensor, TensorType activations_type,
                                   QuantizationParametersT* quantization_params,
                                   ErrorReporter* error_reporter);

// Quantizes the activation tensor.
TfLiteStatus QuantizeActivation(TensorT* tensor, TensorType activations_type,
                                ErrorReporter* error_reporter);

// Quantizes the activation to 16 bits.
TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale);

// Gets the power-of-two scale for min and max for the symmetric quantization
// case.
int GetPowerOfTwoScale(float min, float max);
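
// Worked example (a sketch of the expected behavior, not a contract): for
// min = -7.9f and max = 7.9f one would expect a result of 3, since 2^3 = 8
// is the smallest power of two covering max(|min|, |max|) = 7.9.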

}  // namespace utils
}  // namespace optimize
}  // namespace tflite

#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_QUANTIZATION_UTILS_H_