/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_

#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"

#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif

namespace tflite {
namespace tensor_utils {

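// Thin wrappers that forward each tensor_utils entry point to the portable
// (pure C++) reference implementation declared in
// portable_tensor_utils_impl.h.
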
// Check if all entries of a vector are zero for float.
bool IsZeroVector(const float* vector, int v_size) {
  return PortableIsZeroVector(vector, v_size);
}

// Check if all entries of a vector are zero for int8_t.
bool IsZeroVector(const int8_t* vector, int v_size) {
  return PortableIsZeroVector(vector, v_size);
}

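// Symmetric per-tensor quantization of `size` float values into int8_t. The
// first overload also reports the min/max it finds; the second takes
// precomputed min/max. `scaling_factor` maps quantized values back to float.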
void SymmetricQuantizeFloats(const float* values, const int size,
                             int8_t* quantized_values, float* min, float* max,
                             float* scaling_factor) {
  PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
                                  scaling_factor);
}

void SymmetricQuantizeFloats(const float* values, const int size,
                             int8_t* quantized_values, float min_value,
                             float max_value, float* scaling_factor) {
  PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
                                  max_value, scaling_factor);
}

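// Asymmetric quantization of `size` float values into int8_t; produces both a
// scaling factor and an int32_t zero-point offset.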
void AsymmetricQuantizeFloats(const float* values, const int size,
                              int8_t* quantized_values, float* scaling_factor,
                              int32_t* offset) {
  PortableAsymmetricQuantizeFloats(values, size, quantized_values,
                                   scaling_factor, offset);
}

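// Computes result += matrix * vector for a batch of vectors. `matrix` is
// m_rows x m_cols in row-major order, `vector` holds n_batch contiguous
// vectors of length m_cols, and `result` holds n_batch vectors of length
// m_rows.
//
// Illustrative usage sketch (values are placeholders):
//   float matrix[2 * 3] = {1, 2, 3, 4, 5, 6};
//   float vectors[2 * 3] = {1, 0, 0, 0, 1, 0};   // two batches
//   float result[2 * 2] = {0.f};                 // must be pre-initialized
//   MatrixBatchVectorMultiplyAccumulate(matrix, /*m_rows=*/2, /*m_cols=*/3,
//                                       vectors, /*n_batch=*/2, result);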
void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
                                         int m_cols, const float* vector,
                                         int n_batch, float* result) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              n_batch, result);
}

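// Hybrid overloads: int8_t weights and quantized inputs with float
// accumulation. `scaling_factors` holds one factor per batch to convert the
// integer dot products back to float. The variants taking per_channel_scale,
// input_offset and row_sums handle asymmetric/per-channel quantization; the
// scratch/CpuBackendContext variant exists for optimized backends, and the
// portable path simply forwards to the basic overload.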
void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
                                         const int m_rows, const int m_cols,
                                         const int8_t* __restrict__ vector,
                                         const float* scaling_factors,
                                         int n_batch,
                                         float* __restrict__ result) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              scaling_factors, n_batch, result);
}

void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors, const float* scaling_factors,
    int n_batch, float* __restrict__ result, const float* per_channel_scale,
    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
    bool* compute_row_sums, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
      per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
      context);
}

void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
                                         const int m_rows, const int m_cols,
                                         const int8_t* __restrict__ vector,
                                         const float* scaling_factors,
                                         int n_batch, int32_t* scratch,
                                         float* __restrict__ result,
                                         CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              scaling_factors, n_batch, result);
}

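// Sparse variants. The 1x4 (float) and 1x16 (int8_t) versions use a
// block-sparse layout described by `segments` and `indices`; the `ledger`
// versions encode the non-zero blocks of each row in a ledger buffer.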
void SparseMatrixBatchVectorMultiplyAccumulate1x4(
    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
      matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
}

void SparseMatrixBatchVectorMultiplyAccumulate(
    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
    float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate(
      matrix, ledger, m_rows, m_cols, vector, n_batch, result);
}

void SparseMatrixBatchVectorMultiplyAccumulate1x16(
    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
    const int32_t output_shift, const int32_t output_offset,
    const int32_t output_activation_min, const int32_t output_activation_max,
    int8_t* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
      matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch,
      input_offset, output_multiplier, output_shift, output_offset,
      output_activation_min, output_activation_max, result);
}

void SparseMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
    const int m_cols, const int8_t* __restrict__ vectors,
    const float* scaling_factors, int n_batch, float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate(
      matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
      result);
}

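// Fully integer matrix-batch-vector multiply-accumulate used by quantized
// LSTM-style kernels: accumulates int8_t input against int8_t weights, adds
// the int32_t bias, rescales with the quantized multiplier/shift pair, and
// writes int16_t or int8_t output shifted by `output_zp`.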
void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
      n_output, output_zp, scratch, output, context);
}

void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
      n_output, output_zp, scratch, output, context);
}

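// Accumulates `scalar` times the row sums of the n_row x n_col matrix into
// the corresponding int32_t entries of `output`.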
void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
                                    int32_t n_row, int32_t n_col,
                                    int32_t* output) {
  PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
}

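// Quantized matrix-batch-vector products without accumulation into a prior
// result: the first overload maps int8_t inputs to int8_t gate outputs, the
// second projects int16_t hidden state to int8_t output using `gate_bias` and
// the effective scale (a, b) pairs.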
void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
                               const int8_t* input_to_gate_weights,
                               int32_t input_to_gate_effective_scale_a,
                               int32_t input_to_gate_effective_scale_b,
                               int32_t n_batch, int32_t n_input, int32_t n_cell,
                               int8_t* gate_output, int8_t gate_output_zp) {
  PortableMatrixBatchVectorMultiply(
      input, input_zeropoint, input_to_gate_weights,
      input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
      n_input, n_cell, gate_output, gate_output_zp);
}

void MatrixBatchVectorMultiply(const int16_t* hidden,
                               const int8_t* hidden_to_output_weights,
                               int32_t proj_effective_scale_a,
                               int32_t proj_effective_scale_b,
                               const int32_t* gate_bias, int32_t n_batch,
                               int32_t n_hidden, int32_t n_output,
                               int32_t output_zp, int8_t* proj_output) {
  PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
                                    proj_effective_scale_a,
                                    proj_effective_scale_b, gate_bias, n_batch,
                                    n_hidden, n_output, output_zp, proj_output);
}

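// Layer normalization over each batch row of int16_t input, using the
// quantized scale pair (layer_norm_scale_a, layer_norm_scale_b) and int32_t
// bias. The *Float variant performs the internal arithmetic in float.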
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
                    const int32_t* bias, int32_t layer_norm_scale_a,
                    int32_t layer_norm_scale_b, int32_t variance_limit,
                    int n_batch, int n_input, int16_t* output) {
  PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
                         layer_norm_scale_b, variance_limit, n_batch, n_input,
                         output);
}

void ApplyLayerNormFloat(const int16_t* input,
                         const int16_t* layer_norm_weights,
                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
                         const int32_t* bias, int n_batch, int n_input,
                         int16_t* output) {
  PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
                              layer_norm_scale_b, bias, n_batch, n_input,
                              output);
}

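// Fixed-point sigmoid and tanh on int16_t inputs; `integer_bits` selects the
// input Q-format for tanh. The *Float variants compute the activation in
// float internally and requantize the result.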
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
                  int16_t* output) {
  PortableApplySigmoid(input, n_batch, n_input, output);
}

void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
                       int16_t* output) {
  PortableApplySigmoidFloat(input, n_batch, n_input, output);
}

void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
               int32_t n_input, int16_t* output) {
  PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
}

void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
                    int32_t integer_bits, int16_t* output) {
  PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
}

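// Element-wise operations on batched int16_t vectors: multiplication (with a
// simple right shift, or with full requantization to int8_t), saturating
// addition, and clipping of float/int16_t/int8_t vectors to
// [-clipping_value, clipping_value].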
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int shift, int16_t* output) {
  PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
}

void CwiseMul(const int16_t* input_1, const int16_t* input_2,
              int32_t multiplier, int32_t shift, int32_t n_batch,
              int32_t n_input, int32_t output_zp, int8_t* output) {
  PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
                   output_zp, output);
}

void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int16_t* output) {
  PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
}

void CwiseClipping(float* vector, const int v_size,
                   const float clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

void CwiseClipping(int16_t* vector, const int v_size,
                   const int16_t clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

void CwiseClipping(int8_t* vector, const int v_size,
                   const int8_t clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

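// Dot-product helpers: a vector-times-batch-vector element-wise product
// accumulated into `result` with requantization, a float dot product, and a
// batched int16_t dot product producing int32_t sums.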
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
                                             const int16_t* batch_vector,
                                             int n_batch, int32_t multiplier,
                                             int shift, int16_t* result) {
  PortableVectorBatchVectorCwiseProductAccumulate(
      vector, v_size, batch_vector, n_batch, multiplier, shift, result);
}

float VectorVectorDotProduct(const float* vector1, const float* vector2,
                             int v_size) {
  return PortableVectorVectorDotProduct(vector1, vector2, v_size);
}

void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
                                      const int16_t* vector2, int v_size,
                                      int n_batch, int32_t* result) {
  PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch,
                                           result);
}

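// Computes result = 1 - vector element-wise; the int16_t overload operates on
// Q0.15 values.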
void Sub1Vector(const float* vector, int v_size, float* result) {
  PortableSub1Vector(vector, v_size, result);
}

void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
  PortableSub1Vector(vector, v_size, result);
}

// Multiplies each element of the int8_t vector by `scale`, producing float
// results.
void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                          float* result) {
  PortableVectorScalarMultiply(vector, v_size, scale, result);
}

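// Reduces `input_vector` by summing `reduction_size` consecutive elements
// into each of the `output_size` entries of `output_vector`.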
void ReductionSumVector(const float* input_vector, float* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

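// Normalizes each batch row of length v_size to zero mean and unit standard
// deviation.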
void MeanStddevNormalization(const float* input_vector, float* output_vector,
                             int v_size, int n_batch) {
  PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
}

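// Adds the rescaled input and recurrent gate contributions (each with its own
// zero point and effective scale pair) with saturation into int16_t output.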
void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
                          const int8_t* recurrent, int8_t recurrent_zp,
                          int32_t input_effective_scale_a,
                          int32_t input_effective_scale_b,
                          int32_t recurrent_effective_scale_a,
                          int32_t recurrent_effective_scale_b, int32_t n_batch,
                          int32_t n_cell, int16_t* output) {
  PortableTwoGateSaturatingAdd(
      input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
      input_effective_scale_b, recurrent_effective_scale_a,
      recurrent_effective_scale_b, n_batch, n_cell, output);
}

}  // namespace tensor_utils
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_