xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/kernels/internal/reference/integer_ops/add.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
16 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
17 
18 #include <algorithm>
19 #include <limits>
20 
21 #include "tensorflow/lite/kernels/internal/common.h"
22 #include "tensorflow/lite/kernels/internal/types.h"
23 
24 namespace tflite {
25 namespace reference_integer_ops {
26 
CheckArithmeticParams(const ArithmeticParams & params)27 inline void CheckArithmeticParams(const ArithmeticParams& params) {
28   TFLITE_DCHECK_LE(params.quantized_activation_min,
29                    params.quantized_activation_max);
30   // Input offset is negative input zero point. Activation tensors are
31   // asymmetric quantized so they span the full int8 range.
32   TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
33   TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
34   TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
35   TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
36 }
37 
ElementWise(int size,const ArithmeticParams & params,const int8_t * input1_data,const int8_t * input2_data,int8_t * output_data,void (* check_arithmetic_params)(const ArithmeticParams &),int8_t (* binary_func)(int8_t,int8_t,const ArithmeticParams &))38 inline void ElementWise(
39     int size, const ArithmeticParams& params, const int8_t* input1_data,
40     const int8_t* input2_data, int8_t* output_data,
41     void (*check_arithmetic_params)(const ArithmeticParams&),
42     int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
43   CheckArithmeticParams(params);
44   for (int i = 0; i < size; ++i) {
45     output_data[i] = binary_func(input1_data[i], input2_data[i], params);
46   }
47 }
48 
BroadcastBinaryFunction4DSlow(const ArithmeticParams & params,const RuntimeShape & input1_shape,const int8_t * input1_data,const RuntimeShape & input2_shape,const int8_t * input2_data,const RuntimeShape & output_shape,int8_t * output_data,void (* check_arithmetic_params)(const ArithmeticParams &),int8_t (* binary_func)(int8_t,int8_t,const ArithmeticParams &))49 inline void BroadcastBinaryFunction4DSlow(
50     const ArithmeticParams& params, const RuntimeShape& input1_shape,
51     const int8_t* input1_data, const RuntimeShape& input2_shape,
52     const int8_t* input2_data, const RuntimeShape& output_shape,
53     int8_t* output_data,
54     void (*check_arithmetic_params)(const ArithmeticParams&),
55     int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
56   NdArrayDesc<4> desc1;
57   NdArrayDesc<4> desc2;
58   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
59                                       &desc2);
60   const RuntimeShape extended_output_shape =
61       RuntimeShape::ExtendedShape(4, output_shape);
62 
63   // In Tensorflow, the dimensions are canonically named (batch_number, row,
64   // col, channel), with extents (batches, height, width, depth), with the
65   // trailing dimension changing most rapidly (channels has the smallest stride,
66   // typically 1 element).
67   //
68   // In generated C code, we store arrays with the dimensions reversed. The
69   // first dimension has smallest stride.
70   //
71   // We name our variables by their Tensorflow convention, but generate C code
72   // nesting loops such that the innermost loop has the smallest stride for the
73   // best cache behavior.
74   for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
75     for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
76       for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
77         for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
78           output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func(
79               input1_data[SubscriptToIndex(desc1, b, y, x, c)],
80               input2_data[SubscriptToIndex(desc2, b, y, x, c)], params);
81         }
82       }
83     }
84   }
85 }
86 
AddFunc(int8_t x,int8_t y,const ArithmeticParams & params)87 inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) {
88   const int32_t input1_val = params.input1_offset + x;
89   const int32_t input2_val = params.input2_offset + y;
90   const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
91   const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
92   const int32_t scaled_input1_val =
93       MultiplyByQuantizedMultiplierSmallerThanOneExp(
94           shifted_input1_val, params.input1_multiplier, params.input1_shift);
95   const int32_t scaled_input2_val =
96       MultiplyByQuantizedMultiplierSmallerThanOneExp(
97           shifted_input2_val, params.input2_multiplier, params.input2_shift);
98   const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
99   const int32_t raw_output =
100       MultiplyByQuantizedMultiplierSmallerThanOneExp(
101           raw_sum, params.output_multiplier, params.output_shift) +
102       params.output_offset;
103   const int32_t clamped_output =
104       std::min(params.quantized_activation_max,
105                std::max(params.quantized_activation_min, raw_output));
106   return static_cast<int8_t>(clamped_output);
107 }
108 
109 // Element-wise add that can often be used for inner loop of broadcast add as
110 // well as the non-broadcast add.
AddElementwise(int size,const ArithmeticParams & params,const int8_t * input1_data,const int8_t * input2_data,int8_t * output_data)111 inline void AddElementwise(int size, const ArithmeticParams& params,
112                            const int8_t* input1_data, const int8_t* input2_data,
113                            int8_t* output_data) {
114   ElementWise(size, params, input1_data, input2_data, output_data,
115               CheckArithmeticParams, AddFunc);
116 }
117 
Add(const ArithmeticParams & params,const RuntimeShape & input1_shape,const int8_t * input1_data,const RuntimeShape & input2_shape,const int8_t * input2_data,const RuntimeShape & output_shape,int8_t * output_data)118 inline void Add(const ArithmeticParams& params,
119                 const RuntimeShape& input1_shape, const int8_t* input1_data,
120                 const RuntimeShape& input2_shape, const int8_t* input2_data,
121                 const RuntimeShape& output_shape, int8_t* output_data) {
122   CheckArithmeticParams(params);
123 
124   const int flat_size =
125       MatchingElementsSize(input1_shape, input2_shape, output_shape);
126 
127   AddElementwise(flat_size, params, input1_data, input2_data, output_data);
128 }
129 
BroadcastAdd4DSlow(const ArithmeticParams & params,const RuntimeShape & input1_shape,const int8_t * input1_data,const RuntimeShape & input2_shape,const int8_t * input2_data,const RuntimeShape & output_shape,int8_t * output_data)130 inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
131                                const RuntimeShape& input1_shape,
132                                const int8_t* input1_data,
133                                const RuntimeShape& input2_shape,
134                                const int8_t* input2_data,
135                                const RuntimeShape& output_shape,
136                                int8_t* output_data) {
137   BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape,
138                                 input2_data, output_shape, output_data,
139                                 CheckArithmeticParams, AddFunc);
140 }
141 
142 }  // namespace reference_integer_ops
143 }  // namespace tflite
144 
145 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
146