xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/client/lib/quantize_test.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/client/lib/quantize.h"
17 
18 #include <limits>
19 
20 #include "tensorflow/compiler/xla/client/xla_builder.h"
21 #include "tensorflow/compiler/xla/test.h"
22 #include "tensorflow/compiler/xla/tests/client_library_test_base.h"
23 #include "tensorflow/compiler/xla/tests/test_macros.h"
24 #include "tensorflow/compiler/xla/types.h"
25 #include "tensorflow/compiler/xla/util.h"
26 
27 namespace xla {
28 namespace {
29 
30 using bfloat16 = tensorflow::bfloat16;
31 
// Returns the values of NativeT in ascending order, starting at
// std::numeric_limits<NativeT>::min() and stopping one short of
// std::numeric_limits<NativeT>::max().
//
// NOTE: the maximum value itself is not emitted. GenerateMinCombinedOutput()
// in this file walks the same half-open range [min, max), so the two stay
// element-for-element aligned.
template <typename NativeT>
std::vector<NativeT> GenerateInput() {
  const int64_t lowest = std::numeric_limits<NativeT>::min();
  const int64_t highest = std::numeric_limits<NativeT>::max();

  std::vector<NativeT> input;
  // Reserve the real element count (highest - lowest). Reserving only
  // `highest` under-allocates for signed types, whose range starts below zero.
  input.reserve(
      static_cast<typename std::vector<NativeT>::size_type>(highest - lowest));

  for (int64_t value = lowest; value < highest; ++value) {
    input.push_back(static_cast<NativeT>(value));
  }

  return input;
}
44 
45 template <typename NativeT>
GenerateLargeSizeInput(int num_columns,int num_rows)46 Array2D<NativeT> GenerateLargeSizeInput(int num_columns, int num_rows) {
47   Array2D<NativeT> input(num_columns, num_rows);
48 
49   input.FillRandom(6, 128);
50 
51   return input;
52 }
53 
54 template <typename NativeT>
PackLargeInput(Array2D<NativeT> & input)55 Array2D<uint32_t> PackLargeInput(Array2D<NativeT> &input) {
56   const int64_t size_per_pack = sizeof(uint32_t) / sizeof(NativeT);
57   int64_t width = input.width();
58 
59   int64_t padded_output_width = CeilOfRatio(width, size_per_pack);
60 
61   Array2D<uint32_t> pack_input(input.height(), padded_output_width);
62 
63   for (int h = 0; h < input.height(); h++) {
64     std::vector<NativeT> input_row;
65     input_row.reserve(width);
66     for (int w = 0; w < width; w++) {
67       input_row.push_back(input({h, w}));
68     }
69 
70     auto pack_input_vec = PackToUint32<uint8_t>(input_row);
71 
72     for (int w = 0; w < padded_output_width; w++) {
73       pack_input(h, w) = pack_input_vec[w];
74     }
75   }
76 
77   return pack_input;
78 }
79 
80 template <typename NativeT>
GenerateLargeSizeMinCombinedOutput(Array2D<NativeT> & input,const QuantizedRange & range,bool transpose_output=false)81 Array2D<bfloat16> GenerateLargeSizeMinCombinedOutput(
82     Array2D<NativeT> &input, const QuantizedRange &range,
83     bool transpose_output = false) {
84   const int64_t size_per_pack = sizeof(uint32_t) / sizeof(NativeT);
85   int64_t width = input.width();
86 
87   int64_t padded_output_width =
88       CeilOfRatio(width, size_per_pack) * size_per_pack;
89 
90   int64_t output_height;
91   int64_t output_width;
92 
93   if (transpose_output) {
94     output_height = padded_output_width;
95     output_width = input.height();
96   } else {
97     output_height = input.height();
98     output_width = padded_output_width;
99   }
100 
101   Array2D<bfloat16> output(output_height, output_width, bfloat16(0.0));
102 
103   float half_range =
104       !std::is_signed<NativeT>::value
105           ? 0.0f
106           : (static_cast<float>(std::numeric_limits<NativeT>::max() -
107                                 std::numeric_limits<NativeT>::min() + 1)) /
108                 2.0f;
109   const bfloat16 scale_factor =
110       (range.max - range.min) /
111       (static_cast<bfloat16>(std::numeric_limits<NativeT>::max() -
112                              std::numeric_limits<NativeT>::min()));
113 
114   for (int h = 0; h < input.height(); h++) {
115     std::vector<NativeT> input_row;
116     input_row.reserve(width);
117     for (int w = 0; w < width; w++) {
118       bfloat16 result =
119           static_cast<bfloat16>(input(h, w) + half_range) * scale_factor +
120           range.min;
121       if (transpose_output) {
122         output(w, h) = result;
123       } else {
124         output(h, w) = result;
125       }
126     }
127   }
128 
129   return output;
130 }
131 
132 template <typename NativeT>
GenerateMinCombinedOutput(const QuantizedRange & range)133 std::vector<bfloat16> GenerateMinCombinedOutput(const QuantizedRange &range) {
134   float half_range =
135       !std::is_signed<NativeT>::value
136           ? 0.0f
137           : (static_cast<float>(std::numeric_limits<NativeT>::max() -
138                                 std::numeric_limits<NativeT>::min() + 1)) /
139                 2.0f;
140   const bfloat16 scale_factor =
141       (range.max - range.min) /
142       (static_cast<bfloat16>(std::numeric_limits<NativeT>::max() -
143                              std::numeric_limits<NativeT>::min()));
144   std::vector<bfloat16> output;
145   const auto n = std::numeric_limits<NativeT>::max();
146   output.reserve(n);
147   for (int64_t i = std::numeric_limits<NativeT>::min(); i < n; ++i) {
148     bfloat16 result =
149         static_cast<bfloat16>(i + half_range) * scale_factor + range.min;
150     output.push_back(result);
151   }
152 
153   const int64_t pack_size = sizeof(uint32_t) / sizeof(NativeT);
154   const int64_t output_size = output.size();
155 
156   int64_t num_tailing_zeros =
157       CeilOfRatio(output_size, pack_size) * pack_size - output_size;
158 
159   output.insert(output.end(), num_tailing_zeros, bfloat16(0.0));
160   return output;
161 }
162 
163 // TODO(wangtao): add a test to make sure this op is the inverse of the existing
164 // TF quantize op defined in: third_party/tensorflow/core/kernels/quantize_op.cc
165 
166 using DequantizeTest = ClientLibraryTestBase;
167 
// Five bytes span two words: the expected values put the first byte in the
// most significant position and leave the unused tail of the second word zero.
TEST(PackTest, PackUint8ToUint32) {
  const std::vector<uint8_t> bytes = {0xAB, 0x0B, 0x00, 0xF0, 0x01};
  const auto packed = PackToUint32<uint8_t>(bytes);
  EXPECT_THAT(packed, ::testing::ElementsAre(0xAB0B00F0, 0x01000000));
}
173 
// Same layout check as the uint8 case, but with signed elements; the negative
// first element (bit pattern 0x81) must show up unchanged in the top byte.
TEST(PackTest, PackInt8ToUint32) {
  const std::vector<int8_t> values = {static_cast<signed char>(0x81), 0x0B,
                                      0x00, 0x20, 0x01};
  const auto packed = PackToUint32<int8_t>(values);
  EXPECT_THAT(packed, ::testing::ElementsAre(0x810B0020, 0x01000000));
}
180 
// Exactly four bytes must produce a single word with no padding word added.
TEST(PackTest, PackUint8ToUint32PerfectSize) {
  const std::vector<uint8_t> bytes = {3, 2, 1, 0};
  const auto packed = PackToUint32<uint8_t>(bytes);
  EXPECT_THAT(packed, ::testing::ElementsAre(0x03020100));
}
186 
// Dequantizes the packed output of GenerateInput<uint16_t>() as a rank-1
// operand with range [0, 255] and compares it against the host-side reference
// from GenerateMinCombinedOutput<uint16_t>().
XLA_TEST_F(DequantizeTest, MinCombinedUint16R1) {
  XlaBuilder builder(TestName());
  QuantizedRange range(0, 255.0f);

  const auto values = GenerateInput<uint16_t>();
  const auto packed = PackToUint32<uint16_t>(values);
  auto x = ConstantR1<uint32_t>(&builder, packed);
  xla::Dequantize<uint16_t>(x, range, "MIN_COMBINED");

  const auto expected = GenerateMinCombinedOutput<uint16_t>(range);
  ComputeAndCompareR1<bfloat16>(&builder, expected, {});
}
196 
// Dequantizes the packed output of GenerateInput<uint8_t>() as a rank-1
// operand with range [0, 127] and compares it against the host-side reference
// from GenerateMinCombinedOutput<uint8_t>().
XLA_TEST_F(DequantizeTest, MinCombinedUint8R1) {
  XlaBuilder builder(TestName());
  QuantizedRange range(0, 127.0f);

  const auto values = GenerateInput<uint8_t>();
  const auto packed = PackToUint32<uint8_t>(values);
  auto x = ConstantR1<uint32_t>(&builder, packed);
  xla::Dequantize<uint8_t>(x, range, "MIN_COMBINED");

  const auto expected = GenerateMinCombinedOutput<uint8_t>(range);
  ComputeAndCompareR1<bfloat16>(&builder, expected, {});
}
206 
// 4x4 uint8 input, one packed word per row. With range [0, 255] the expected
// output equals the input values converted to bfloat16.
XLA_TEST_F(DequantizeTest, MinCombinedUint8R2) {
  XlaBuilder builder(TestName());
  const std::vector<std::vector<uint8_t>> rows = {
      {0, 1, 2, 3},
      {4, 5, 6, 7},
      {8, 9, 10, 11},
      {12, 13, 16, 15},
  };

  // Each row holds four bytes, so only word [0] of every packed row is used.
  const uint32_t word0 = PackToUint32<uint8_t>(rows[0])[0];
  const uint32_t word1 = PackToUint32<uint8_t>(rows[1])[0];
  const uint32_t word2 = PackToUint32<uint8_t>(rows[2])[0];
  const uint32_t word3 = PackToUint32<uint8_t>(rows[3])[0];

  auto x = ConstantR2<uint32_t>(&builder,
                                {{word0}, {word1}, {word2}, {word3}});
  QuantizedRange range(0, 255.0f);
  xla::Dequantize<uint8_t>(x, range, "MIN_COMBINED");

  const Array2D<bfloat16> expected = {
      {bfloat16(0.0), bfloat16(1.0), bfloat16(2.0), bfloat16(3.0)},
      {bfloat16(4.0), bfloat16(5.0), bfloat16(6.0), bfloat16(7.0)},
      {bfloat16(8.0), bfloat16(9.0), bfloat16(10.0), bfloat16(11.0)},
      {bfloat16(12.0), bfloat16(13.0), bfloat16(16.0), bfloat16(15.0)},
  };
  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
}
230 
// Same 4x4 input as MinCombinedUint8R2, but with transpose_output enabled; the
// expected matrix is the transpose of the input values.
XLA_TEST_F(DequantizeTest, MinCombinedUint8R2TransposeOutput) {
  XlaBuilder builder(TestName());
  const std::vector<std::vector<uint8_t>> rows = {
      {0, 1, 2, 3},
      {4, 5, 6, 7},
      {8, 9, 10, 11},
      {12, 13, 16, 15},
  };

  // Each row holds four bytes, so only word [0] of every packed row is used.
  const uint32_t word0 = PackToUint32<uint8_t>(rows[0])[0];
  const uint32_t word1 = PackToUint32<uint8_t>(rows[1])[0];
  const uint32_t word2 = PackToUint32<uint8_t>(rows[2])[0];
  const uint32_t word3 = PackToUint32<uint8_t>(rows[3])[0];

  auto x = ConstantR2<uint32_t>(&builder,
                                {{word0}, {word1}, {word2}, {word3}});
  QuantizedRange range(0, 255.0f);
  xla::Dequantize<uint8_t>(x, range, "MIN_COMBINED", /*transpose_output=*/true);

  const Array2D<bfloat16> expected = {
      {bfloat16(0.0), bfloat16(4.0), bfloat16(8.0), bfloat16(12.0)},
      {bfloat16(1.0), bfloat16(5.0), bfloat16(9.0), bfloat16(13.0)},
      {bfloat16(2.0), bfloat16(6.0), bfloat16(10.0), bfloat16(16.0)},
      {bfloat16(3.0), bfloat16(7.0), bfloat16(11.0), bfloat16(15.0)},
  };
  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
}
254 
// 4x5 uint8 input: every row needs two packed words, the second one only
// partly filled. The expected output is 4x8, with the three padded columns of
// each row equal to zero.
XLA_TEST_F(DequantizeTest, MinCombinedUint8R2TailingZero) {
  XlaBuilder builder(TestName());
  const std::vector<std::vector<uint8_t>> rows = {
      {0, 1, 2, 3, 16},
      {4, 5, 6, 7, 17},
      {8, 9, 10, 11, 18},
      {12, 13, 16, 15, 19},
  };

  // Pack each row once and reuse both resulting words.
  const auto packed0 = PackToUint32<uint8_t>(rows[0]);
  const auto packed1 = PackToUint32<uint8_t>(rows[1]);
  const auto packed2 = PackToUint32<uint8_t>(rows[2]);
  const auto packed3 = PackToUint32<uint8_t>(rows[3]);

  auto x = ConstantR2<uint32_t>(&builder, {{packed0[0], packed0[1]},
                                           {packed1[0], packed1[1]},
                                           {packed2[0], packed2[1]},
                                           {packed3[0], packed3[1]}});
  QuantizedRange range(0, 255.0f);
  xla::Dequantize<uint8_t>(x, range, "MIN_COMBINED");

  const Array2D<bfloat16> expected = {
      {bfloat16(0.0), bfloat16(1.0), bfloat16(2.0), bfloat16(3.0),
       bfloat16(16.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
      {bfloat16(4.0), bfloat16(5.0), bfloat16(6.0), bfloat16(7.0),
       bfloat16(17.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
      {bfloat16(8.0), bfloat16(9.0), bfloat16(10.0), bfloat16(11.0),
       bfloat16(18.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
      {bfloat16(12.0), bfloat16(13.0), bfloat16(16.0), bfloat16(15.0),
       bfloat16(19.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
  };
  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
}
285 
// Same 4x5 input as MinCombinedUint8R2TailingZero, with transpose_output
// enabled. The expected output is 8x4: five rows of data followed by three
// all-zero rows that correspond to the padding.
XLA_TEST_F(DequantizeTest, MinCombinedUint8R2TailingZeroTransposeOutput) {
  XlaBuilder builder(TestName());
  const std::vector<std::vector<uint8_t>> rows = {
      {0, 1, 2, 3, 16},
      {4, 5, 6, 7, 17},
      {8, 9, 10, 11, 18},
      {12, 13, 16, 15, 19},
  };

  // Pack each row once and reuse both resulting words.
  const auto packed0 = PackToUint32<uint8_t>(rows[0]);
  const auto packed1 = PackToUint32<uint8_t>(rows[1]);
  const auto packed2 = PackToUint32<uint8_t>(rows[2]);
  const auto packed3 = PackToUint32<uint8_t>(rows[3]);

  auto x = ConstantR2<uint32_t>(&builder, {{packed0[0], packed0[1]},
                                           {packed1[0], packed1[1]},
                                           {packed2[0], packed2[1]},
                                           {packed3[0], packed3[1]}});
  QuantizedRange range(0, 255.0f);
  xla::Dequantize<uint8_t>(x, range, "MIN_COMBINED", /*transpose_output=*/true);

  const Array2D<bfloat16> expected = {
      {bfloat16(0.0), bfloat16(4.0), bfloat16(8.0), bfloat16(12.0)},
      {bfloat16(1.0), bfloat16(5.0), bfloat16(9.0), bfloat16(13.0)},
      {bfloat16(2.0), bfloat16(6.0), bfloat16(10.0), bfloat16(16.0)},
      {bfloat16(3.0), bfloat16(7.0), bfloat16(11.0), bfloat16(15.0)},
      {bfloat16(16.0), bfloat16(17.0), bfloat16(18.0), bfloat16(19.0)},
      {bfloat16(0.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
      {bfloat16(0.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
      {bfloat16(0.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
  };
  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
}
316 
// Dequantizes a large randomly filled uint8 matrix (built with extents 500 and
// 3547) and compares the result against the host-side reference from
// GenerateLargeSizeMinCombinedOutput().
XLA_TEST_F(DequantizeTest, MinCombinedUint8LargeSizeTest) {
  XlaBuilder builder(TestName());
  QuantizedRange range(0, 255.0f);

  Array2D<uint8_t> values = GenerateLargeSizeInput<uint8_t>(500, 3547);
  const Array2D<uint32_t> packed = PackLargeInput<uint8_t>(values);

  auto x = ConstantR2FromArray2D<uint32_t>(&builder, packed);
  xla::Dequantize<uint8_t>(x, range, "MIN_COMBINED");

  const Array2D<bfloat16> expected =
      GenerateLargeSizeMinCombinedOutput<uint8_t>(values, range);
  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
}
330 
// Transposed variant of MinCombinedUint8LargeSizeTest: both the Dequantize call
// and the host-side reference are asked for transposed output.
XLA_TEST_F(DequantizeTest, MinCombinedUint8LargeSizeTestTransposeOutput) {
  XlaBuilder builder(TestName());
  QuantizedRange range(0, 255.0f);

  Array2D<uint8_t> values = GenerateLargeSizeInput<uint8_t>(500, 3547);
  const Array2D<uint32_t> packed = PackLargeInput<uint8_t>(values);

  auto x = ConstantR2FromArray2D<uint32_t>(&builder, packed);
  xla::Dequantize<uint8_t>(x, range, "MIN_COMBINED", /*transpose_output=*/true);

  const Array2D<bfloat16> expected =
      GenerateLargeSizeMinCombinedOutput<uint8_t>(values, range,
                                                  /*transpose_output=*/true);
  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
}
345 
346 }  // namespace
347 }  // namespace xla
348