/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <functional>
#include <memory>
#include <vector>

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"

namespace tensorflow {

class QuantizedMatMulTest : public OpsTestBase {
 protected:
};

// Runs two small matrices through the operator, and leaves all the parameters
// at their default values.
TEST_F(QuantizedMatMulTest, Small_NoParams) {
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
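  // QuantizedMatMul's six inputs are the two quantized tensors followed by
  // four scalar floats giving each tensor's real-valued range: min_a, max_a,
  // min_b, max_b, in that order.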
  // A matrix is:
  // |  1 |  2 |  3 |
  // |  4 |  5 |  6 |
  AddInputFromArray<quint8>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6});
  // B matrix is:
  // |  7 |  8 |  9 | 10 |
  // | 11 | 12 | 13 | 14 |
  // | 15 | 16 | 17 | 18 |
  AddInputFromArray<quint8>(TensorShape({3, 4}),
                            {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
  AddInputFromArray<float>(TensorShape({}), {0});
  AddInputFromArray<float>(TensorShape({}), {255.0f});
  AddInputFromArray<float>(TensorShape({}), {0});
  AddInputFromArray<float>(TensorShape({}), {255.0f});
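  // With a range of [0.0, 255.0], each quint8 value n dequantizes to exactly
  // n (0.0 + n * (255.0 - 0.0) / 255), so the matmul operates on the literal
  // integer values above and the expected results below are exact.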

  TF_ASSERT_OK(RunOpKernel());
  // Here are the results we expect, from hand calculations:
  // (1 * 7) + (2 * 11) + (3 * 15) = 74
  // (1 * 8) + (2 * 12) + (3 * 16) = 80
  // (1 * 9) + (2 * 13) + (3 * 17) = 86
  // (1 * 10) + (2 * 14) + (3 * 18) = 92
  // (4 * 7) + (5 * 11) + (6 * 15) = 173
  // (4 * 8) + (5 * 12) + (6 * 16) = 188
  // (4 * 9) + (5 * 13) + (6 * 17) = 203
  // (4 * 10) + (5 * 14) + (6 * 18) = 218
  Tensor expected(allocator(), DT_QINT32, TensorShape({2, 4}));
  test::FillValues<qint32>(&expected, {74, 80, 86, 92, 173, 188, 203, 218});
  test::ExpectTensorEqual<qint32>(expected, *GetOutput(0));
}

// This test multiplies two 1x1 8-bit matrices, and compares the
// results with hand-calculated expectations.
TEST_F(QuantizedMatMulTest, VerySmall_WithParams) {
  // These parameters reflect a typical production usage of eight-bit matmuls
  // in an Inception-style network.
  const bool transpose_a = true;
  const int a_rows = 1;
  const int a_cols = 1;
  const int b_rows = 1;
  const int b_cols = 1;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  // The A matrix is:
  // |  -1 |
  // The input array only contains unsigned bytes, so we specify the actual
  // values as n + 12, where 12 is the offset implied by the quantization
  // range of [-12, 243] given below. That means -1 is represented as
  // -1 + 12, or 11.
  // We have set the transpose_a flag to true, so the matrix is transposed,
  // and for filling the values the in-memory storage order is effectively
  // column-major, rather than the default row-major.
  AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11});

  // The B matrix is:
  // |   1 |
  AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0});
  AddInputFromArray<float>(TensorShape({}), {-12.0f});
  AddInputFromArray<float>(TensorShape({}), {243.0f});
  AddInputFromArray<float>(TensorShape({}), {1.0f});
  AddInputFromArray<float>(TensorShape({}), {256.0f});
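  // Sanity check on the ranges above, assuming the standard dequantization
  // q -> min + q * (max - min) / 255 from quantization_utils.h:
  //   A: -12.0 + 11 * (243.0 - (-12.0)) / 255 = -1.0
  //   B:   1.0 +  0 * (256.0 - 1.0) / 255     =  1.0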
  TF_ASSERT_OK(RunOpKernel());
  // We're requesting C = A.transposed() * B,
  // so we expect to get these results:
  // 1*-1 = -1
  // | -1 |
  Tensor expected(allocator(), DT_QINT32, TensorShape({a_cols, b_cols}));
  test::FillValues<qint32>(&expected, {-1});
  test::ExpectTensorEqual<qint32>(expected, *GetOutput(0));
}

// This test multiplies two 1x1 8-bit matrices, but sets an invalid
// quantization range, so we expect to get an error.
TEST_F(QuantizedMatMulTest, VerySmall_BadRange) {
  // These parameters reflect a typical production usage of eight-bit matmuls
  // in an Inception-style network.
  const bool transpose_a = true;
  const int a_rows = 1;
  const int a_cols = 1;
  const int b_rows = 1;
  const int b_cols = 1;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  // The A matrix is:
  // |  -1 |
  AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11});

  // The B matrix is:
  // |   1 |
  AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0});
  AddInputFromArray<float>(TensorShape({}), {-12.0f});
  AddInputFromArray<float>(TensorShape({}), {243.0f});
  // Here we set the range so that the min and max are equal, so we expect to
  // see an error when we run.
  AddInputFromArray<float>(TensorShape({}), {1.0f});
  AddInputFromArray<float>(TensorShape({}), {1.0f});
  EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code());
}

// This test multiplies two 1x1 8-bit matrices, but sets invalid quantized min
// and max values, so we expect to get an error.
TEST_F(QuantizedMatMulTest, VerySmall_BadMinMax) {
  // These parameters reflect a typical production usage of eight-bit matmuls
  // in an Inception-style network.
  const bool transpose_a = true;
  const int a_rows = 1;
  const int a_cols = 1;
  const int b_rows = 1;
  const int b_cols = 1;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  // The A matrix is:
  // |  -1 |
  AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11});
  // The B matrix is:
  // |   1 |
  AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0});
  // Here we pass a non-scalar min_a value, so we expect to see an error when
  // we run.
  AddInputFromArray<float>(TensorShape({1}), {2});
  AddInputFromArray<float>(TensorShape({}), {243.0f});
  AddInputFromArray<float>(TensorShape({}), {1.0f});
  AddInputFromArray<float>(TensorShape({}), {256.0f});
  EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code());
}

// This test multiplies a couple of small 8-bit matrices, and compares the
// results with hand-calculated expectations. It uses shifts and offsets to
// control the range of the outputs.
TEST_F(QuantizedMatMulTest, Small_WithParams) {
  // These parameters reflect a typical production usage of eight-bit matmuls
  // in an Inception-style network.
  const bool transpose_a = true;
  const int a_rows = 3;
  const int a_cols = 4;
  const int b_rows = 3;
  const int b_cols = 2;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  // The A matrix is:
  // |  -1 |  -5 |  -9 |
  // |  -2 |  -6 | -10 |
  // |  -3 |  -7 | -11 |
  // |  -4 |  -8 | -12 |
  // The input array only contains unsigned bytes, so we specify the actual
  // values as n + 12, where 12 is the offset implied by the quantization
  // range of [-12, 243] given below. For example that means -1 is represented
  // as -1 + 12, or 11.
  // We have set the transpose_a flag to true, so the matrix is transposed,
  // and for filling the values the in-memory storage order is effectively
  // column-major, rather than the default row-major.
  AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {
                                                               11,
                                                               10,
                                                               9,
                                                               8,
                                                               7,
                                                               6,
                                                               5,
                                                               4,
                                                               3,
                                                               2,
                                                               1,
                                                               0,
                                                           });

  // The B matrix is:
  // |   1 |   4 |
  // |   2 |   5 |
  // |   3 |   6 |
  AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {
                                                               1,
                                                               4,
                                                               2,
                                                               5,
                                                               3,
                                                               6,
                                                           });
  AddInputFromArray<float>(TensorShape({}), {-12.0f});
  AddInputFromArray<float>(TensorShape({}), {243.0f});
  AddInputFromArray<float>(TensorShape({}), {0});
  AddInputFromArray<float>(TensorShape({}), {255.0f});
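  // With A's range of [-12, 243] each stored byte n decodes to n - 12 (so the
  // stored values 11..0 above represent -1..-12), while B's range of [0, 255]
  // decodes each stored byte to itself.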
  TF_ASSERT_OK(RunOpKernel());
  // We're requesting C = A.transposed() * B,
  // so we expect to get these results:
  // 1*-1 + 2*-5 + 3*-9 = -38
  // 4*-1 + 5*-5 + 6*-9 = -83
  // 1*-2 + 2*-6 + 3*-10 = -44
  // 4*-2 + 5*-6 + 6*-10 = -98
  // 1*-3 + 2*-7 + 3*-11 = -50
  // 4*-3 + 5*-7 + 6*-11 = -113
  // 1*-4 + 2*-8 + 3*-12 = -56
  // 4*-4 + 5*-8 + 6*-12 = -128
  // |  -38 |  -83 |
  // |  -44 |  -98 |
  // |  -50 | -113 |
  // |  -56 | -128 |
  Tensor expected(allocator(), DT_QINT32, TensorShape({a_cols, b_cols}));
  test::FillValues<qint32>(&expected, {
                                          -38,
                                          -83,
                                          -44,
                                          -98,
                                          -50,
                                          -113,
                                          -56,
                                          -128,
                                      });
  test::ExpectTensorEqual<qint32>(expected, *GetOutput(0));
}

// This test multiplies a couple of medium-sized 8-bit matrices, and tests the
// results against what we saw from running a float MatMul with equivalent
// inputs.
TEST_F(QuantizedMatMulTest, Medium_WithParams) {
  const bool transpose_a = true;
  const bool transpose_b = false;
  TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul")
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_QUINT8))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Input(FakeInput(DT_FLOAT))
                   .Attr("Toutput", DataTypeToEnum<qint32>::v())
                   .Attr("transpose_a", transpose_a)
                   .Attr("transpose_b", transpose_b)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());

  const int a_rows = 8;
  const int a_cols = 8;
  const float a_min = -2164.25f;
  const float a_max = 2006.27f;
  Tensor a_float(DT_FLOAT, {a_rows, a_cols});
  test::FillValues<float>(
      &a_float,
      {-1014.12, -157.382, -810.17,  1435.28,  1016.37,  219.684,  -316.054,
       -2164.25, 2006.27,  -547.444, 857.376,  404.376,  9.72115,  332.588,
       194.385,  -286.57,  26.062,   23.1125,  110.436,  247.055,  -127.683,
       -376.275, -124.81,  -846.826, -77.1507, 305.581,  -202.747, 12.9528,
       9.64886,  872.686,  40.9069,  197.816,  44.16,    -306.768, -1457.52,
       -368.939, -1049.42, -486.353, 1745.87,  95.7695,  395.773,  -254.333,
       -404.27,  787.16,   -2.44114, 199.37,   -1024.08, 784.901,  235.055,
       -42.7295, 241.498,  -245.365, 470.763,  186.159,  186.579,  -220.163,
       1304.58,  386.272,  -358.853, -755.996, 360.109,  -866.007, 55.2828,
       -508.801});
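  // FloatTensorToQuantized (from quantization_utils.h) maps each float to the
  // nearest of the 256 representable levels spread across [a_min, a_max].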
  Tensor a_quantized = FloatTensorToQuantized<quint8>(a_float, a_min, a_max);

  const int b_rows = 8;
  const int b_cols = 8;
  const float b_min = -0.739539f;
  const float b_max = 0.641057f;
  Tensor b_float(DT_FLOAT, {b_rows, b_cols});
  test::FillValues<float>(
      &b_float,
      {-0.294619, -0.0670519, 0.261507,   -0.126274, 0.127229,   -0.176945,
       -0.251223, 0.231086,   0.453694,   0.415666,  -0.288733,  0.508717,
       0.211551,  0.0435907,  -0.582383,  -0.308779, 0.0696883,  -0.438122,
       0.114,     0.433964,   0.109883,   0.284931,  -0.149661,  0.108657,
       0.458333,  -0.130231,  -0.35805,   -0.123206, -0.437968,  0.0282411,
       0.628818,  -0.0522173, -0.0233403, 0.124863,  0.217165,   0.262294,
       -0.171005, -0.254693,  -0.200433,  -0.287354, 0.488166,   -0.0354688,
       -0.118091, -0.590444,  0.491537,   -0.739539, 0.083117,   0.282482,
       0.275269,  -0.36574,   0.107476,   0.0511428, -0.136887,  -0.0149852,
       -0.259694, 0.641057,   0.264054,   -0.295126, -0.0218791, 0.361211,
       0.012448,  0.0709718,  -0.392394,  -0.434215});
  Tensor b_quantized = FloatTensorToQuantized<quint8>(b_float, b_min, b_max);

  AddInputFromArray<quint8>(a_quantized.shape(), a_quantized.flat<quint8>());
  AddInputFromArray<quint8>(b_quantized.shape(), b_quantized.flat<quint8>());
  AddInputFromArray<float>(TensorShape({}), {a_min});
  AddInputFromArray<float>(TensorShape({}), {a_max});
  AddInputFromArray<float>(TensorShape({}), {b_min});
  AddInputFromArray<float>(TensorShape({}), {b_max});
  TF_ASSERT_OK(RunOpKernel());

  Tensor expected_float(DT_FLOAT, {a_cols, b_cols});
  test::FillValues<float>(
      &expected_float,
      {1776.82f,  421.058f,  -854.308f, 1430.65f,  503.105f,  57.2744f,
       -1514.97f, -1163.66f, -87.0979f, -394.577f, -39.4983f, -79.1938f,
       -329.029f, 313.475f,  446.929f,  -59.5855f, 350.837f,  238.655f,
       -609.21f,  350.499f,  192.238f,  847.576f,  -103.177f, 185.886f,
       -90.5335f, 200.787f,  99.1981f,  -717.076f, 763.815f,  -703.726f,
       -125.164f, 732.325f,  -51.5303f, -418.826f, 60.0783f,  -299.658f,
       231.41f,   72.0622f,  -289.244f, 663.776f,  391.177f,  294.415f,
       -484.148f, -677.932f, -180.342f, -194.764f, 761.715f,  553.061f,
       -283.355f, 321.109f,  351.269f,  1171.7f,   -857.497f, 343.804f,
       -494.599f, -844.119f, 725.237f,  586.052f,  -735.013f, -897.723f,
       -122.434f, -502.907f, 1264.6f,   -239.991f});

  const Tensor& output_quantized = *GetOutput(0);
  const float output_min = GetOutput(1)->flat<float>()(0);
  const float output_max = GetOutput(2)->flat<float>()(0);
  Tensor output_float =
      QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max);
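  // A tolerance of 15.0 is coarse but expected with 8-bit inputs: A's
  // quantization step alone is (2006.27 - (-2164.25)) / 255, roughly 16.4
  // float units per level.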
  test::ExpectTensorNear<float>(expected_float, output_float, 15.0);
}

}  // namespace tensorflow