#include <gtest/gtest.h>

#include <ATen/ATen.h>
#include <ATen/test/test_assert.h>
#include <cmath>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>
#include <type_traits>
// For quantize_val
#include <ATen/native/quantized/AffineQuantizer.h>
#include <c10/core/ScalarType.h>
#include <c10/util/irange.h>
#include <ATen/quantized/Quantizer.h>

using namespace at;
#ifndef ATEN_CPU_STATIC_DISPATCH

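// The tests below exercise the affine quantization mapping
//   q = clamp(round(x / scale) + zero_point)
// and its inverse
//   x = (q - zero_point) * scale
// through the public quantize/dequantize APIs.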
TEST(TestQTensor, QuantDequantAPIs) {
  auto num_elements = 10;
  Tensor r = at::ones({num_elements});
  const double scale = 1.0;
  const int64_t zero_point = 2;
  const Tensor qr = at::quantize_per_tensor(r, scale, zero_point, kQUInt8);
  ASSERT_EQ(qr.q_scale(), scale);
  ASSERT_EQ(qr.q_zero_point(), zero_point);
  ASSERT_TRUE(qr.is_quantized());
  ASSERT_FALSE(r.is_quantized());

  // int_repr
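  // With r = 1.0, scale = 1.0, zero_point = 2, each element quantizes to
  // round(1.0 / 1.0) + 2 = 3, which is what the uint8 representation holds.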
  Tensor int_repr = qr.int_repr();
  auto* int_repr_data = int_repr.data_ptr<uint8_t>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(int_repr_data[i], 3);
  }

  // Check for correct quantization
  auto r_data = r.data_ptr<float>();
  auto qr_data = qr.data_ptr<quint8>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(
        native::quantize_val<quint8>(scale, zero_point, r_data[i]).val_,
        qr_data[i].val_);
  }

  // Check for correct dequantization
  Tensor rqr = qr.dequantize();
  auto rqr_data = rqr.data_ptr<float>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(r_data[i], rqr_data[i]);
  }
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(
        r_data[i],
        native::dequantize_val(qr.q_scale(), qr.q_zero_point(), qr_data[i]));
  }

  // Check for correct requantization
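  // Requantization maps values from one quantized domain to another; it is
  // equivalent to dequantizing with (scale, zero_point) and then quantizing
  // with (new_scale, new_zero_point), which is what the expected value
  // below computes from the dequantized data.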
  double new_scale = 2.0;
  int64_t new_zero_point = 1;
  Tensor reqr = at::quantize_per_tensor(r, new_scale, new_zero_point, kQInt8);
  auto reqr_data = reqr.data_ptr<qint8>();
  for (const auto i : c10::irange(num_elements)) {
    reqr_data[i].val_ =
        native::requantize_val<quint8, qint8>(
            scale, zero_point, new_scale, new_zero_point, qr_data[i])
            .val_;
    const qint8 expected =
        native::quantize_val<qint8>(new_scale, new_zero_point, rqr_data[i]);
    ASSERT_EQ(expected.val_, reqr_data[i].val_);
  }
}

TEST(TestQTensor, RoundingMode) {
  // We assume that quantization is defined as:
  //   qx = clamp(zero_point + round(x / scale))
  // If the zero_point is added before rounding, the result will be wrong.
  int32_t zero_point = 5;
  std::vector<float> x_values{
      -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5};
  std::vector<uint8_t> qx_expect{
      0, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11}; // scale = 1.0
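  // The expected values assume ties round to even, consistent with
  // std::nearbyint under the default rounding mode: e.g. -5.5 rounds to -6,
  // so qx = clamp(5 + (-6)) = 0, while 0.5 rounds to 0, so qx = 5 + 0 = 5.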

  Tensor x = from_blob(x_values.data(), x_values.size());
  Tensor qx = at::quantize_per_tensor(x, /*scale=*/1.0, zero_point, kQUInt8);

  auto qx_data = qx.data_ptr<quint8>();
  for (const auto idx : c10::irange(x_values.size())) {
    ASSERT_EQ(qx_expect[idx], qx_data[idx].val_)
        << "Tie breaking during rounding element " << idx << " failed!";
  }
}

TEST(TestQTensor, Item) {
  Tensor r = at::ones({1});
  const float scale = 1;
  const int32_t zero_point = 2;
  Tensor qr = at::quantize_per_tensor(r, scale, zero_point, kQUInt8);
  ASSERT_EQ(r.item().to<float>(), qr.item().to<float>());
}

TEST(TestQTensor, EmptyQuantized) {
  float scale = 0.5;
  int zero_point = 10;
  int val = 100;
  int numel = 10;
  Tensor q = at::_empty_affine_quantized(
      {numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point);
  // Assigning to QTensor
  auto* q_data = q.data_ptr<quint8>();
  for (const auto i : c10::irange(numel)) {
    q_data[i].val_ = val;
  }

  // dequantize
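  // Every element was set to 100, so each dequantizes to
  // (100 - 10) * 0.5 = 45.0f.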
  auto r = q.dequantize();
  auto* r_data = r.data_ptr<float>();
  for (const auto i : c10::irange(numel)) {
    ASSERT_EQ(r_data[i], (val - zero_point) * scale);
  }
}

TEST(TestQTensor, EmptyPerchannelQuantized) {
  int numel = 10;
  auto scales = rand({numel}).toType(kDouble);
  auto zero_points = randint(10, {10}).toType(kLong);
  int val = 100;
  int ch_axis = 0;
  Tensor q = at::_empty_per_channel_affine_quantized(
      {numel},
      scales,
      zero_points,
      ch_axis,
      at::device(at::kCPU).dtype(kQUInt8));
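  // With ch_axis == 0 on a 1-d tensor, element i gets its own
  // scales[i] / zero_points[i] pair, as the dequantization check verifies.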
  // Assigning to QTensor
  auto* q_data = q.data_ptr<quint8>();
  for (const auto i : c10::irange(numel)) {
    q_data[i].val_ = val;
  }

  // dequantize
  auto r = q.dequantize();
  auto* r_data = r.data_ptr<float>();
  for (const auto i : c10::irange(numel)) {
    ASSERT_EQ(
        r_data[i],
        (val - zero_points[i].item().to<int>()) * scales[i].item().to<float>());
  }
}

TEST(TestQTensor, QuantizePerChannel4d) {
  int C = 64, H = 10, W = 10;
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  int ch_axis = 1;
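  // ch_axis == 1 selects the channel (C) dimension of the NCHW tensor, so
  // every value in channel c is quantized with scales[c] and zero_points[c].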
  // create 4d tensor where each H x W image is a range(0, H*W)
  Tensor tensor = at::empty({1, C, H, W}, at::device(at::kCPU).dtype(kFloat));
  auto* tensor_data = tensor.mutable_data_ptr<float>();
  for (int c = 0, i = 0; c < C; ++c) {
    for (int e = 0; e < H * W; ++e, ++i) {
      tensor_data[i] = e;
    }
  }
  // quantize and check values
  Tensor q = at::native::quantize_per_channel(
      tensor, scales, zero_points, ch_axis, kQUInt8);
  auto* q_data = (uint8_t*)q.data_ptr<quint8>();
  for (int c = 0, i = 0; c < C; ++c) {
    float inv_scale = 1.0f / static_cast<float>(scales[c].item<double>());
    int64_t zero_point = zero_points[c].item<int64_t>();
    for (int e = 0; e < H * W; ++e, ++i) {
      // clamp qval to 255 if val is greater than max uint8_t value
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions)
      int qval = std::min<int>(zero_point + std::nearbyint(e * inv_scale), 255);
      ASSERT_EQ((int)q_data[i], qval);
    }
  }
}

TEST(TestQTensor, QuantizePerChannel4dChannelsLast) {
  int C = 64, H = 10, W = 10;
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  int ch_axis = 1;
  // create 4d tensor where each H x W image is a range(0, H*W)
  Tensor tensor = at::empty(
      {1, C, H, W},
      at::device(at::kCPU).dtype(kFloat).memory_format(
          at::MemoryFormat::ChannelsLast));
  auto* tensor_data = tensor.data_ptr<float>();
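  // In ChannelsLast (NHWC) memory, consecutive elements vary over C first,
  // so the fill loops are transposed relative to the contiguous test above.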
  for (int e = 0, i = 0; e < H * W; ++e) {
    for (int c = 0; c < C; ++c, ++i) {
      tensor_data[i] = e;
    }
  }

  // quantize and check values
  Tensor q = at::native::quantize_per_channel(
      tensor, scales, zero_points, ch_axis, kQUInt8);
  auto* q_data = (uint8_t*)q.data_ptr<quint8>();
  for (int e = 0, i = 0; e < H * W; ++e) {
    for (int c = 0; c < C; ++c, ++i) {
      float inv_scale = 1.0f / static_cast<float>(scales[c].item<double>());
      int64_t zero_point = zero_points[c].item<int64_t>();
      // clamp qval to 255 if val is greater than max uint8_t value
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions)
      int qval = std::min<int>(zero_point + std::nearbyint(e * inv_scale), 255);
      ASSERT_EQ((int)q_data[i], qval);
    }
  }
}

TEST(TestQTensor, FromBlobQuantizedPerTensor) {
  const float scale = 0.1;
  const int64_t zero_point = 10;
  std::vector<int64_t> shape = {5, 10};
  auto numel = c10::multiply_integers(shape);

  TensorOptions options(at::kQUInt8);

  auto custom_vec = std::make_unique<std::vector<uint8_t>>();
  custom_vec->resize(numel);

  uint8_t* custom_data = custom_vec->data();
  for (const auto i : c10::irange(numel)) {
    custom_data[i] = i;
  }
  bool customDataDeleted{false};
  auto deleteWhenDone = custom_vec.release();
  auto deleter = [deleteWhenDone, custom_data, &customDataDeleted](void* inp) {
    ASSERT_EQ((void*)inp, (void*)custom_data);
    delete deleteWhenDone;
    customDataDeleted = true;
  };
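  // The deleter runs when the last reference to the tensor's storage dies;
  // the inner scope below guarantees that happens before customDataDeleted
  // is checked.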
  {
    Tensor qtensor = at::from_blob_quantized_per_tensor_affine(
        custom_data, shape, deleter, scale, zero_point, options);

    uint8_t* q_data = (uint8_t*)qtensor.data_ptr<quint8>();
    for (const auto i : c10::irange(numel)) {
      ASSERT_EQ((int)custom_data[i], (int)q_data[i]);
    }
    for (int h = 0, i = 0; h < shape[0]; ++h) {
      for (int w = 0; w < shape[1]; ++w, ++i) {
        ASSERT_EQ(
            qtensor[h][w].item<float>(),
            (custom_data[i] - zero_point) * scale);
      }
    }
    ASSERT_EQ((float)qtensor.q_scale(), (float)scale);
    ASSERT_EQ(qtensor.q_zero_point(), zero_point);
  }
  TORCH_CHECK(customDataDeleted);
}

TEST(TestQTensor, FromBlobQuantizedPerChannel) {
  int C = 64, H = 10, W = 5;
  std::vector<int64_t> shape = {1, C, H, W};
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  auto numel = c10::multiply_integers(shape);
  int ch_axis = 1;
  TensorOptions options(at::kQUInt8);

  auto custom_vec = std::make_unique<std::vector<uint8_t>>();
  custom_vec->resize(numel);

  uint8_t* custom_data = custom_vec->data();
  for (const auto i : c10::irange(numel)) {
    custom_data[i] = i;
  }
  bool customDataDeleted{false};
  auto deleteWhenDone = custom_vec.release();
  auto deleter = [deleteWhenDone, custom_data, &customDataDeleted](void* inp) {
    ASSERT_EQ((void*)inp, (void*)custom_data);
    delete deleteWhenDone;
    customDataDeleted = true;
  };
  {
    Tensor qtensor = at::from_blob_quantized_per_channel_affine(
        custom_data, shape, deleter, scales, zero_points, ch_axis, options);
    uint8_t* q_data = (uint8_t*)qtensor.data_ptr<quint8>();
    for (const auto i : c10::irange(numel)) {
      ASSERT_EQ((int)custom_data[i], (int)q_data[i]);
    }
    ASSERT_TRUE(at::allclose(qtensor.q_per_channel_scales(), scales));
    ASSERT_TRUE(at::allclose(qtensor.q_per_channel_zero_points(), zero_points));
    ASSERT_TRUE(qtensor.is_quantized());
  }
  TORCH_CHECK(customDataDeleted);
}

#if defined(__ARM_NEON__) || defined(__aarch64__)
TEST(TestQTensor, TestArmVectorizedQuantizeDequantize) {
  const float scale = 7;
  const int numel = 132;

  std::vector<float> x_values;
  for (const auto i : c10::irange(numel)) {
    x_values.push_back(9 * i);
  }

  const Tensor x = from_blob(x_values.data(), x_values.size());

  auto test_for_datatype = [&](
      const ScalarType scalar_type,
      const auto get_data_ptr,
      const auto quantize_val_with_datatype,
      const int zero_point_min,
      const int zero_point_max) {
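    // Cover both extremes of the representable range plus a mid-range
    // zero_point; the extremes should also exercise the clamping paths.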
    for (int zero_point : {zero_point_min, 10, zero_point_max}) {
      const Tensor q = at::quantize_per_tensor(x, scale, zero_point, scalar_type);
      auto* q_data = get_data_ptr(q);
      for (const auto i : c10::irange(numel)) {
        ASSERT_EQ(
          q_data[i].val_,
          quantize_val_with_datatype(scale, zero_point, x_values[i]).val_);
      }
      const Tensor r = q.dequantize();
      const float* r_data = r.const_data_ptr<float>();
      for (const auto i : c10::irange(numel)) {
        ASSERT_FLOAT_EQ(
          r_data[i],
          native::dequantize_val(scale, zero_point, q_data[i]));
      }
    }
  };

  // Unsigned Int 8
  test_for_datatype(
    kQUInt8,
    [](Tensor q) { return q.data_ptr<quint8>(); },
    native::quantize_val<quint8>,
    std::numeric_limits<uint8_t>::min(),
    std::numeric_limits<uint8_t>::max());

  // Signed Int 8
  test_for_datatype(
    kQInt8,
    [](Tensor q) { return q.data_ptr<qint8>(); },
    native::quantize_val<qint8>,
    std::numeric_limits<int8_t>::min(),
    std::numeric_limits<int8_t>::max());

  // Signed Int 32 (not optimized with vectorization)
  test_for_datatype(
    kQInt32,
    [](Tensor q) { return q.data_ptr<qint32>(); },
    native::quantize_val<qint32>,
    std::numeric_limits<int32_t>::min(),
    std::numeric_limits<int32_t>::max());
}
#endif // defined(__ARM_NEON__) || defined(__aarch64__)

#endif // ATEN_CPU_STATIC_DISPATCH