#include <gtest/gtest.h>

#include <ATen/ATen.h>
#include <ATen/test/test_assert.h>
#include <cmath>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>
#include <type_traits>
// For quantize_val
#include <ATen/native/quantized/AffineQuantizer.h>
#include <c10/core/ScalarType.h>
#include <c10/util/irange.h>
#include <ATen/quantized/Quantizer.h>

using namespace at;
#ifndef ATEN_CPU_STATIC_DISPATCH

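// Basic correctness tests for quantized tensors (QTensors): per-tensor and
// per-channel quantize/dequantize, int_repr, empty quantized tensor
// construction, and the from_blob_quantized_* entry points.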
TEST(TestQTensor, QuantDequantAPIs) {
  auto num_elements = 10;
  Tensor r = at::ones({num_elements});
  const double scale = 1.0;
  const int64_t zero_point = 2;
  const Tensor qr = at::quantize_per_tensor(r, scale, zero_point, kQUInt8);
  ASSERT_EQ(qr.q_scale(), scale);
  ASSERT_EQ(qr.q_zero_point(), zero_point);
  ASSERT_TRUE(qr.is_quantized());
  ASSERT_FALSE(r.is_quantized());

  // int_repr
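  // With scale = 1.0 and zero_point = 2, every 1.0f input quantizes to
  // round(1.0 / 1.0) + 2 = 3.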
  Tensor int_repr = qr.int_repr();
  auto* int_repr_data = int_repr.data_ptr<uint8_t>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(int_repr_data[i], 3);
  }

  // Check for correct quantization
  auto r_data = r.data_ptr<float>();
  auto qr_data = qr.data_ptr<quint8>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(
        native::quantize_val<quint8>(scale, zero_point, r_data[i]).val_,
        qr_data[i].val_);
  }

  // Check for correct dequantization
  Tensor rqr = qr.dequantize();
  auto rqr_data = rqr.data_ptr<float>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(r_data[i], rqr_data[i]);
  }
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(
        r_data[i],
        native::dequantize_val(qr.q_scale(), qr.q_zero_point(), qr_data[i]));
  }

  // Check for correct requantization
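  // Requantizing qr directly must agree with dequantizing to float and then
  // quantizing with the new scale/zero_point.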
  double new_scale = 2.0;
  int64_t new_zero_point = 1;
  Tensor reqr = at::quantize_per_tensor(r, new_scale, new_zero_point, kQInt8);
  auto reqr_data = reqr.data_ptr<qint8>();
  for (const auto i : c10::irange(num_elements)) {
    reqr_data[i].val_ =
        native::requantize_val<quint8, qint8>(
            scale, zero_point, new_scale, new_zero_point, qr_data[i])
            .val_;
    const qint8 expected =
        native::quantize_val<qint8>(new_scale, new_zero_point, rqr_data[i]);
    ASSERT_EQ(expected.val_, reqr_data[i].val_);
  }
}

TEST(TestQTensor, RoundingMode) {
  // We assume that quantization is defined as:
  // qx = clamp(zero_point + round(x / scale))
  // If the zero_point is added before rounding, the result will be wrong.
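  // For example, with scale = 1.0 and zero_point = 5, an input of -0.5 rounds
  // to 0 (ties to even) and then shifts to 5; adding the zero_point first
  // would round 4.5 to 4 instead.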
  int32_t zero_point = 5;
  std::vector<float> x_values{
      -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5};
  std::vector<uint8_t> qx_expect{
      0, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11}; // scale = 1.0

  Tensor x = from_blob(x_values.data(), x_values.size());
  Tensor qx = at::quantize_per_tensor(x, /*scale=*/1.0, zero_point, kQUInt8);

  auto qx_data = qx.data_ptr<quint8>();
  for (const auto idx : c10::irange(x_values.size())) {
    ASSERT_EQ(qx_expect[idx], qx_data[idx].val_)
        << "Tie breaking during rounding element " << idx << " failed!";
  }
}

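// item() on a quantized tensor should return the dequantized scalar value.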
TEST(TestQTensor, Item) {
  Tensor r = at::ones({1});
  const float scale = 1;
  const int32_t zero_point = 2;
  Tensor qr = at::quantize_per_tensor(r, scale, zero_point, kQUInt8);
  ASSERT_EQ(r.item().to<float>(), qr.item().to<float>());
}

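// _empty_affine_quantized allocates quantized storage without initializing it;
// values written through data_ptr dequantize as (val - zero_point) * scale.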
TEST(TestQTensor, EmptyQuantized) {
  float scale = 0.5;
  int zero_point = 10;
  int val = 100;
  int numel = 10;
  Tensor q = at::_empty_affine_quantized(
      {numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point);
  // Assigning to QTensor
  auto* q_data = q.data_ptr<quint8>();
  for (const auto i : c10::irange(numel)) {
    q_data[i].val_ = val;
  }

  // dequantize
  auto r = q.dequantize();
  auto* r_data = r.data_ptr<float>();
  for (const auto i : c10::irange(numel)) {
    ASSERT_EQ(r_data[i], (val - zero_point) * scale);
  }
}

TEST(TestQTensor, EmptyPerchannelQuantized) {
  int numel = 10;
  auto scales = rand({numel}).toType(kDouble);
  auto zero_points = randint(10, {10}).toType(kLong);
  int val = 100;
  int ch_axis = 0;
  Tensor q = at::_empty_per_channel_affine_quantized(
      {numel},
      scales,
      zero_points,
      ch_axis,
      at::device(at::kCPU).dtype(kQUInt8));
  // Assigning to QTensor
  auto* q_data = q.data_ptr<quint8>();
  for (const auto i : c10::irange(numel)) {
    q_data[i].val_ = val;
  }

  // dequantize
  auto r = q.dequantize();
  auto* r_data = r.data_ptr<float>();
  for (const auto i : c10::irange(numel)) {
    ASSERT_EQ(
        r_data[i],
        (val - zero_points[i].item().to<int>()) * scales[i].item().to<float>());
  }
}

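// Per-channel quantization along the channel axis of a contiguous NCHW tensor:
// every channel is quantized with its own scale and zero_point.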
TEST(TestQTensor, QuantizePerChannel4d) {
  int C = 64, H = 10, W = 10;
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  int ch_axis = 1;
  // create 4d tensor where each H x W image is a range(0, H*W)
  Tensor tensor = at::empty({1, C, H, W}, at::device(at::kCPU).dtype(kFloat));
  auto* tensor_data = tensor.mutable_data_ptr<float>();
  for (int c = 0, i = 0; c < C; ++c) {
    for (int e = 0; e < H * W; ++e, ++i) {
      tensor_data[i] = e;
    }
  }
  // quantize and check values
  Tensor q = at::native::quantize_per_channel(
      tensor, scales, zero_points, ch_axis, kQUInt8);
  auto* q_data = (uint8_t*)q.data_ptr<quint8>();
  for (int c = 0, i = 0; c < C; ++c) {
    float inv_scale = 1.0f / static_cast<float>(scales[c].item<double>());
    int64_t zero_point = zero_points[c].item<int64_t>();
    for (int e = 0; e < H * W; ++e, ++i) {
      // clamp qval to 255 if the value exceeds the max uint8_t value
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions)
      int qval = std::min<int>(zero_point + std::nearbyint(e * inv_scale), 255);
      ASSERT_EQ((int)q_data[i], qval);
    }
  }
}

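// Same check with a ChannelsLast (NHWC) layout: the memory order changes, but
// the per-channel scales and zero_points still follow the channel dimension.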
TEST(TestQTensor, QuantizePerChannel4dChannelsLast) {
  int C = 64, H = 10, W = 10;
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  int ch_axis = 1;
  // create 4d tensor where each H x W image is a range(0, H*W)
  Tensor tensor = at::empty(
      {1, C, H, W},
      at::device(at::kCPU).dtype(kFloat).memory_format(
          at::MemoryFormat::ChannelsLast));
  auto* tensor_data = tensor.data_ptr<float>();
  for (int e = 0, i = 0; e < H * W; ++e) {
    for (int c = 0; c < C; ++c, ++i) {
      tensor_data[i] = e;
    }
  }

  // quantize and check values
  Tensor q = at::native::quantize_per_channel(
      tensor, scales, zero_points, ch_axis, kQUInt8);
  auto* q_data = (uint8_t*)q.data_ptr<quint8>();
  for (int e = 0, i = 0; e < H * W; ++e) {
    for (int c = 0; c < C; ++c, ++i) {
      float inv_scale = 1.0f / static_cast<float>(scales[c].item<double>());
      int64_t zero_point = zero_points[c].item<int64_t>();
      // clamp qval to 255 if the value exceeds the max uint8_t value
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions)
      int qval = std::min<int>(zero_point + std::nearbyint(e * inv_scale), 255);
      ASSERT_EQ((int)q_data[i], qval);
    }
  }
}

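// from_blob_quantized_per_tensor_affine wraps externally owned memory as a
// quantized tensor without copying; the custom deleter must run once the
// tensor releases the buffer.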
TEST(TestQTensor, FromBlobQuantizedPerTensor) {
  const float scale = 0.1;
  const int64_t zero_point = 10;
  std::vector<int64_t> shape = {5, 10};
  auto numel = c10::multiply_integers(shape);

  TensorOptions options(at::kQUInt8);

  auto custom_vec = std::make_unique<std::vector<uint8_t>>();
  custom_vec->resize(numel);

  uint8_t* custom_data = custom_vec->data();
  for (const auto i : c10::irange(numel)) {
    custom_data[i] = i;
  }
  bool customDataDeleted{false};
  auto deleteWhenDone = custom_vec.release();
  auto deleter = [deleteWhenDone, custom_data, &customDataDeleted](void* inp) {
    ASSERT_EQ((void*)inp, (void*)custom_data);
    delete deleteWhenDone;
    customDataDeleted = true;
  };
  {
    Tensor qtensor = at::from_blob_quantized_per_tensor_affine(
        custom_data, shape, deleter, scale, zero_point, options);

    uint8_t* q_data = (uint8_t*)qtensor.data_ptr<quint8>();
    for (const auto i : c10::irange(numel)) {
      ASSERT_EQ((int)custom_data[i], (int)q_data[i]);
    }
    for (int h = 0, i = 0; h < shape[0]; ++h) {
      for (int w = 0; w < shape[1]; ++w, ++i) {
        ASSERT_EQ(
            qtensor[h][w].item<float>(),
            (custom_data[i] - zero_point) * scale);
      }
    }
    ASSERT_EQ((float)qtensor.q_scale(), (float)scale);
    ASSERT_EQ(qtensor.q_zero_point(), zero_point);
  }
  TORCH_CHECK(customDataDeleted);
}

TEST(TestQTensor, FromBlobQuantizedPerChannel) {
  int C = 64, H = 10, W = 5;
  std::vector<int64_t> shape = {1, C, H, W};
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  auto numel = c10::multiply_integers(shape);
  int ch_axis = 1;
  TensorOptions options(at::kQUInt8);

  auto custom_vec = std::make_unique<std::vector<uint8_t>>();
  custom_vec->resize(numel);

  uint8_t* custom_data = custom_vec->data();
  for (const auto i : c10::irange(numel)) {
    custom_data[i] = i;
  }
  bool customDataDeleted{false};
  auto deleteWhenDone = custom_vec.release();
  auto deleter = [deleteWhenDone, custom_data, &customDataDeleted](void* inp) {
    ASSERT_EQ((void*)inp, (void*)custom_data);
    delete deleteWhenDone;
    customDataDeleted = true;
  };
  {
    Tensor qtensor = at::from_blob_quantized_per_channel_affine(
        custom_data, shape, deleter, scales, zero_points, ch_axis, options);
    uint8_t* q_data = (uint8_t*)qtensor.data_ptr<quint8>();
    for (const auto i : c10::irange(numel)) {
      ASSERT_EQ((int)custom_data[i], (int)q_data[i]);
    }
    ASSERT_TRUE(at::allclose(qtensor.q_per_channel_scales(), scales));
    ASSERT_TRUE(at::allclose(qtensor.q_per_channel_zero_points(), zero_points));
    ASSERT_TRUE(qtensor.is_quantized());
  }
  TORCH_CHECK(customDataDeleted);
}

#if defined(__ARM_NEON__) || defined(__aarch64__)
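// Exercise the ARM NEON vectorized quantize/dequantize paths for each
// quantized dtype, including zero_points at the limits of the underlying
// integer type.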
TEST(TestQTensor, TestArmVectorizedQuantizeDequantize) {
  const float scale = 7;
  const int numel = 132;

  std::vector<float> x_values;
  for (const auto i : c10::irange(numel)) {
    x_values.push_back(9 * i);
  }

  const Tensor x = from_blob(x_values.data(), x_values.size());

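  // Generic checker: quantize x for several zero_points, compare each element
  // against the scalar reference quantize_val, then dequantize and compare
  // against the reference dequantize_val.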
  auto test_for_datatype = [&](
      const ScalarType scalar_type,
      const auto get_data_ptr,
      const auto quantize_val_with_datatype,
      const int zero_point_min,
      const int zero_point_max) {
    for (int zero_point : {zero_point_min, 10, zero_point_max}) {
      const Tensor q = at::quantize_per_tensor(x, scale, zero_point, scalar_type);
      auto* q_data = get_data_ptr(q);
      for (const auto i : c10::irange(numel)) {
        ASSERT_EQ(
            q_data[i].val_,
            quantize_val_with_datatype(scale, zero_point, x_values[i]).val_);
      }
      const Tensor r = q.dequantize();
      const float* r_data = r.const_data_ptr<float>();
      for (const auto i : c10::irange(numel)) {
        ASSERT_FLOAT_EQ(
            r_data[i],
            native::dequantize_val(scale, zero_point, q_data[i]));
      }
    }
  };

  // Unsigned Int 8
  test_for_datatype(
      kQUInt8,
      [](Tensor q) { return q.data_ptr<quint8>(); },
      native::quantize_val<quint8>,
      std::numeric_limits<uint8_t>::min(),
      std::numeric_limits<uint8_t>::max());

  // Signed Int 8
  test_for_datatype(
      kQInt8,
      [](Tensor q) { return q.data_ptr<qint8>(); },
      native::quantize_val<qint8>,
      std::numeric_limits<int8_t>::min(),
      std::numeric_limits<int8_t>::max());

  // Signed Int 32 (not optimized with vectorization)
  test_for_datatype(
      kQInt32,
      [](Tensor q) { return q.data_ptr<qint32>(); },
      native::quantize_val<qint32>,
      std::numeric_limits<int32_t>::min(),
      std::numeric_limits<int32_t>::max());
}
#endif // defined(__ARM_NEON__) || defined(__aarch64__)

#endif // ATEN_CPU_STATIC_DISPATCH