#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/ceil_div.h>
#include <ATen/Dispatch.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/cpu/Loops.h>
#include <ATen/native/DispatchStub.h>
#include <c10/util/irange.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/empty.h>
#include <ATen/ops/int_repr_native.h>
#endif

namespace at {
namespace native {

// When the input Tensor is non-dense, i.e. the allocated memory
// is larger than the memory used by all of its elements, we convert
// it to a dense tensor; otherwise the output keeps the same memory
// format as the input.
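//
// Illustrative usage via the public API (a sketch, not exercised in this file):
//   Tensor q = at::quantize_per_tensor(at::rand({2, 3}), /*scale=*/0.1, /*zero_point=*/0, at::kQUInt8);
//   Tensor r = q.int_repr();  // a kByte tensor holding the raw quantized values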
Tensor int_repr_quantized_cpu(const Tensor& self) {
  Tensor dst;
  // NOLINTNEXTLINE(clang-diagnostic-unused-variable)
  AT_DISPATCH_QINT_AND_SUB_BYTE_TYPES(self.scalar_type(), "int_repr", [&]() {
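    // scalar_t, underlying_t, UNDERLYING_TYPE and bit_width are injected by the
    // AT_DISPATCH_QINT_AND_SUB_BYTE_TYPES macro for the dispatched quantized type.
    // Sub-byte types (e.g. quint4x2, quint2x4) pack several values per byte, so
    // the packed bytes are copied out as-is below.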
    if (bit_width == 4 || bit_width == 2) {
      int64_t out_size = at::ceil_div(self.numel() * bit_width, (int64_t)8);
      dst = at::empty(
          {out_size},
          self.options().dtype(UNDERLYING_TYPE),
          self.suggest_memory_format());
      const underlying_t* qdata = reinterpret_cast<const underlying_t*>(self.const_data_ptr<scalar_t>());
      for (const auto i : c10::irange(dst.numel())) {
        dst[i] = static_cast<underlying_t>(qdata[i]);
      }
    } else {
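      // Byte-and-wider quantized types: allocate an output with the underlying
      // integer dtype and copy values elementwise via a TensorIterator.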
      dst = at::empty(
          self.sizes(),
          self.options().dtype(UNDERLYING_TYPE),
          self.suggest_memory_format());
      auto iter = TensorIteratorConfig()
        .check_all_same_dtype(false)
        .add_output(dst)
        .add_input(self)
        .build();
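      // Each quantized scalar type wraps its raw integer in a struct; val_ is
      // that underlying representation.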
      cpu_kernel(iter, [](scalar_t value) -> underlying_t { return value.val_; });
    }
  });
  return dst;
}

} // namespace native
} // namespace at