#include <gtest/gtest.h>
#include <thread>

#include <ATen/ATen.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/cpu/Loops.h>

using namespace at;

// An operation with a CUDA tensor and CPU scalar should keep the scalar
// on the CPU (and lift it to a parameter).
TEST(TensorIteratorTest, CPUScalar) {
  if (!at::hasCUDA()) return;
  Tensor out;
  auto x = at::randn({5, 5}, kCUDA);
  auto y = at::ones(1, kCPU).squeeze();
  auto iter = TensorIterator::binary_op(out, x, y);
  EXPECT_TRUE(iter.device(0).is_cuda()) << "result should be CUDA";
  EXPECT_TRUE(iter.device(1).is_cuda()) << "x should be CUDA";
  EXPECT_TRUE(iter.device(2).is_cpu()) << "y should be CPU";
}
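
// A minimal sketch (not one of the original tests) of how the lifted CPU
// scalar can be read back from the iterator. It assumes the
// TensorIteratorBase::scalar_value<T>() accessor, which is what the CUDA loop
// helpers use to pick the scalar up as a kernel parameter; shown here for
// illustration only.
TEST(TensorIteratorTest, CPUScalarLiftedValueSketch) {
  if (!at::hasCUDA()) return;
  Tensor out;
  auto x = at::randn({5, 5}, kCUDA);
  auto y = at::ones(1, kCPU).squeeze();
  auto iter = TensorIterator::binary_op(out, x, y);
  // Operand 2 is the CPU scalar; its value should be readable on the host.
  EXPECT_TRUE(iter.scalar_value<float>(2) == 1.0f);
}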

// Verifies multiple zero-dim CPU inputs are not coerced to CUDA
TEST(TensorIteratorTest, CPUScalarInputs) {
  if (!at::hasCUDA()) return;
  Tensor out = at::empty({5, 5}, kCUDA);
  auto x = at::ones(1, kCPU).squeeze();
  auto y = at::ones(1, kCPU).squeeze();
  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
  ASSERT_ANY_THROW(TensorIterator::binary_op(out, x, y));
}

// Mixing CPU and CUDA tensors should raise an exception (if the CPU tensor isn't zero-dim)
TEST(TensorIteratorTest, MixedDevices) {
  if (!at::hasCUDA()) return;
  Tensor out;
  auto x = at::randn({5, 5}, kCUDA);
  auto y = at::ones({5}, kCPU);
  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
  ASSERT_ANY_THROW(TensorIterator::binary_op(out, x, y));
}

Tensor random_tensor_for_type(at::ScalarType scalar_type) {
  if (at::isFloatingType(scalar_type)) {
    return at::randn({5, 5}, at::device(kCPU).dtype(scalar_type));
  } else if (scalar_type == kBool) {
    return at::randint(0, 2, {5, 5}, at::device(kCPU).dtype(scalar_type));
  } else {
    return at::randint(1, 10, {5, 5}, at::device(kCPU).dtype(scalar_type));
  }
}

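// Note on the ASSERT_ANY_THROW(out.equal(expected)) checks in the macros
// below: `out` is declared but never defined, and the iterator allocates its
// own output tensor, so calling equal() on the still-undefined `out` is
// expected to throw. Contrast with CpuKernelMultipleOutputs_* at the bottom of
// this file, where the outputs are pre-allocated and compared with EXPECT_TRUE.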
#define UNARY_TEST_ITER_FOR_TYPE(ctype,name)                                    \
TEST(TensorIteratorTest, SerialLoopUnary_##name) {                              \
  Tensor out;                                                                   \
  auto in = random_tensor_for_type(k##name);                                    \
  auto expected = in.add(1);                                                    \
  auto iter = TensorIterator::unary_op(out, in);                                \
  at::native::cpu_serial_kernel(iter, [=](ctype a) -> ctype { return a + 1; }); \
  ASSERT_ANY_THROW(out.equal(expected));                                        \
}

#define NO_OUTPUT_UNARY_TEST_ITER_FOR_TYPE(ctype,name)                  \
TEST(TensorIteratorTest, SerialLoopUnaryNoOutput_##name) {              \
  auto in = random_tensor_for_type(k##name);                            \
  auto iter = at::TensorIteratorConfig()                                \
      .add_owned_input(in)                                              \
      .build();                                                         \
  int64_t acc = 0;                                                      \
  at::native::cpu_serial_kernel(iter, [&](ctype a) -> void { acc++; }); \
  EXPECT_TRUE(acc == in.numel());                                       \
}

#define BINARY_TEST_ITER_FOR_TYPE(ctype,name)                                            \
TEST(TensorIteratorTest, SerialLoopBinary_##name) {                                      \
  Tensor out;                                                                            \
  auto in1 = random_tensor_for_type(k##name);                                            \
  auto in2 = random_tensor_for_type(k##name);                                            \
  auto expected = in1.add(in2);                                                          \
  auto iter = TensorIterator::binary_op(out, in1, in2);                                  \
  at::native::cpu_serial_kernel(iter, [=](ctype a, ctype b) -> ctype { return a + b; }); \
  ASSERT_ANY_THROW(out.equal(expected));                                                 \
}

#define NO_OUTPUT_BINARY_TEST_ITER_FOR_TYPE(ctype,name)                          \
TEST(TensorIteratorTest, SerialLoopBinaryNoOutput_##name) {                      \
  auto in1 = random_tensor_for_type(k##name);                                    \
  auto in2 = random_tensor_for_type(k##name);                                    \
  auto iter = at::TensorIteratorConfig()                                         \
      .add_owned_input(in1)                                                      \
      .add_owned_input(in2)                                                      \
      .build();                                                                  \
  int64_t acc = 0;                                                               \
  at::native::cpu_serial_kernel(iter, [&](ctype a, ctype b) -> void { acc++; }); \
  EXPECT_TRUE(acc == in1.numel());                                               \
}

#define POINTWISE_TEST_ITER_FOR_TYPE(ctype,name)                                                      \
TEST(TensorIteratorTest, SerialLoopPointwise_##name) {                                                \
  Tensor out;                                                                                         \
  auto in1 = random_tensor_for_type(k##name);                                                         \
  auto in2 = random_tensor_for_type(k##name);                                                         \
  auto in3 = random_tensor_for_type(k##name);                                                         \
  auto expected = in1.add(in2).add(in3);                                                              \
  auto iter = at::TensorIteratorConfig()                                                              \
      .add_output(out)                                                                                \
      .add_owned_input(in1)                                                                           \
      .add_owned_input(in2)                                                                           \
      .add_owned_input(in3)                                                                           \
      .build();                                                                                       \
  at::native::cpu_serial_kernel(iter, [=](ctype a, ctype b, ctype c) -> ctype { return a + b + c; }); \
  ASSERT_ANY_THROW(out.equal(expected));                                                              \
}

#define NO_OUTPUT_POINTWISE_TEST_ITER_FOR_TYPE(ctype,name)                                \
TEST(TensorIteratorTest, SerialLoopPointwiseNoOutput_##name) {                            \
  auto in1 = random_tensor_for_type(k##name);                                             \
  auto in2 = random_tensor_for_type(k##name);                                             \
  auto in3 = random_tensor_for_type(k##name);                                             \
  auto iter = at::TensorIteratorConfig()                                                  \
      .add_owned_input(in1)                                                               \
      .add_owned_input(in2)                                                               \
      .add_owned_input(in3)                                                               \
      .build();                                                                           \
  int64_t acc = 0;                                                                        \
  at::native::cpu_serial_kernel(iter, [&](ctype a, ctype b, ctype c) -> void { acc++; }); \
  EXPECT_TRUE(acc == in1.numel());                                                        \
}

// An alternative way to compute a < b is (b - a).clamp_min(0).to(kBool).
// To prevent overflow in the subtraction (b - a) for unsigned types (uint8, bool),
// we convert the inputs to int first.
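// For example (illustrative arithmetic, not from the original comment): with
// a = 3 and b = 1 stored as uint8, b - a wraps around to 254, so clamping at 0
// would report "true" even though 3 < 1 is false; computing 1 - 3 = -2 in int
// and clamping to 0 gives the correct "false".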
#define COMPARISON_TEST_ITER_FOR_TYPE(ctype,name)                                       \
TEST(TensorIteratorTest, ComparisonLoopBinary_##name) {                                 \
  auto in1 = random_tensor_for_type(k##name);                                           \
  auto in2 = random_tensor_for_type(k##name);                                           \
  Tensor out = at::empty({0}, in1.options().dtype(kBool));                              \
  Tensor diff;                                                                          \
  if (k##name == kByte || k##name == kBool) {                                           \
    diff = in2.to(kInt).sub(in1.to(kInt));                                              \
  } else {                                                                              \
    diff = in2.sub(in1);                                                                \
  }                                                                                     \
  auto expected = diff.clamp_min(0).to(kBool);                                          \
  auto iter = TensorIterator::comparison_op(out, in1, in2);                             \
  at::native::cpu_serial_kernel(iter, [=](ctype a, ctype b) -> bool { return a < b; }); \
  EXPECT_TRUE(out.equal(expected));                                                     \
}

// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
AT_FORALL_SCALAR_TYPES(UNARY_TEST_ITER_FOR_TYPE)
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
AT_FORALL_SCALAR_TYPES(BINARY_TEST_ITER_FOR_TYPE)
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
AT_FORALL_SCALAR_TYPES(POINTWISE_TEST_ITER_FOR_TYPE)
AT_FORALL_SCALAR_TYPES(NO_OUTPUT_UNARY_TEST_ITER_FOR_TYPE)
AT_FORALL_SCALAR_TYPES(NO_OUTPUT_BINARY_TEST_ITER_FOR_TYPE)
AT_FORALL_SCALAR_TYPES(NO_OUTPUT_POINTWISE_TEST_ITER_FOR_TYPE)
AT_FORALL_SCALAR_TYPES_AND(Bool, COMPARISON_TEST_ITER_FOR_TYPE)

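// cpu_serial_kernel should run the loop entirely on the calling thread.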
TEST(TensorIteratorTest, SerialLoopSingleThread) {
  std::thread::id thread_id = std::this_thread::get_id();
  Tensor out;
  auto x = at::zeros({50000}, at::TensorOptions(kCPU).dtype(kInt));
  auto iter = TensorIterator::unary_op(out, x);
  at::native::cpu_serial_kernel(iter, [=](int a) -> int {
    std::thread::id lambda_thread_id = std::this_thread::get_id();
    EXPECT_TRUE(lambda_thread_id == thread_id);
    return a + 1;
  });
}

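// With check_all_same_dtype(false), each input keeps its own dtype;
// input_dtype() with no argument reports the dtype of the first input.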
TEST(TensorIteratorTest, InputDType) {
  auto iter = at::TensorIteratorConfig()
      .check_all_same_dtype(false)
      .add_owned_output(at::ones({1, 1}, at::dtype(at::kBool)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kFloat)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kDouble)))
      .build();
  EXPECT_TRUE(iter.input_dtype() == at::kFloat);
  EXPECT_TRUE(iter.input_dtype(0) == at::kFloat);
  EXPECT_TRUE(iter.input_dtype(1) == at::kDouble);
}

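// promote_inputs_to_common_dtype(true) promotes only the inputs to a common
// dtype (kFloat and kDouble promote to kDouble); the output keeps its dtype.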
TEST(TensorIteratorTest, ComputeCommonDTypeInputOnly) {
  auto iter = at::TensorIteratorConfig()
      .add_owned_output(at::ones({1, 1}, at::dtype(at::kBool)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kFloat)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kDouble)))
      .promote_inputs_to_common_dtype(true)
      .build();
  EXPECT_TRUE(iter.dtype(0) == at::kBool);
  EXPECT_TRUE(iter.dtype(1) == at::kDouble);
  EXPECT_TRUE(iter.dtype(2) == at::kDouble);
  EXPECT_TRUE(iter.common_dtype() == at::kDouble);
}

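// Without input promotion (and with check_all_same_dtype(false)), every
// operand keeps the dtype it was added with.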
TEST(TensorIteratorTest, DoNotComputeCommonDTypeInputOnly) {
  auto iter = at::TensorIteratorConfig()
      .check_all_same_dtype(false)
      .add_owned_output(at::ones({1, 1}, at::dtype(at::kLong)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kFloat)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kDouble)))
      .build();
  EXPECT_TRUE(iter.dtype(0) == at::kLong);
  EXPECT_TRUE(iter.dtype(1) == at::kFloat);
  EXPECT_TRUE(iter.dtype(2) == at::kDouble);
}

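// With the default same-dtype check and no input promotion, mixing kDouble
// and kInt inputs should make build() throw.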
TEST(TensorIteratorTest, FailNonPromotingBinaryOp) {
  Tensor out;
  at::TensorIteratorConfig config;
  config.add_output(out);
  config.add_owned_input(at::ones({1,1}, at::dtype(at::kDouble)));
  config.add_owned_input(at::ones({1,1}, at::dtype(at::kInt)));
  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
  ASSERT_ANY_THROW(config.build());
}

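// for_each over a const input should not materialize a lazily cloned (COW)
// tensor: the storage stays copy-on-write before and after the loop.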
TEST(TensorIteratorTest, ForEachConstInput) {
  at::Tensor out = at::zeros({10});
  at::Tensor a = at::_lazy_clone(at::arange({10}).to(at::kFloat));
  EXPECT_TRUE(c10::impl::cow::is_cow_data_ptr(a.storage().data_ptr()));

  at::TensorIteratorConfig iter_config;
  iter_config
    .add_output(out)
    .add_const_input(a);
  auto iter = iter_config.build();

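  // The raw loop receives one data pointer per operand (data[0] is the output,
  // data[1] the const input) and per-operand strides in bytes, which is why the
  // pointers are advanced as char* and cast to float* on access.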
  auto my_loop = [](char** data, const int64_t* strides, int64_t n) {
    auto* out_data = data[0];
    auto* in_data = data[1];
    for (int64_t i = 0; i < n; i++) {
      *reinterpret_cast<float*>(out_data) += *reinterpret_cast<float*>(in_data);
      out_data += strides[0];
      in_data += strides[1];
    }
  };

  iter.for_each(my_loop);
  EXPECT_TRUE(c10::impl::cow::is_cow_data_ptr(a.storage().data_ptr()));
  EXPECT_TRUE(out.eq(a).all().item<bool>());
}

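// cpu_kernel_multiple_outputs lets one pass over the inputs fill several
// outputs at once; here the lambda returns both the sum and the product.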
#define MULTIPLE_OUTPUTS_TEST_ITER_FOR_TYPE(ctype,name)                                             \
TEST(TensorIteratorTest, CpuKernelMultipleOutputs_##name) {                                         \
  auto in1 = random_tensor_for_type(k##name);                                                       \
  auto in2 = random_tensor_for_type(k##name);                                                       \
  Tensor out1 = at::empty({0}, in1.options());                                                      \
  Tensor out2 = at::empty({0}, in1.options());                                                      \
  auto expected1 = in1.add(in2);                                                                    \
  auto expected2 = in1.mul(in2);                                                                    \
  auto iter = at::TensorIteratorConfig()                                                            \
    .add_output(out1)                                                                               \
    .add_output(out2)                                                                               \
    .add_owned_input(in1)                                                                           \
    .add_owned_input(in2)                                                                           \
    .build();                                                                                       \
  at::native::cpu_kernel_multiple_outputs(iter, [=](ctype a, ctype b) -> std::tuple<ctype, ctype> { \
    ctype add = a + b;                                                                              \
    ctype mul = a * b;                                                                              \
    return std::tuple<ctype, ctype>(add, mul);                                                      \
  });                                                                                               \
  EXPECT_TRUE(out1.equal(expected1));                                                               \
  EXPECT_TRUE(out2.equal(expected2));                                                               \
}
AT_FORALL_SCALAR_TYPES(MULTIPLE_OUTPUTS_TEST_ITER_FOR_TYPE)