#include <gtest/gtest.h>
#include <thread>

#include <ATen/ATen.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/cpu/Loops.h>

using namespace at;

// An operation with a CUDA tensor and CPU scalar should keep the scalar
// on the CPU (and lift it to a parameter).
TEST(TensorIteratorTest, CPUScalar) {
  if (!at::hasCUDA()) return;
  Tensor out;
  auto x = at::randn({5, 5}, kCUDA);
  auto y = at::ones(1, kCPU).squeeze();
  auto iter = TensorIterator::binary_op(out, x, y);
  EXPECT_TRUE(iter.device(0).is_cuda()) << "result should be CUDA";
  EXPECT_TRUE(iter.device(1).is_cuda()) << "x should be CUDA";
  EXPECT_TRUE(iter.device(2).is_cpu()) << "y should be CPU";
}

// Verifies multiple zero-dim CPU inputs are not coerced to CUDA
TEST(TensorIteratorTest, CPUScalarInputs) {
  if (!at::hasCUDA()) return;
  Tensor out = at::empty({5, 5}, kCUDA);
  auto x = at::ones(1, kCPU).squeeze();
  auto y = at::ones(1, kCPU).squeeze();
  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
  ASSERT_ANY_THROW(TensorIterator::binary_op(out, x, y));
}

// Mixing CPU and CUDA tensors should raise an exception (if the CPU tensor isn't zero-dim)
TEST(TensorIteratorTest, MixedDevices) {
  if (!at::hasCUDA()) return;
  Tensor out;
  auto x = at::randn({5, 5}, kCUDA);
  auto y = at::ones({5}, kCPU);
  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
  ASSERT_ANY_THROW(TensorIterator::binary_op(out, x, y));
}

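// Builds a 5x5 CPU test tensor of the requested dtype: random normals for
// floating-point types, random 0/1 values for bool, and random integers in
// [1, 10) for the remaining integral types.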
Tensor random_tensor_for_type(at::ScalarType scalar_type) {
  if (at::isFloatingType(scalar_type)) {
    return at::randn({5, 5}, at::device(kCPU).dtype(scalar_type));
  } else if (scalar_type == kBool) {
    return at::randint(0, 2, {5, 5}, at::device(kCPU).dtype(scalar_type));
  } else {
    return at::randint(1, 10, {5, 5}, at::device(kCPU).dtype(scalar_type));
  }
}

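// The macros below stamp out one serial-loop test per scalar type. Each test
// runs an elementwise kernel through at::native::cpu_serial_kernel and checks
// the result against the eager reference; the *NoOutput variants only count
// how many elements were visited.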
#define UNARY_TEST_ITER_FOR_TYPE(ctype,name) \
  TEST(TensorIteratorTest, SerialLoopUnary_##name) { \
    Tensor out; \
    auto in = random_tensor_for_type(k##name); \
    auto expected = in.add(1); \
    auto iter = TensorIterator::unary_op(out, in); \
    at::native::cpu_serial_kernel(iter, [=](ctype a) -> ctype { return a + 1; }); \
    EXPECT_TRUE(out.equal(expected)); \
  }

#define NO_OUTPUT_UNARY_TEST_ITER_FOR_TYPE(ctype,name) \
  TEST(TensorIteratorTest, SerialLoopUnaryNoOutput_##name) { \
    auto in = random_tensor_for_type(k##name); \
    auto iter = at::TensorIteratorConfig() \
        .add_owned_input(in) \
        .build(); \
    int64_t acc = 0; \
    at::native::cpu_serial_kernel(iter, [&](ctype a) -> void { acc++; }); \
    EXPECT_TRUE(acc == in.numel()); \
  }

#define BINARY_TEST_ITER_FOR_TYPE(ctype,name) \
  TEST(TensorIteratorTest, SerialLoopBinary_##name) { \
    Tensor out; \
    auto in1 = random_tensor_for_type(k##name); \
    auto in2 = random_tensor_for_type(k##name); \
    auto expected = in1.add(in2); \
    auto iter = TensorIterator::binary_op(out, in1, in2); \
    at::native::cpu_serial_kernel(iter, [=](ctype a, ctype b) -> ctype { return a + b; }); \
    EXPECT_TRUE(out.equal(expected)); \
  }

#define NO_OUTPUT_BINARY_TEST_ITER_FOR_TYPE(ctype,name) \
  TEST(TensorIteratorTest, SerialLoopBinaryNoOutput_##name) { \
    auto in1 = random_tensor_for_type(k##name); \
    auto in2 = random_tensor_for_type(k##name); \
    auto iter = at::TensorIteratorConfig() \
        .add_owned_input(in1) \
        .add_owned_input(in2) \
        .build(); \
    int64_t acc = 0; \
    at::native::cpu_serial_kernel(iter, [&](ctype a, ctype b) -> void { acc++; }); \
    EXPECT_TRUE(acc == in1.numel()); \
  }

#define POINTWISE_TEST_ITER_FOR_TYPE(ctype,name) \
  TEST(TensorIteratorTest, SerialLoopPointwise_##name) { \
    Tensor out; \
    auto in1 = random_tensor_for_type(k##name); \
    auto in2 = random_tensor_for_type(k##name); \
    auto in3 = random_tensor_for_type(k##name); \
    auto expected = in1.add(in2).add(in3); \
    auto iter = at::TensorIteratorConfig() \
        .add_output(out) \
        .add_owned_input(in1) \
        .add_owned_input(in2) \
        .add_owned_input(in3) \
        .build(); \
    at::native::cpu_serial_kernel(iter, [=](ctype a, ctype b, ctype c) -> ctype { return a + b + c; }); \
    EXPECT_TRUE(out.equal(expected)); \
  }

#define NO_OUTPUT_POINTWISE_TEST_ITER_FOR_TYPE(ctype,name) \
  TEST(TensorIteratorTest, SerialLoopPointwiseNoOutput_##name) { \
    auto in1 = random_tensor_for_type(k##name); \
    auto in2 = random_tensor_for_type(k##name); \
    auto in3 = random_tensor_for_type(k##name); \
    auto iter = at::TensorIteratorConfig() \
        .add_owned_input(in1) \
        .add_owned_input(in2) \
        .add_owned_input(in3) \
        .build(); \
    int64_t acc = 0; \
    at::native::cpu_serial_kernel(iter, [&](ctype a, ctype b, ctype c) -> void { acc++; }); \
    EXPECT_TRUE(acc == in1.numel()); \
  }

// An alternative way to calculate a < b is (b - a).clamp_min(0).to(kBool).
// To prevent overflow in the subtraction (b - a) for unsigned types (uint8, bool),
// we convert the inputs to int first.
#define COMPARISON_TEST_ITER_FOR_TYPE(ctype,name) \
  TEST(TensorIteratorTest, ComparisonLoopBinary_##name) { \
    auto in1 = random_tensor_for_type(k##name); \
    auto in2 = random_tensor_for_type(k##name); \
    Tensor out = at::empty({0}, in1.options().dtype(kBool)); \
    Tensor diff; \
    if (k##name == kByte || k##name == kBool) { \
      diff = in2.to(kInt).sub(in1.to(kInt)); \
    } else { \
      diff = in2.sub(in1); \
    } \
    auto expected = diff.clamp_min(0).to(kBool); \
    auto iter = TensorIterator::comparison_op(out, in1, in2); \
    at::native::cpu_serial_kernel(iter, [=](ctype a, ctype b) -> bool { return a < b; }); \
    EXPECT_TRUE(out.equal(expected)); \
  }

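// Instantiate the loop tests above for each of the standard scalar types; the
// comparison test is additionally instantiated for Bool.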
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
AT_FORALL_SCALAR_TYPES(UNARY_TEST_ITER_FOR_TYPE)
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
AT_FORALL_SCALAR_TYPES(BINARY_TEST_ITER_FOR_TYPE)
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
AT_FORALL_SCALAR_TYPES(POINTWISE_TEST_ITER_FOR_TYPE)
AT_FORALL_SCALAR_TYPES(NO_OUTPUT_UNARY_TEST_ITER_FOR_TYPE)
AT_FORALL_SCALAR_TYPES(NO_OUTPUT_BINARY_TEST_ITER_FOR_TYPE)
AT_FORALL_SCALAR_TYPES(NO_OUTPUT_POINTWISE_TEST_ITER_FOR_TYPE)
AT_FORALL_SCALAR_TYPES_AND(Bool, COMPARISON_TEST_ITER_FOR_TYPE)

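// cpu_serial_kernel should invoke the kernel only on the calling thread, even
// for an input large enough that the parallel path would otherwise split it.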
TEST(TensorIteratorTest, SerialLoopSingleThread) {
  std::thread::id thread_id = std::this_thread::get_id();
  Tensor out;
  auto x = at::zeros({50000}, at::TensorOptions(kCPU).dtype(kInt));
  auto iter = TensorIterator::unary_op(out, x);
  at::native::cpu_serial_kernel(iter, [=](int a) -> int {
    std::thread::id lambda_thread_id = std::this_thread::get_id();
    EXPECT_TRUE(lambda_thread_id == thread_id);
    return a + 1;
  });
}

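// input_dtype(i) reports the dtype of the i-th input; with no argument it
// defaults to the first input.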
TEST(TensorIteratorTest, InputDType) {
  auto iter = at::TensorIteratorConfig()
      .check_all_same_dtype(false)
      .add_owned_output(at::ones({1, 1}, at::dtype(at::kBool)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kFloat)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kDouble)))
      .build();
  EXPECT_TRUE(iter.input_dtype() == at::kFloat);
  EXPECT_TRUE(iter.input_dtype(0) == at::kFloat);
  EXPECT_TRUE(iter.input_dtype(1) == at::kDouble);
}

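// promote_inputs_to_common_dtype promotes only the inputs to the common
// dtype; the output keeps the dtype it was created with.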
TEST(TensorIteratorTest, ComputeCommonDTypeInputOnly) {
  auto iter = at::TensorIteratorConfig()
      .add_owned_output(at::ones({1, 1}, at::dtype(at::kBool)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kFloat)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kDouble)))
      .promote_inputs_to_common_dtype(true)
      .build();
  EXPECT_TRUE(iter.dtype(0) == at::kBool);
  EXPECT_TRUE(iter.dtype(1) == at::kDouble);
  EXPECT_TRUE(iter.dtype(2) == at::kDouble);
  EXPECT_TRUE(iter.common_dtype() == at::kDouble);
}

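// Without promotion, every operand keeps its original dtype.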
TEST(TensorIteratorTest, DoNotComputeCommonDTypeInputOnly) {
  auto iter = at::TensorIteratorConfig()
      .check_all_same_dtype(false)
      .add_owned_output(at::ones({1, 1}, at::dtype(at::kLong)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kFloat)))
      .add_owned_input(at::ones({1, 1}, at::dtype(at::kDouble)))
      .build();
  EXPECT_TRUE(iter.dtype(0) == at::kLong);
  EXPECT_TRUE(iter.dtype(1) == at::kFloat);
  EXPECT_TRUE(iter.dtype(2) == at::kDouble);
}

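// Mixed input dtypes without an explicit promotion policy should be rejected
// when the iterator is built.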
TEST(TensorIteratorTest, FailNonPromotingBinaryOp) {
  Tensor out;
  at::TensorIteratorConfig config;
  config.add_output(out);
  config.add_owned_input(at::ones({1, 1}, at::dtype(at::kDouble)));
  config.add_owned_input(at::ones({1, 1}, at::dtype(at::kInt)));
  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
  ASSERT_ANY_THROW(config.build());
}

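// Iterating over a const input should not materialize its copy-on-write
// storage: the data pointer is still COW after for_each.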
TEST(TensorIteratorTest, ForEachConstInput) {
  at::Tensor out = at::zeros({10});
  at::Tensor a = at::_lazy_clone(at::arange(10).to(at::kFloat));
  EXPECT_TRUE(c10::impl::cow::is_cow_data_ptr(a.storage().data_ptr()));

  at::TensorIteratorConfig iter_config;
  iter_config
      .add_output(out)
      .add_const_input(a);
  auto iter = iter_config.build();

  auto my_loop = [](char** data, const int64_t* strides, int64_t n) {
    auto* out_data = data[0];
    auto* in_data = data[1];
    for (int64_t i = 0; i < n; i++) {
      *reinterpret_cast<float*>(out_data) += *reinterpret_cast<float*>(in_data);
      out_data += strides[0];
      in_data += strides[1];
    }
  };

  iter.for_each(my_loop);
  EXPECT_TRUE(c10::impl::cow::is_cow_data_ptr(a.storage().data_ptr()));
  EXPECT_TRUE(out.eq(a).all().item<bool>());
}

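// cpu_kernel_multiple_outputs writes one tuple element per registered output;
// here the kernel produces the elementwise sum and product in a single pass.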
#define MULTIPLE_OUTPUTS_TEST_ITER_FOR_TYPE(ctype,name) \
  TEST(TensorIteratorTest, CpuKernelMultipleOutputs_##name) { \
    auto in1 = random_tensor_for_type(k##name); \
    auto in2 = random_tensor_for_type(k##name); \
    Tensor out1 = at::empty({0}, in1.options()); \
    Tensor out2 = at::empty({0}, in1.options()); \
    auto expected1 = in1.add(in2); \
    auto expected2 = in1.mul(in2); \
    auto iter = at::TensorIteratorConfig() \
        .add_output(out1) \
        .add_output(out2) \
        .add_owned_input(in1) \
        .add_owned_input(in2) \
        .build(); \
    at::native::cpu_kernel_multiple_outputs(iter, [=](ctype a, ctype b) -> std::tuple<ctype, ctype> { \
      ctype add = a + b; \
      ctype mul = a * b; \
      return std::tuple<ctype, ctype>(add, mul); \
    }); \
    EXPECT_TRUE(out1.equal(expected1)); \
    EXPECT_TRUE(out2.equal(expected2)); \
  }
AT_FORALL_SCALAR_TYPES(MULTIPLE_OUTPUTS_TEST_ITER_FOR_TYPE)