#include <gtest/gtest.h>

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/util/irange.h>
#include <caffe2/core/init.h>
#include <caffe2/core/operator.h>
#include <caffe2/core/context_gpu.h>
#include <caffe2/utils/math.h>

// dumbest possible copies
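// cudaMemcpyDefault lets the driver infer the copy direction from the pointer
// values (host vs. device), so a single helper covers both reading and writing
// individual elements of a CUDA buffer.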
template<typename T>
T cuda_get(T* addr) {
  T result;
  CUDA_ENFORCE(cudaMemcpy(&result, addr, sizeof(T), cudaMemcpyDefault));
  return result;
}

template<typename T>
void cuda_set(T* addr, T value) {
  CUDA_ENFORCE(cudaMemcpy(addr, &value, sizeof(T), cudaMemcpyDefault));
}

TEST(CUDACaffe2ToPytorch, SimpleLegacy) {
  if (!at::cuda::is_available()) return;
  caffe2::Tensor c2_tensor(caffe2::CUDA);
  c2_tensor.Resize(4, 4);
  auto data = c2_tensor.mutable_data<int64_t>();
  {
    caffe2::CUDAContext context;
    caffe2::math::Set<int64_t>(16, 777, data, &context);
  }
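  // Constructing an at::Tensor from the Caffe2 tensor should not copy; the
  // ATen handle is expected to see the same CUDA buffer just filled with 777.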
  at::Tensor at_tensor(c2_tensor);
  ASSERT_TRUE(at_tensor.is_cuda());

  auto at_cpu = at_tensor.cpu();
  auto it = at_cpu.data_ptr<int64_t>();
  for (const auto i : c10::irange(16)) {
    ASSERT_EQ(it[i], 777);
  }
}

TEST(CUDACaffe2ToPytorch, Simple) {
  if (!at::cuda::is_available()) return;
  caffe2::Tensor c2_tensor =
      caffe2::empty({4, 4}, at::dtype<int64_t>().device(caffe2::CUDA));
  auto data = c2_tensor.mutable_data<int64_t>();
  {
    caffe2::CUDAContext context;
    caffe2::math::Set<int64_t>(16, 777, data, &context);
  }
  at::Tensor at_tensor(c2_tensor);
  ASSERT_TRUE(at_tensor.is_cuda());

  auto at_cpu = at_tensor.cpu();
  auto it = at_cpu.data_ptr<int64_t>();
  for (const auto i : c10::irange(16)) {
    ASSERT_EQ(it[i], 777);
  }
}

TEST(CUDACaffe2ToPytorch, Op) {
  if (!at::cuda::is_available()) return;
  caffe2::Tensor c2_tensor =
      caffe2::empty({3, 3}, at::dtype<int64_t>().device(caffe2::CUDA));
  auto data = c2_tensor.mutable_data<int64_t>();
  {
    caffe2::CUDAContext context;
    caffe2::math::Set<int64_t>(9, 111, data, &context);
  }
  at::Tensor at_tensor(c2_tensor);
  ASSERT_TRUE(at_tensor.is_cuda());

  ASSERT_EQ(at::sum(at_tensor).item<int64_t>(), 999);
}

TEST(CUDAPytorchToCaffe2, Op) {
  if (!at::cuda::is_available()) return;
  caffe2::Workspace workspace;
  caffe2::NetDef net;

  auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
  auto at_tensor_b = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
  auto at_tensor_c = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));

  auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), caffe2::Tensor(at_tensor_a));
  auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), caffe2::Tensor(at_tensor_b));
  (void)c2_tensor_a;
  (void)c2_tensor_b;

  // Test Alias
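  // (Alias() hands out another Tensor handle to the same underlying storage,
  // so blob "c" should still point at at_tensor_c's CUDA memory, not a copy.)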
  {
    caffe2::Tensor c2_tensor_from_aten(at_tensor_c);
    BlobSetTensor(workspace.CreateBlob("c"), c2_tensor_from_aten.Alias());
  }

  {
    auto op = net.add_op();
    op->set_type("Sum");
    op->add_input("a");
    op->add_input("b");
    op->add_input("c");
    op->add_output("d");
    op->mutable_device_option()->set_device_type(caffe2::PROTO_CUDA);
  }

  workspace.RunNetOnce(net);

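  // Sum computes d = a + b + c elementwise over tensors of ones, so every
  // element of d should be 3.0f and the full reduction should be 75.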
  const auto& result = workspace.GetBlob("d")->Get<caffe2::Tensor>();
  ASSERT_EQ(result.GetDeviceType(), caffe2::CUDA);

  auto data = result.data<float>();
  for (const auto i : c10::irange(25)) {
    ASSERT_EQ(cuda_get(data + i), 3.0);
  }
  at::Tensor at_result(result);
  ASSERT_TRUE(at_result.is_cuda());
  ASSERT_EQ(at::sum(at_result).item<float>(), 75);
}

TEST(CUDAPytorchToCaffe2, SharedStorageWrite) {
  if (!at::cuda::is_available()) return;
  auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
  auto at_tensor_b = at_tensor_a.view({25});

  caffe2::Tensor c2_tensor_a(at_tensor_a);
  caffe2::Tensor c2_tensor_b(at_tensor_b);
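  // at_tensor_a, its view at_tensor_b, and both Caffe2 wrappers all refer to
  // the same CUDA storage, so a write through any one handle should be
  // observable through the others.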

  // change is visible everywhere
  cuda_set<float>(c2_tensor_a.mutable_data<float>() + 1, 123);
  ASSERT_EQ(cuda_get(c2_tensor_b.mutable_data<float>() + 1), 123);
  ASSERT_EQ(at_tensor_a[0][1].item().to<float>(), 123);
  ASSERT_EQ(at_tensor_b[1].item().to<float>(), 123);
}

TEST(CUDAPytorchToCaffe2, MutualResizes) {
  if (!at::cuda::is_available()) return;
  auto at_tensor = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));

  caffe2::Tensor c2_tensor(at_tensor);
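  // c2_tensor and at_tensor share a single TensorImpl, so both data writes and
  // shape changes made through either handle are expected to show up on the
  // other one.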

  // change is visible
  cuda_set<float>(c2_tensor.mutable_data<float>(), 123);
  ASSERT_EQ(at_tensor[0][0].item().to<float>(), 123);

  // resize PT tensor in smaller direction - storage is preserved
  at_tensor.resize_({4, 4});
  cuda_set<float>(c2_tensor.mutable_data<float>() + 1, 234);
  ASSERT_EQ(at_tensor[0][1].item().to<float>(), 234);

  // resize PT tensor in larger direction - storage is preserved
  at_tensor.resize_({6, 6});
  cuda_set<float>(c2_tensor.mutable_data<float>() + 2, 345);
  ASSERT_EQ(at_tensor[0][2].item().to<float>(), 345);
  ASSERT_EQ(c2_tensor.sizes()[0], 6);
  ASSERT_EQ(c2_tensor.sizes()[1], 6);

  // resize Caffe2 tensor - semantics are to NOT preserve the data, but the
  // TensorImpl is still shared
  c2_tensor.Resize(7, 7);
  cuda_set<float>(c2_tensor.mutable_data<float>() + 3, 456);
  ASSERT_EQ(at_tensor[0][3].item().to<float>(), 456);
  ASSERT_EQ(at_tensor.sizes()[0], 7);
  ASSERT_EQ(at_tensor.sizes()[1], 7);
}
166