#include <gtest/gtest.h>

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/util/irange.h>
#include <caffe2/core/init.h>
#include <caffe2/core/operator.h>
#include <caffe2/core/context_gpu.h>
#include <caffe2/utils/math.h>

// Dumbest possible copies: read/write a single value in device memory from the host.
template <typename T>
T cuda_get(T* addr) {
  T result;
  CUDA_ENFORCE(cudaMemcpy(&result, addr, sizeof(T), cudaMemcpyDefault));
  return result;
}

template <typename T>
void cuda_set(T* addr, T value) {
  CUDA_ENFORCE(cudaMemcpy(addr, &value, sizeof(T), cudaMemcpyDefault));
}

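// Legacy-style caffe2::Tensor (allocated via Resize/mutable_data) wrapped as an
// at::Tensor: data written on the Caffe2 side must be visible from ATen.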
TEST(CUDACaffe2ToPytorch, SimpleLegacy) {
  if (!at::cuda::is_available()) return;
  caffe2::Tensor c2_tensor(caffe2::CUDA);
  c2_tensor.Resize(4, 4);
  auto data = c2_tensor.mutable_data<int64_t>();
  {
    caffe2::CUDAContext context;
    caffe2::math::Set<int64_t>(16, 777, data, &context);
  }
  at::Tensor at_tensor(c2_tensor);
  ASSERT_TRUE(at_tensor.is_cuda());

  auto at_cpu = at_tensor.cpu();
  auto it = at_cpu.data_ptr<int64_t>();
  for (const auto i : c10::irange(16)) {
    ASSERT_EQ(it[i], 777);
  }
}

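// Same check as above, but the Caffe2 tensor is created with caffe2::empty().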
TEST(CUDACaffe2ToPytorch, Simple) {
  if (!at::cuda::is_available()) return;
  caffe2::Tensor c2_tensor =
      caffe2::empty({4, 4}, at::dtype<int64_t>().device(caffe2::CUDA));
  auto data = c2_tensor.mutable_data<int64_t>();
  {
    caffe2::CUDAContext context;
    caffe2::math::Set<int64_t>(16, 777, data, &context);
  }
  at::Tensor at_tensor(c2_tensor);
  ASSERT_TRUE(at_tensor.is_cuda());

  auto at_cpu = at_tensor.cpu();
  auto it = at_cpu.data_ptr<int64_t>();
  for (const auto i : c10::irange(16)) {
    ASSERT_EQ(it[i], 777);
  }
}

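// An ATen op (at::sum) runs directly on a CUDA tensor that was filled through Caffe2.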
TEST(CUDACaffe2ToPytorch, Op) {
  if (!at::cuda::is_available()) return;
  caffe2::Tensor c2_tensor =
      caffe2::empty({3, 3}, at::dtype<int64_t>().device(caffe2::CUDA));
  auto data = c2_tensor.mutable_data<int64_t>();
  {
    caffe2::CUDAContext context;
    caffe2::math::Set<int64_t>(9, 111, data, &context);
  }
  at::Tensor at_tensor(c2_tensor);
  ASSERT_TRUE(at_tensor.is_cuda());

  ASSERT_EQ(at::sum(at_tensor).item<int64_t>(), 999);
}

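// ATen-created CUDA tensors are fed into a Caffe2 net: the CUDA "Sum" op reads the
// ATen-allocated storage, and the output can be inspected from both sides.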
TEST(CUDAPytorchToCaffe2, Op) {
  if (!at::cuda::is_available()) return;
  caffe2::Workspace workspace;
  caffe2::NetDef net;

  auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
  auto at_tensor_b = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
  auto at_tensor_c = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));

  auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), caffe2::Tensor(at_tensor_a));
  auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), caffe2::Tensor(at_tensor_b));
  (void)c2_tensor_a;
  (void)c2_tensor_b;

  // Test Alias
  {
    caffe2::Tensor c2_tensor_from_aten(at_tensor_c);
    BlobSetTensor(workspace.CreateBlob("c"), c2_tensor_from_aten.Alias());
  }

  // Build a one-op net: d = Sum(a, b, c), running on CUDA
  {
    auto op = net.add_op();
    op->set_type("Sum");
    op->add_input("a");
    op->add_input("b");
    op->add_input("c");
    op->add_output("d");
    op->mutable_device_option()->set_device_type(caffe2::PROTO_CUDA);
  }

  workspace.RunNetOnce(net);

  // Check the result through the Caffe2 handle, then again after wrapping it in ATen
  const auto& result = workspace.GetBlob("d")->Get<caffe2::Tensor>();
  ASSERT_EQ(result.GetDeviceType(), caffe2::CUDA);

  auto data = result.data<float>();
  for (const auto i : c10::irange(25)) {
    ASSERT_EQ(cuda_get(data + i), 3.0);
  }
  at::Tensor at_result(result);
  ASSERT_TRUE(at_result.is_cuda());
  ASSERT_EQ(at::sum(at_result).item<float>(), 75);
}

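// A view created on the ATen side shares storage with the Caffe2 wrappers:
// a write through one handle is visible through all of them.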
TEST(CUDAPytorchToCaffe2, SharedStorageWrite) {
  if (!at::cuda::is_available()) return;
  auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
  auto at_tensor_b = at_tensor_a.view({25});

  caffe2::Tensor c2_tensor_a(at_tensor_a);
  caffe2::Tensor c2_tensor_b(at_tensor_b);

  // change is visible everywhere
  cuda_set<float>(c2_tensor_a.mutable_data<float>() + 1, 123);
  ASSERT_EQ(cuda_get(c2_tensor_b.mutable_data<float>() + 1), 123);
  ASSERT_EQ(at_tensor_a[0][1].item().to<float>(), 123);
  ASSERT_EQ(at_tensor_b[1].item().to<float>(), 123);
}

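// Resizing from either side keeps the same shared TensorImpl: PyTorch's resize_
// preserves the storage, while Caffe2's Resize does not guarantee the old data survives.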
TEST(CUDAPytorchToCaffe2, MutualResizes) {
  if (!at::cuda::is_available()) return;
  auto at_tensor = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));

  caffe2::Tensor c2_tensor(at_tensor);

  // change is visible
  cuda_set<float>(c2_tensor.mutable_data<float>(), 123);
  ASSERT_EQ(at_tensor[0][0].item().to<float>(), 123);

  // resize PT tensor in smaller direction - storage is preserved
  at_tensor.resize_({4, 4});
  cuda_set<float>(c2_tensor.mutable_data<float>() + 1, 234);
  ASSERT_EQ(at_tensor[0][1].item().to<float>(), 234);

  // resize PT tensor in larger direction - storage is preserved
  at_tensor.resize_({6, 6});
  cuda_set<float>(c2_tensor.mutable_data<float>() + 2, 345);
  ASSERT_EQ(at_tensor[0][2].item().to<float>(), 345);
  ASSERT_EQ(c2_tensor.sizes()[0], 6);
  ASSERT_EQ(c2_tensor.sizes()[1], 6);

  // resize Caffe2 tensor - semantics are to NOT preserve the data, but the
  // TensorImpl is still shared
  c2_tensor.Resize(7, 7);
  cuda_set<float>(c2_tensor.mutable_data<float>() + 3, 456);
  ASSERT_EQ(at_tensor[0][3].item().to<float>(), 456);
  ASSERT_EQ(at_tensor.sizes()[0], 7);
  ASSERT_EQ(at_tensor.sizes()[1], 7);
}