xref: /aosp_15_r20/external/pytorch/aten/src/ATen/cuda/PeerToPeerAccess.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <ATen/cuda/PeerToPeerAccess.h>
2 
3 #include <ATen/cuda/CUDAContext.h>
4 
5 #include <c10/cuda/CUDACachingAllocator.h>
6 #include <c10/cuda/CUDAGuard.h>
7 #include <c10/util/Exception.h>
8 #include <c10/util/irange.h>
9 
10 #include <vector>
11 
12 namespace at::cuda {
13 
// Flattened num_devices_ x num_devices_ table of cached peer-access answers.
// The entry for (dev, dev_to_access) lives at index
// dev * num_devices_ + dev_to_access and holds 1 (copy allowed),
// 0 (copy not allowed) or -1 (unknown, not queried yet).
14 static std::vector<int8_t> p2pAccessEnabled_;
// Number of devices the table above was sized for; stays at -1 until
// detail::init_p2p_access_cache() has been called.
15 static int64_t num_devices_ = -1;
16 
17 namespace detail {
18 
init_p2p_access_cache(int64_t num_devices)19 void init_p2p_access_cache(int64_t num_devices) {
20   // p2pAccessEnabled records if p2p copies are allowed between pairs of
21   // devices. Values include "1" (copy allowed), "0" (copy not allowed), and
22   // "-1" (unknown).
23   // Currently the max number of gpus in P2P group is 8, so if there are more
24   // we enable P2P in groups of 8
25   p2pAccessEnabled_.clear();
26   p2pAccessEnabled_.resize(num_devices * num_devices, -1);
27   num_devices_ = num_devices;
28 
29   for (const auto i : c10::irange(num_devices)) {
30     p2pAccessEnabled_[i * num_devices + i] = 1;
31   }
32 }
33 
34 }  // namespace detail
35 
get_p2p_access(int dev,int dev_to_access)36 bool get_p2p_access(int dev, int dev_to_access) {
37   at::globalContext().lazyInitCUDA();
38 
39   TORCH_CHECK(dev >= 0 || dev < num_devices_,
40               dev, " is not a device");
41   TORCH_CHECK(dev_to_access >= 0 || dev_to_access < num_devices_,
42               dev_to_access, " is not a device");
43   TORCH_INTERNAL_ASSERT(num_devices_ >= 0, "p2p access cache not initialized");
44 
45   auto &cache = p2pAccessEnabled_[dev * num_devices_ + dev_to_access];
46 
47   if (cache != -1) {
48     return cache;
49   }
50 
51   int result = 0;
52   C10_CUDA_CHECK(cudaDeviceCanAccessPeer(&result, dev, dev_to_access));
53   cache = result ? 1 : 0;
54   if (cache) {
55     CUDACachingAllocator::enablePeerAccess(dev, dev_to_access);
56   }
57 
58   return cache;
59 }
60 
61 }  // namespace at::cuda
62