xref: /aosp_15_r20/external/pytorch/test/cpp/c10d/CUDATest.cu (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include "CUDATest.hpp"
2 #include <ATen/cuda/Exceptions.h>
3 
4 namespace c10d {
5 namespace test {
6 
7 namespace {
// Samples the device clock counter used by waitClocks. Having a single
// accessor guarantees that the start sample and every later sample come from
// the same counter with the same width.
__device__ __forceinline__ uint64_t readClockCounter() {
  // Few AMD specific GPUs have different clock intrinsic
#if defined(__GFX11__) && defined(USE_ROCM) && !defined(__CUDA_ARCH__)
  return static_cast<uint64_t>(wall_clock64());
#else
  return static_cast<uint64_t>(clock64());
#endif
}

// Busy-waits on the device until at least `count` ticks of the clock counter
// have elapsed since the kernel started executing.
//
// count: number of counter ticks to spin for; 0 returns immediately.
//
// Every launched thread spins independently; the kernel touches no memory.
__global__ void waitClocks(const uint64_t count) {
  const uint64_t start = readClockCounter();
  // The elapsed value is computed in unsigned 64-bit arithmetic from the same
  // counter that produced `start`. Mixing clock() with a clock64() start
  // sample can yield a negative difference, which compares as a huge unsigned
  // value against `count` and ends the wait immediately.
  uint64_t elapsed = 0;
  while (elapsed < count) {
    elapsed = readClockCounter() - start;
  }
}
20 
21 } // namespace
22 
// Enqueues the waitClocks kernel on `stream`, passing `clocks` as the number
// of clock ticks the kernel should spin for.
//
// stream: stream the kernel is launched on.
// clocks: forwarded unchanged to waitClocks.
void cudaSleep(at::cuda::CUDAStream& stream, uint64_t clocks) {
  // One block of one thread: the kernel only spins on the clock counter.
  const dim3 gridDims(1);
  const dim3 blockDims(1);
  const size_t dynamicSharedBytes = 0;
  auto rawStream = stream.stream();
  waitClocks<<<gridDims, blockDims, dynamicSharedBytes, rawStream>>>(clocks);
  // NOTE(review): presumably reports a failed launch; see the macro definition.
  C10_CUDA_KERNEL_LAUNCH_CHECK();
}
27 
// Returns the device count reported by cudaGetDeviceCount.
//
// The counter starts at 0, so 0 is returned whenever the runtime call leaves
// it untouched. NOTE(review): C10_CUDA_CHECK_WARN presumably only warns on
// failure rather than throwing; confirm against its definition.
int cudaNumDevices() {
  int deviceCount = 0;
  C10_CUDA_CHECK_WARN(cudaGetDeviceCount(&deviceCount));
  return deviceCount;
}
33 
34 } // namespace test
35 } // namespace c10d
36