xref: /aosp_15_r20/external/pytorch/c10/cuda/CUDAGraphsC10Utils.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 
3 #include <c10/cuda/CUDAStream.h>
4 #include <iostream>
5 #include <utility>
6 
7 // CUDA Graphs utils used by c10 and aten.
8 // aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only.
9 
10 namespace c10::cuda {
11 
12 using CaptureId_t = unsigned long long;
13 
14 // first is set if the instance is created by CUDAGraph::capture_begin.
15 // second is set if the instance is created by at::cuda::graph_pool_handle.
16 using MempoolId_t = std::pair<CaptureId_t, CaptureId_t>;
17 
18 // RAII guard for "cudaStreamCaptureMode", a thread-local value
19 // that controls the error-checking strictness of a capture.
20 struct C10_CUDA_API CUDAStreamCaptureModeGuard {
CUDAStreamCaptureModeGuardCUDAStreamCaptureModeGuard21   CUDAStreamCaptureModeGuard(cudaStreamCaptureMode desired)
22       : strictness_(desired) {
23     C10_CUDA_CHECK(cudaThreadExchangeStreamCaptureMode(&strictness_));
24   }
~CUDAStreamCaptureModeGuardCUDAStreamCaptureModeGuard25   ~CUDAStreamCaptureModeGuard() {
26     C10_CUDA_CHECK_WARN(cudaThreadExchangeStreamCaptureMode(&strictness_));
27   }
28 
29  private:
30   cudaStreamCaptureMode strictness_;
31 };
32 
33 // Protects against enum cudaStreamCaptureStatus implementation changes.
34 // Some compilers seem not to like static_assert without the messages.
35 static_assert(
36     int(cudaStreamCaptureStatus::cudaStreamCaptureStatusNone) == 0,
37     "unexpected int(cudaStreamCaptureStatusNone) value");
38 static_assert(
39     int(cudaStreamCaptureStatus::cudaStreamCaptureStatusActive) == 1,
40     "unexpected int(cudaStreamCaptureStatusActive) value");
41 static_assert(
42     int(cudaStreamCaptureStatus::cudaStreamCaptureStatusInvalidated) == 2,
43     "unexpected int(cudaStreamCaptureStatusInvalidated) value");
44 
45 enum class CaptureStatus : int {
46   None = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusNone),
47   Active = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusActive),
48   Invalidated = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusInvalidated)
49 };
50 
51 inline std::ostream& operator<<(std::ostream& os, CaptureStatus status) {
52   switch (status) {
53     case CaptureStatus::None:
54       os << "cudaStreamCaptureStatusNone";
55       break;
56     case CaptureStatus::Active:
57       os << "cudaStreamCaptureStatusActive";
58       break;
59     case CaptureStatus::Invalidated:
60       os << "cudaStreamCaptureStatusInvalidated";
61       break;
62     default:
63       TORCH_INTERNAL_ASSERT(
64           false, "Unknown CUDA graph CaptureStatus", int(status));
65   }
66   return os;
67 }
68 
69 // Use this version where you're sure a CUDA context exists already.
currentStreamCaptureStatusMayInitCtx()70 inline CaptureStatus currentStreamCaptureStatusMayInitCtx() {
71   cudaStreamCaptureStatus is_capturing{cudaStreamCaptureStatusNone};
72   C10_CUDA_CHECK(
73       cudaStreamIsCapturing(c10::cuda::getCurrentCUDAStream(), &is_capturing));
74   return CaptureStatus(is_capturing);
75 }
76 
77 } // namespace c10::cuda
78