xref: /aosp_15_r20/external/pytorch/torch/csrc/cuda/Stream.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <pybind11/pybind11.h>
2 #include <torch/csrc/Device.h>
3 #include <torch/csrc/THP.h>
4 #include <torch/csrc/cuda/Module.h>
5 #include <torch/csrc/cuda/Stream.h>
6 #include <torch/csrc/utils/pybind.h>
7 #include <torch/csrc/utils/python_numbers.h>
8 
9 #include <c10/cuda/CUDAGuard.h>
10 
11 #include <cuda_runtime_api.h>
12 #include <structmember.h>
13 
14 PyObject* THCPStreamClass = nullptr;
15 
THCPStream_pynew(PyTypeObject * type,PyObject * args,PyObject * kwargs)16 static PyObject* THCPStream_pynew(
17     PyTypeObject* type,
18     PyObject* args,
19     PyObject* kwargs) {
20   HANDLE_TH_ERRORS
21 
22   const auto current_device = c10::cuda::current_device();
23 
24   int priority = 0;
25   int64_t stream_id = 0;
26   int64_t device_index = 0;
27   int64_t device_type = 0;
28   uint64_t stream_ptr = 0;
29 
30   // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
31   constexpr const char* kwlist[] = {
32       "priority",
33       "stream_id",
34       "device_index",
35       "device_type",
36       "stream_ptr",
37       nullptr};
38   if (!PyArg_ParseTupleAndKeywords(
39           args,
40           kwargs,
41           "|iLLLK",
42           // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
43           const_cast<char**>(kwlist),
44           &priority,
45           &stream_id,
46           &device_index,
47           &device_type,
48           &stream_ptr)) {
49     return nullptr;
50   }
51 
52   THPObjectPtr ptr(type->tp_alloc(type, 0));
53   if (!ptr) {
54     return nullptr;
55   }
56 
57   if (stream_ptr) {
58     TORCH_CHECK(
59         priority == 0, "Priority was explicitly set for a external stream")
60   }
61   at::cuda::CUDAStream stream = (stream_id || device_index || device_type)
62       ? at::cuda::CUDAStream::unpack3(
63             stream_id,
64             static_cast<c10::DeviceIndex>(device_index),
65             static_cast<c10::DeviceType>(device_type))
66       : stream_ptr ? at::cuda::getStreamFromExternal(
67                          // NOLINTNEXTLINE(performance-no-int-to-ptr)
68                          reinterpret_cast<cudaStream_t>(stream_ptr),
69                          current_device)
70                    : at::cuda::getStreamFromPool(priority);
71 
72   THCPStream* self = (THCPStream*)ptr.get();
73   self->stream_id = static_cast<int64_t>(stream.id());
74   self->device_index = static_cast<int64_t>(stream.device_index());
75   self->device_type = static_cast<int64_t>(stream.device_type());
76   new (&self->cuda_stream) at::cuda::CUDAStream(stream);
77 
78   return (PyObject*)ptr.release();
79   END_HANDLE_TH_ERRORS
80 }
81 
THCPStream_dealloc(THCPStream * self)82 static void THCPStream_dealloc(THCPStream* self) {
83   self->cuda_stream.~CUDAStream();
84   Py_TYPE(self)->tp_free((PyObject*)self);
85 }
86 
THCPStream_get_cuda_stream(THCPStream * self,void * unused)87 static PyObject* THCPStream_get_cuda_stream(THCPStream* self, void* unused) {
88   HANDLE_TH_ERRORS
89   return PyLong_FromVoidPtr(self->cuda_stream.stream());
90   END_HANDLE_TH_ERRORS
91 }
92 
THCPStream_get_priority(THCPStream * self,void * unused)93 static PyObject* THCPStream_get_priority(THCPStream* self, void* unused) {
94   HANDLE_TH_ERRORS
95   return THPUtils_packInt64(self->cuda_stream.priority());
96   END_HANDLE_TH_ERRORS
97 }
98 
THCPStream_priority_range(PyObject * _unused,PyObject * noargs)99 static PyObject* THCPStream_priority_range(
100     PyObject* _unused,
101     PyObject* noargs) {
102   HANDLE_TH_ERRORS
103   auto [least_priority, greatest_priority] =
104       at::cuda::CUDAStream::priority_range();
105   return Py_BuildValue("(ii)", least_priority, greatest_priority);
106   END_HANDLE_TH_ERRORS
107 }
108 
THCPStream_query(PyObject * _self,PyObject * noargs)109 static PyObject* THCPStream_query(PyObject* _self, PyObject* noargs) {
110   HANDLE_TH_ERRORS
111   auto self = (THCPStream*)_self;
112   return PyBool_FromLong(self->cuda_stream.query());
113   END_HANDLE_TH_ERRORS
114 }
115 
THCPStream_synchronize(PyObject * _self,PyObject * noargs)116 static PyObject* THCPStream_synchronize(PyObject* _self, PyObject* noargs) {
117   HANDLE_TH_ERRORS {
118     pybind11::gil_scoped_release no_gil;
119     auto self = (THCPStream*)_self;
120     self->cuda_stream.synchronize();
121   }
122   Py_RETURN_NONE;
123   END_HANDLE_TH_ERRORS
124 }
125 
THCPStream_eq(PyObject * _self,PyObject * _other)126 static PyObject* THCPStream_eq(PyObject* _self, PyObject* _other) {
127   HANDLE_TH_ERRORS
128   auto self = (THCPStream*)_self;
129   auto other = (THCPStream*)_other;
130   return PyBool_FromLong(self->cuda_stream == other->cuda_stream);
131   END_HANDLE_TH_ERRORS
132 }
133 
134 // NOLINTNEXTLINE(*-c-arrays*, *-global-variables)
135 static struct PyMemberDef THCPStream_members[] = {{nullptr}};
136 
137 // NOLINTNEXTLINE(*-c-arrays*, *-global-variables)
138 static struct PyGetSetDef THCPStream_properties[] = {
139     {"cuda_stream",
140      (getter)THCPStream_get_cuda_stream,
141      nullptr,
142      nullptr,
143      nullptr},
144     {"priority", (getter)THCPStream_get_priority, nullptr, nullptr, nullptr},
145     {nullptr}};
146 
147 // NOLINTNEXTLINE(*-c-arrays*, *-global-variables)
148 static PyMethodDef THCPStream_methods[] = {
149     {"query", THCPStream_query, METH_NOARGS, nullptr},
150     {"synchronize", THCPStream_synchronize, METH_NOARGS, nullptr},
151     {"priority_range",
152      THCPStream_priority_range,
153      METH_STATIC | METH_NOARGS,
154      nullptr},
155     {"__eq__", THCPStream_eq, METH_O, nullptr},
156     {nullptr}};
157 
158 PyTypeObject THCPStreamType = {
159     PyVarObject_HEAD_INIT(nullptr, 0) "torch._C._CudaStreamBase", /* tp_name */
160     sizeof(THCPStream), /* tp_basicsize */
161     0, /* tp_itemsize */
162     (destructor)THCPStream_dealloc, /* tp_dealloc */
163     0, /* tp_vectorcall_offset */
164     nullptr, /* tp_getattr */
165     nullptr, /* tp_setattr */
166     nullptr, /* tp_reserved */
167     nullptr, /* tp_repr */
168     nullptr, /* tp_as_number */
169     nullptr, /* tp_as_sequence */
170     nullptr, /* tp_as_mapping */
171     nullptr, /* tp_hash  */
172     nullptr, /* tp_call */
173     nullptr, /* tp_str */
174     nullptr, /* tp_getattro */
175     nullptr, /* tp_setattro */
176     nullptr, /* tp_as_buffer */
177     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
178     nullptr, /* tp_doc */
179     nullptr, /* tp_traverse */
180     nullptr, /* tp_clear */
181     nullptr, /* tp_richcompare */
182     0, /* tp_weaklistoffset */
183     nullptr, /* tp_iter */
184     nullptr, /* tp_iternext */
185     THCPStream_methods, /* tp_methods */
186     THCPStream_members, /* tp_members */
187     THCPStream_properties, /* tp_getset */
188     nullptr, /* tp_base */
189     nullptr, /* tp_dict */
190     nullptr, /* tp_descr_get */
191     nullptr, /* tp_descr_set */
192     0, /* tp_dictoffset */
193     nullptr, /* tp_init */
194     nullptr, /* tp_alloc */
195     THCPStream_pynew, /* tp_new */
196 };
197 
THCPStream_init(PyObject * module)198 void THCPStream_init(PyObject* module) {
199   Py_INCREF(THPStreamClass);
200   THCPStreamType.tp_base = THPStreamClass;
201   THCPStreamClass = (PyObject*)&THCPStreamType;
202   if (PyType_Ready(&THCPStreamType) < 0) {
203     throw python_error();
204   }
205   Py_INCREF(&THCPStreamType);
206   if (PyModule_AddObject(
207           module, "_CudaStreamBase", (PyObject*)&THCPStreamType) < 0) {
208     throw python_error();
209   }
210 }
211