1 #include <pybind11/pybind11.h>
2 #include <torch/csrc/Device.h>
3 #include <torch/csrc/THP.h>
4 #include <torch/csrc/cuda/Module.h>
5 #include <torch/csrc/cuda/Stream.h>
6 #include <torch/csrc/utils/pybind.h>
7 #include <torch/csrc/utils/python_numbers.h>
8
9 #include <c10/cuda/CUDAGuard.h>
10
11 #include <cuda_runtime_api.h>
12 #include <structmember.h>
13
14 PyObject* THCPStreamClass = nullptr;
15
THCPStream_pynew(PyTypeObject * type,PyObject * args,PyObject * kwargs)16 static PyObject* THCPStream_pynew(
17 PyTypeObject* type,
18 PyObject* args,
19 PyObject* kwargs) {
20 HANDLE_TH_ERRORS
21
22 const auto current_device = c10::cuda::current_device();
23
24 int priority = 0;
25 int64_t stream_id = 0;
26 int64_t device_index = 0;
27 int64_t device_type = 0;
28 uint64_t stream_ptr = 0;
29
30 // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
31 constexpr const char* kwlist[] = {
32 "priority",
33 "stream_id",
34 "device_index",
35 "device_type",
36 "stream_ptr",
37 nullptr};
38 if (!PyArg_ParseTupleAndKeywords(
39 args,
40 kwargs,
41 "|iLLLK",
42 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
43 const_cast<char**>(kwlist),
44 &priority,
45 &stream_id,
46 &device_index,
47 &device_type,
48 &stream_ptr)) {
49 return nullptr;
50 }
51
52 THPObjectPtr ptr(type->tp_alloc(type, 0));
53 if (!ptr) {
54 return nullptr;
55 }
56
57 if (stream_ptr) {
58 TORCH_CHECK(
59 priority == 0, "Priority was explicitly set for a external stream")
60 }
61 at::cuda::CUDAStream stream = (stream_id || device_index || device_type)
62 ? at::cuda::CUDAStream::unpack3(
63 stream_id,
64 static_cast<c10::DeviceIndex>(device_index),
65 static_cast<c10::DeviceType>(device_type))
66 : stream_ptr ? at::cuda::getStreamFromExternal(
67 // NOLINTNEXTLINE(performance-no-int-to-ptr)
68 reinterpret_cast<cudaStream_t>(stream_ptr),
69 current_device)
70 : at::cuda::getStreamFromPool(priority);
71
72 THCPStream* self = (THCPStream*)ptr.get();
73 self->stream_id = static_cast<int64_t>(stream.id());
74 self->device_index = static_cast<int64_t>(stream.device_index());
75 self->device_type = static_cast<int64_t>(stream.device_type());
76 new (&self->cuda_stream) at::cuda::CUDAStream(stream);
77
78 return (PyObject*)ptr.release();
79 END_HANDLE_TH_ERRORS
80 }
81
THCPStream_dealloc(THCPStream * self)82 static void THCPStream_dealloc(THCPStream* self) {
83 self->cuda_stream.~CUDAStream();
84 Py_TYPE(self)->tp_free((PyObject*)self);
85 }
86
THCPStream_get_cuda_stream(THCPStream * self,void * unused)87 static PyObject* THCPStream_get_cuda_stream(THCPStream* self, void* unused) {
88 HANDLE_TH_ERRORS
89 return PyLong_FromVoidPtr(self->cuda_stream.stream());
90 END_HANDLE_TH_ERRORS
91 }
92
THCPStream_get_priority(THCPStream * self,void * unused)93 static PyObject* THCPStream_get_priority(THCPStream* self, void* unused) {
94 HANDLE_TH_ERRORS
95 return THPUtils_packInt64(self->cuda_stream.priority());
96 END_HANDLE_TH_ERRORS
97 }
98
THCPStream_priority_range(PyObject * _unused,PyObject * noargs)99 static PyObject* THCPStream_priority_range(
100 PyObject* _unused,
101 PyObject* noargs) {
102 HANDLE_TH_ERRORS
103 auto [least_priority, greatest_priority] =
104 at::cuda::CUDAStream::priority_range();
105 return Py_BuildValue("(ii)", least_priority, greatest_priority);
106 END_HANDLE_TH_ERRORS
107 }
108
THCPStream_query(PyObject * _self,PyObject * noargs)109 static PyObject* THCPStream_query(PyObject* _self, PyObject* noargs) {
110 HANDLE_TH_ERRORS
111 auto self = (THCPStream*)_self;
112 return PyBool_FromLong(self->cuda_stream.query());
113 END_HANDLE_TH_ERRORS
114 }
115
THCPStream_synchronize(PyObject * _self,PyObject * noargs)116 static PyObject* THCPStream_synchronize(PyObject* _self, PyObject* noargs) {
117 HANDLE_TH_ERRORS {
118 pybind11::gil_scoped_release no_gil;
119 auto self = (THCPStream*)_self;
120 self->cuda_stream.synchronize();
121 }
122 Py_RETURN_NONE;
123 END_HANDLE_TH_ERRORS
124 }
125
THCPStream_eq(PyObject * _self,PyObject * _other)126 static PyObject* THCPStream_eq(PyObject* _self, PyObject* _other) {
127 HANDLE_TH_ERRORS
128 auto self = (THCPStream*)_self;
129 auto other = (THCPStream*)_other;
130 return PyBool_FromLong(self->cuda_stream == other->cuda_stream);
131 END_HANDLE_TH_ERRORS
132 }
133
134 // NOLINTNEXTLINE(*-c-arrays*, *-global-variables)
135 static struct PyMemberDef THCPStream_members[] = {{nullptr}};
136
137 // NOLINTNEXTLINE(*-c-arrays*, *-global-variables)
138 static struct PyGetSetDef THCPStream_properties[] = {
139 {"cuda_stream",
140 (getter)THCPStream_get_cuda_stream,
141 nullptr,
142 nullptr,
143 nullptr},
144 {"priority", (getter)THCPStream_get_priority, nullptr, nullptr, nullptr},
145 {nullptr}};
146
147 // NOLINTNEXTLINE(*-c-arrays*, *-global-variables)
148 static PyMethodDef THCPStream_methods[] = {
149 {"query", THCPStream_query, METH_NOARGS, nullptr},
150 {"synchronize", THCPStream_synchronize, METH_NOARGS, nullptr},
151 {"priority_range",
152 THCPStream_priority_range,
153 METH_STATIC | METH_NOARGS,
154 nullptr},
155 {"__eq__", THCPStream_eq, METH_O, nullptr},
156 {nullptr}};
157
158 PyTypeObject THCPStreamType = {
159 PyVarObject_HEAD_INIT(nullptr, 0) "torch._C._CudaStreamBase", /* tp_name */
160 sizeof(THCPStream), /* tp_basicsize */
161 0, /* tp_itemsize */
162 (destructor)THCPStream_dealloc, /* tp_dealloc */
163 0, /* tp_vectorcall_offset */
164 nullptr, /* tp_getattr */
165 nullptr, /* tp_setattr */
166 nullptr, /* tp_reserved */
167 nullptr, /* tp_repr */
168 nullptr, /* tp_as_number */
169 nullptr, /* tp_as_sequence */
170 nullptr, /* tp_as_mapping */
171 nullptr, /* tp_hash */
172 nullptr, /* tp_call */
173 nullptr, /* tp_str */
174 nullptr, /* tp_getattro */
175 nullptr, /* tp_setattro */
176 nullptr, /* tp_as_buffer */
177 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
178 nullptr, /* tp_doc */
179 nullptr, /* tp_traverse */
180 nullptr, /* tp_clear */
181 nullptr, /* tp_richcompare */
182 0, /* tp_weaklistoffset */
183 nullptr, /* tp_iter */
184 nullptr, /* tp_iternext */
185 THCPStream_methods, /* tp_methods */
186 THCPStream_members, /* tp_members */
187 THCPStream_properties, /* tp_getset */
188 nullptr, /* tp_base */
189 nullptr, /* tp_dict */
190 nullptr, /* tp_descr_get */
191 nullptr, /* tp_descr_set */
192 0, /* tp_dictoffset */
193 nullptr, /* tp_init */
194 nullptr, /* tp_alloc */
195 THCPStream_pynew, /* tp_new */
196 };
197
THCPStream_init(PyObject * module)198 void THCPStream_init(PyObject* module) {
199 Py_INCREF(THPStreamClass);
200 THCPStreamType.tp_base = THPStreamClass;
201 THCPStreamClass = (PyObject*)&THCPStreamType;
202 if (PyType_Ready(&THCPStreamType) < 0) {
203 throw python_error();
204 }
205 Py_INCREF(&THCPStreamType);
206 if (PyModule_AddObject(
207 module, "_CudaStreamBase", (PyObject*)&THCPStreamType) < 0) {
208 throw python_error();
209 }
210 }
211