xref: /aosp_15_r20/external/pytorch/torch/csrc/cuda/Event.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <pybind11/pybind11.h>
2 #include <torch/csrc/Device.h>
3 #include <torch/csrc/THP.h>
4 #include <torch/csrc/cuda/Event.h>
5 #include <torch/csrc/cuda/Module.h>
6 #include <torch/csrc/cuda/Stream.h>
7 #include <torch/csrc/utils/pybind.h>
8 #include <torch/csrc/utils/pycfunction_helpers.h>
9 #include <torch/csrc/utils/python_arg_parser.h>
10 
11 #include <c10/cuda/CUDAGuard.h>
12 
13 #include <cuda_runtime_api.h>
14 #include <structmember.h>
15 
// Type object for the CUDA event Python class, exposed as a generic PyObject*.
// Starts out null; THCPEvent_init() points it at THCPEventType.
PyObject* THCPEventClass = nullptr;
17 
THCPEvent_pynew(PyTypeObject * type,PyObject * args,PyObject * kwargs)18 static PyObject* THCPEvent_pynew(
19     PyTypeObject* type,
20     PyObject* args,
21     PyObject* kwargs) {
22   HANDLE_TH_ERRORS
23   unsigned char enable_timing = 0;
24   unsigned char blocking = 0;
25   unsigned char interprocess = 0;
26 
27   // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
28   constexpr const char* kwlist[] = {
29       "enable_timing", "blocking", "interprocess", nullptr};
30   if (!PyArg_ParseTupleAndKeywords(
31           args,
32           kwargs,
33           "|bbb",
34           // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
35           const_cast<char**>(kwlist),
36           &enable_timing,
37           &blocking,
38           &interprocess)) {
39     return nullptr;
40   }
41 
42   THPObjectPtr ptr(type->tp_alloc(type, 0));
43   if (!ptr) {
44     return nullptr;
45   }
46 
47   THCPEvent* self = (THCPEvent*)ptr.get();
48   unsigned int flags = (blocking ? cudaEventBlockingSync : cudaEventDefault) |
49       (enable_timing ? cudaEventDefault : cudaEventDisableTiming) |
50       (interprocess ? cudaEventInterprocess : cudaEventDefault);
51 
52   new (&self->cuda_event) at::cuda::CUDAEvent(flags);
53 
54   return (PyObject*)ptr.release();
55   END_HANDLE_TH_ERRORS
56 }
57 
THCPEvent_from_ipc_handle(PyObject * _type,PyObject * args,PyObject * kwargs)58 static PyObject* THCPEvent_from_ipc_handle(
59     PyObject* _type,
60     PyObject* args,
61     PyObject* kwargs) {
62   HANDLE_TH_ERRORS
63   auto type = (PyTypeObject*)_type;
64 
65   static torch::PythonArgParser parser({
66       "from_ipc_handle(Device device, std::string ipc_handle)",
67   });
68   torch::ParsedArgs<2> parsed_args;
69   auto r = parser.parse(args, kwargs, parsed_args);
70 
71   at::Device device = r.device(0);
72   std::string handle_string = r.string(1);
73 
74   TORCH_CHECK(
75       handle_string.size() == sizeof(cudaIpcEventHandle_t),
76       "cudaIpcEventHandle_t expects byte-like object of size ",
77       sizeof(cudaIpcEventHandle_t),
78       ", but got ",
79       handle_string.size());
80   TORCH_CHECK(
81       device.type() == at::kCUDA,
82       "Event can only be created on "
83       "CUDA devices, but got device type ",
84       device.type())
85 
86   THPObjectPtr ptr(type->tp_alloc(type, 0));
87   if (!ptr) {
88     return nullptr;
89   }
90   THCPEvent* self = (THCPEvent*)ptr.get();
91 
92   // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
93   cudaIpcEventHandle_t handle;
94   std::memcpy(&handle, handle_string.c_str(), handle_string.size());
95   new (&self->cuda_event) at::cuda::CUDAEvent(device.index(), &handle);
96 
97   return (PyObject*)ptr.release();
98   END_HANDLE_TH_ERRORS
99 }
100 
THCPEvent_dealloc(THCPEvent * self)101 static void THCPEvent_dealloc(THCPEvent* self) {
102   {
103     pybind11::gil_scoped_release no_gil{};
104     self->cuda_event.~CUDAEvent();
105   }
106   Py_TYPE(self)->tp_free((PyObject*)self);
107 }
108 
THCPEvent_get_cuda_event(THCPEvent * self,void * unused)109 static PyObject* THCPEvent_get_cuda_event(THCPEvent* self, void* unused) {
110   HANDLE_TH_ERRORS
111   return PyLong_FromVoidPtr(self->cuda_event.event());
112   END_HANDLE_TH_ERRORS
113 }
114 
THCPEvent_get_device(THCPEvent * self,void * unused)115 static PyObject* THCPEvent_get_device(THCPEvent* self, void* unused) {
116   HANDLE_TH_ERRORS
117   std::optional<at::Device> device = self->cuda_event.device();
118   if (!device) {
119     Py_RETURN_NONE;
120   }
121   return THPDevice_New(device.value());
122   END_HANDLE_TH_ERRORS
123 }
124 
THCPEvent_record(PyObject * _self,PyObject * _stream)125 static PyObject* THCPEvent_record(PyObject* _self, PyObject* _stream) {
126   HANDLE_TH_ERRORS {
127     auto self = (THCPEvent*)_self;
128     auto stream = (THCPStream*)_stream;
129     pybind11::gil_scoped_release no_gil{};
130     self->cuda_event.record(stream->cuda_stream);
131   }
132   Py_RETURN_NONE;
133   END_HANDLE_TH_ERRORS
134 }
135 
THCPEvent_wait(PyObject * _self,PyObject * _stream)136 static PyObject* THCPEvent_wait(PyObject* _self, PyObject* _stream) {
137   HANDLE_TH_ERRORS {
138     auto self = (THCPEvent*)_self;
139     auto stream = (THCPStream*)_stream;
140     pybind11::gil_scoped_release no_gil{};
141     self->cuda_event.block(stream->cuda_stream);
142   }
143   Py_RETURN_NONE;
144   END_HANDLE_TH_ERRORS
145 }
146 
THCPEvent_query(PyObject * _self,PyObject * noargs)147 static PyObject* THCPEvent_query(PyObject* _self, PyObject* noargs) {
148   HANDLE_TH_ERRORS
149   auto self = (THCPEvent*)_self;
150   return PyBool_FromLong(self->cuda_event.query());
151   END_HANDLE_TH_ERRORS
152 }
153 
THCPEvent_elapsed_time(PyObject * _self,PyObject * _other)154 static PyObject* THCPEvent_elapsed_time(PyObject* _self, PyObject* _other) {
155   HANDLE_TH_ERRORS
156   auto self = (THCPEvent*)_self;
157   auto other = (THCPEvent*)_other;
158   return PyFloat_FromDouble(self->cuda_event.elapsed_time(other->cuda_event));
159   END_HANDLE_TH_ERRORS
160 }
161 
THCPEvent_synchronize(PyObject * _self,PyObject * noargs)162 static PyObject* THCPEvent_synchronize(PyObject* _self, PyObject* noargs) {
163   HANDLE_TH_ERRORS {
164     auto self = (THCPEvent*)_self;
165     pybind11::gil_scoped_release no_gil{};
166     self->cuda_event.synchronize();
167   }
168   Py_RETURN_NONE;
169   END_HANDLE_TH_ERRORS
170 }
171 
THCPEvent_ipc_handle(PyObject * _self,PyObject * noargs)172 static PyObject* THCPEvent_ipc_handle(PyObject* _self, PyObject* noargs) {
173   HANDLE_TH_ERRORS
174   auto self = (THCPEvent*)_self;
175   // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
176   cudaIpcEventHandle_t handle;
177   self->cuda_event.ipc_handle(&handle);
178   return PyBytes_FromStringAndSize((const char*)&handle, sizeof(handle));
179   END_HANDLE_TH_ERRORS
180 }
181 
182 // NOLINTNEXTLINE(*c-arrays*, *global-variables)
183 static struct PyGetSetDef THCPEvent_properties[] = {
184     {"device", (getter)THCPEvent_get_device, nullptr, nullptr, nullptr},
185     {"cuda_event", (getter)THCPEvent_get_cuda_event, nullptr, nullptr, nullptr},
186     {nullptr}};
187 
188 // NOLINTNEXTLINE(*c-arrays*, *global-variables)
189 static PyMethodDef THCPEvent_methods[] = {
190     {(char*)"from_ipc_handle",
191      castPyCFunctionWithKeywords(THCPEvent_from_ipc_handle),
192      METH_CLASS | METH_VARARGS | METH_KEYWORDS,
193      nullptr},
194     {(char*)"record", THCPEvent_record, METH_O, nullptr},
195     {(char*)"wait", THCPEvent_wait, METH_O, nullptr},
196     {(char*)"query", THCPEvent_query, METH_NOARGS, nullptr},
197     {(char*)"elapsed_time", THCPEvent_elapsed_time, METH_O, nullptr},
198     {(char*)"synchronize", THCPEvent_synchronize, METH_NOARGS, nullptr},
199     {(char*)"ipc_handle", THCPEvent_ipc_handle, METH_NOARGS, nullptr},
200     {nullptr}};
201 
202 PyTypeObject THCPEventType = {
203     PyVarObject_HEAD_INIT(nullptr, 0) "torch._C._CudaEventBase", /* tp_name */
204     sizeof(THCPEvent), /* tp_basicsize */
205     0, /* tp_itemsize */
206     (destructor)THCPEvent_dealloc, /* tp_dealloc */
207     0, /* tp_vectorcall_offset */
208     nullptr, /* tp_getattr */
209     nullptr, /* tp_setattr */
210     nullptr, /* tp_reserved */
211     nullptr, /* tp_repr */
212     nullptr, /* tp_as_number */
213     nullptr, /* tp_as_sequence */
214     nullptr, /* tp_as_mapping */
215     nullptr, /* tp_hash  */
216     nullptr, /* tp_call */
217     nullptr, /* tp_str */
218     nullptr, /* tp_getattro */
219     nullptr, /* tp_setattro */
220     nullptr, /* tp_as_buffer */
221     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
222     nullptr, /* tp_doc */
223     nullptr, /* tp_traverse */
224     nullptr, /* tp_clear */
225     nullptr, /* tp_richcompare */
226     0, /* tp_weaklistoffset */
227     nullptr, /* tp_iter */
228     nullptr, /* tp_iternext */
229     THCPEvent_methods, /* tp_methods */
230     nullptr, /* tp_members */
231     THCPEvent_properties, /* tp_getset */
232     nullptr, /* tp_base */
233     nullptr, /* tp_dict */
234     nullptr, /* tp_descr_get */
235     nullptr, /* tp_descr_set */
236     0, /* tp_dictoffset */
237     nullptr, /* tp_init */
238     nullptr, /* tp_alloc */
239     THCPEvent_pynew, /* tp_new */
240 };
241 
THCPEvent_init(PyObject * module)242 void THCPEvent_init(PyObject* module) {
243   THCPEventClass = (PyObject*)&THCPEventType;
244   if (PyType_Ready(&THCPEventType) < 0) {
245     throw python_error();
246   }
247   Py_INCREF(&THCPEventType);
248   if (PyModule_AddObject(module, "_CudaEventBase", (PyObject*)&THCPEventType) <
249       0) {
250     throw python_error();
251   }
252 }
253