/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

#include "cm_queue.h"

#include "cm_debug.h"
#include "cm_device.h"
#include "cm_include.h"
#include "cm_mem.h"
#include "cm_timer.h"

struct CM_CREATEQUEUE_PARAM
{
    CM_QUEUE_CREATE_OPTION createOption;  // [in/out]
    void *cmQueueHandle;                  // [out]
    int32_t returnValue;                  // [out]
};

struct CM_ENQUEUE_PARAM
{
    void *cmQueueHandle;        // [in]
    void *cmTaskHandle;         // [in]
    void *cmThreadSpaceHandle;  // [in]
    void *cmEventHandle;        // [out]
    uint32_t eventIndex;        // [out] index of Event in m_EventArray
    int32_t returnValue;        // [out]
};

struct CM_ENQUEUEGROUP_PARAM
{
    void *cmQueueHandle;      // [in]
    void *cmTaskHandle;       // [in]
    void *cmTGrpSpaceHandle;  // [in]
    void *cmEventHandle;      // [out]
    uint32_t eventIndex;      // [out] index of Event in m_EventArray
    int32_t returnValue;      // [out]
};

struct CM_ENQUEUEHINTS_PARAM
{
    void *cmQueueHandle;  // [in]
    void *cmTaskHandle;   // [in]
    void *cmEventHandle;  // [in]
    uint32_t hints;       // [in]
    uint32_t eventIndex;  // [out] index of Event in m_EventArray
    int32_t returnValue;  // [out]
};

struct CM_DESTROYEVENT_PARAM
{
    void *cmQueueHandle;  // [in]
    void *cmEventHandle;  // [in]
    int32_t returnValue;  // [out]
};

struct CM_ENQUEUE_GPUCOPY_V2V_PARAM
{
    void *cmQueueHandle;   // [in]
    void *cmSrcSurface2d;  // [in]
    void *cmDstSurface2d;  // [in]
    uint32_t option;       // [in]
    void *cmEventHandle;   // [out]
    uint32_t eventIndex;   // [out] index of Event in m_EventArray
    int32_t returnValue;   // [out]
};

struct CM_ENQUEUE_GPUCOPY_L2L_PARAM
{
    void *cmQueueHandle;  // [in]
    void *srcSysMem;      // [in]
    void *dstSysMem;      // [in]
    uint32_t copySize;    // [in]
    uint32_t option;      // [in]
    void *cmEventHandle;  // [out]
    uint32_t eventIndex;  // [out] index of Event in m_EventArray
    int32_t returnValue;  // [out]
};

struct CM_ENQUEUE_COPY_BUFFER_PARAM
{
    void *cmQueueHandle;  // [in]
    void *buffer;         // [in]
    void *sysMem;         // [in]
    uint32_t offset;      // [in]
    uint64_t copySize;    // [in]
    uint32_t copyDir;     // [in]
    void *wait_event;     // [in]
    void *cmEventHandle;  // [out]
    uint32_t option;      // [in]
    uint32_t eventIndex;  // [out] index of Event in m_EventArray
    int32_t returnValue;  // [out]
};

struct CM_ENQUEUE_2DInit_PARAM
{
    void *cmQueueHandle;  // [in]
    void *cmSurface2d;    // [in]
    uint32_t initValue;   // [in]
    void *cmEventHandle;  // [out]
    uint32_t eventIndex;  // [out] index of Event in m_EventArray
    int32_t returnValue;  // [out]
};
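// NOTE: CmQueue_RT::EnqueueCopy() below fills a CM_ENQUEUE_GPUCOPY_PARAM, but that
// struct's definition is missing from this listing. The layout below is a sketch
// reconstructed from the fields EnqueueCopy() actually touches; the field order and
// the exact type of copyDir are assumptions.
struct CM_ENQUEUE_GPUCOPY_PARAM
{
    void *cmQueueHandle;            // [in]
    void *cmSurface2d;              // [in]
    void *sysMem;                   // [in]
    CM_FASTCOPY_DIRECTION copyDir;  // [in]
    uint32_t widthStride;           // [in]
    uint32_t heightStride;          // [in]
    uint32_t option;                // [in]
    void *cmEventHandle;            // [out]
    uint32_t eventIndex;            // [out] index of Event in m_EventArray
    int32_t returnValue;            // [out]
};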
struct CM_ENQUEUE_VEBOX_PARAM
{
    void *cmQueueHandle;  // [IN]
    void *cmVeboxHandle;  // [IN] CmVeboxG75's handle
    void *cmEventHandle;  // [out] event's handle
    uint32_t eventIndex;  // [out] event's index in m_EventArray CMRT@UMD
    int32_t returnValue;  // [out] return value
};

int32_t CmQueue_RT::Create(CmDevice_RT *device,
                           CmQueue_RT *&queue,
                           CM_QUEUE_CREATE_OPTION queueCreateOption)
{
    int32_t result = CM_SUCCESS;
    queue = new(std::nothrow) CmQueue_RT(device, queueCreateOption);
    if (queue)
    {
        result = queue->Initialize(queueCreateOption);
        if (result != CM_SUCCESS)
        {
            CmQueue_RT::Destroy(queue);
        }
    }
    else
    {
        CmAssert(0);
        result = CM_OUT_OF_HOST_MEMORY;
    }
    return result;
}

int32_t CmQueue_RT::Destroy(CmQueue_RT *&queue)
{
    CmSafeRelease(queue);
    return CM_SUCCESS;
}

CmQueue_RT::CmQueue_RT(CmDevice_RT *device,
                       CM_QUEUE_CREATE_OPTION queueCreateOption):
    m_cmDev(device),
    m_cmQueueHandle(nullptr),
    m_queueOption(queueCreateOption) {}

CmQueue_RT::~CmQueue_RT() {}

int32_t CmQueue_RT::Initialize()
{
    CM_CREATEQUEUE_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMDEVICE_CREATEQUEUE,
                                               &inParam, sizeof(inParam));
    CHK_FAILURE_RETURN(hr);
    CHK_FAILURE_RETURN(inParam.returnValue);
    m_cmQueueHandle = inParam.cmQueueHandle;
    m_queueOption = inParam.createOption;
    return CM_SUCCESS;
}

int32_t CmQueue_RT::Initialize(CM_QUEUE_CREATE_OPTION queueCreateOption)
{
    CM_CREATEQUEUE_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.createOption = queueCreateOption;
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMDEVICE_CREATEQUEUEEX,
                                               &inParam, sizeof(inParam));
    CHK_FAILURE_RETURN(hr);
    CHK_FAILURE_RETURN(inParam.returnValue);
    m_cmQueueHandle = inParam.cmQueueHandle;
    return CM_SUCCESS;
}

//!
//! Enqueues a task. Each task has one or more kernels running concurrently,
//! and each kernel can run in multiple threads concurrently.
//! Tasks are executed in the order in which they are enqueued; the next task
//! does not start executing until the current task finishes.
//! When the last argument, threadSpace, is not nullptr, there are dependencies among the
//! threads within the task. Enqueue will make sure each x/y pair in the CmThreadSpace object
//! is associated with a unique thread in the enqueued task; otherwise Enqueue will fail.
//! This is a non-blocking call, i.e. it returns immediately without waiting for
//! the GPU to finish executing the task.
//! A CmEvent is generated each time a task is enqueued. The CmEvent can
//! be used to check whether the task has finished.
//! INPUT:
//!     1) Array of CmKernel_RT pointers. These kernels are to run concurrently. The
//!        first nullptr pointer in the array indicates the end of the kernels.
//!     2) Reference to the pointer to CmEvent.
//!     3) A boolean value indicating whether to flush the queue after enqueuing the task;
//!        by default the boolean value is TRUE.
//! OUTPUT:
//!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
//!     CM_OUT_OF_HOST_MEMORY if out of host memory;
//!     CM_FAILURE otherwise.
//!     More error codes are coming.
//!
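//! Example (illustrative only, not part of the original file): a minimal host-side
//! sketch of enqueuing a task through this API. It assumes the public CMRT entry
//! points CmDevice::CreateTask, CmDevice::CreateQueue, CmTask::AddKernel and
//! CmEvent::WaitForTaskFinished behave as usual; 'device' and 'kernel' are created
//! elsewhere and error handling is omitted.
//!
//!     CmTask *task = nullptr;
//!     device->CreateTask(task);
//!     task->AddKernel(kernel);
//!
//!     CmQueue *queue = nullptr;
//!     device->CreateQueue(queue);
//!
//!     CmEvent *event = nullptr;
//!     queue->Enqueue(task, event, nullptr);  // non-blocking; no thread space dependencies
//!     event->WaitForTaskFinished();          // block until the GPU finishes the task
//!     queue->DestroyEvent(event);
//!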
CM_RT_API int32_t CmQueue_RT::Enqueue(CmTask *task,
                                      CmEvent *&event,
                                      const CmThreadSpace *threadSpace)
{
    INSERT_PROFILER_RECORD();
    if (task == nullptr)
    {
        CmAssert(0);
        CmDebugMessage(("Kernel array is NULL."));
        return CM_INVALID_ARG_VALUE;
    }
    m_criticalSection.Acquire();

    CM_ENQUEUE_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmTaskHandle = task;
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmThreadSpaceHandle = (void *)threadSpace;
    inParam.cmEventHandle = event;  // to support invisible events, this field is used for input/output.

    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUE,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return CM_SUCCESS;
}

CM_RT_API int32_t CmQueue_RT::EnqueueWithHints(CmTask *task,
                                               CmEvent *&event,
                                               uint32_t hints)
{
    INSERT_PROFILER_RECORD();
    if (task == nullptr)
    {
        CmAssert(0);
        CmDebugMessage(("Kernel array is NULL."));
        return CM_INVALID_ARG_VALUE;
    }
    m_criticalSection.Acquire();

    CM_ENQUEUEHINTS_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmTaskHandle = task;
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.hints = hints;
    inParam.cmEventHandle = event;  // to support invisible events, this field is used for input/output.

    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHHINTS,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return CM_SUCCESS;
}

//!
//! Enqueues a task which contains one pre-defined kernel that
//! copies from host memory to a surface.
//! This is a non-blocking call, i.e. it returns immediately without waiting for
//! the GPU to finish executing the task.
//! A CmEvent is generated each time a task is enqueued. The CmEvent can
//! be used to check whether the task has finished.
//! INPUT:
//!     1) Pointer to the CmSurface2D_RT as the copy destination.
//!     2) Pointer to the host memory as the copy source.
//!     3) Reference to the pointer to CmEvent.
//!     4) A boolean value indicating whether to flush the queue after enqueuing the task;
//!        by default the boolean value is TRUE.
//! OUTPUT:
//!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
//!     CM_OUT_OF_HOST_MEMORY if out of host memory;
//!     CM_FAILURE otherwise.
//!     More error codes are coming.
//!
int32_t CmQueue_RT::EnqueueCopyCPUToGPU(CmSurface2D *surface,
                                        const unsigned char *sysMem,
                                        CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    return EnqueueCopy(surface, sysMem, 0, 0, CM_FASTCOPY_CPU2GPU,
                       CM_FASTCOPY_OPTION_NONBLOCKING, event);
}

//!
//! Enqueues a task which contains one pre-defined kernel that
//! copies from a surface to host memory.
//! This is a non-blocking call, i.e. it returns immediately without waiting for
//! the GPU to finish executing the task.
//! A CmEvent is generated each time a task is enqueued. The CmEvent can
//! be used to check whether the task has finished.
//! INPUT:
//!     1) Pointer to the CmSurface2D_RT as the copy source.
//!     2) Pointer to the host memory as the copy destination.
//!     3) Reference to the pointer to CmEvent.
//!     4) A boolean value indicating whether to flush the queue after enqueuing the task;
//!        by default the boolean value is TRUE.
//! OUTPUT:
//!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
//!     CM_OUT_OF_HOST_MEMORY if out of host memory;
//!     CM_FAILURE otherwise.
//!     More error codes are coming.
//!
CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPU(CmSurface2D *surface,
                                                  unsigned char *sysMem,
                                                  CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    return EnqueueCopy(surface, sysMem, 0, 0, CM_FASTCOPY_GPU2CPU,
                       CM_FASTCOPY_OPTION_NONBLOCKING, event);
}
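//! Example (illustrative only, not part of the original file): reading a 2D surface
//! back to system memory with the non-blocking copy above and waiting on the event.
//! It assumes 'device', 'surface' and a suitably sized 'hostBuffer' already exist;
//! the names and error handling are placeholders.
//!
//!     CmQueue *queue = nullptr;
//!     device->CreateQueue(queue);
//!
//!     CmEvent *copyDone = nullptr;
//!     queue->EnqueueCopyGPUToCPU(surface, hostBuffer, copyDone);  // returns immediately
//!     copyDone->WaitForTaskFinished();                            // hostBuffer is valid after this
//!     queue->DestroyEvent(copyDone);
//!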
//!
//! Enqueues a task which contains one pre-defined kernel that
//! copies from linear system memory to tiled video memory.
//! This API supports both blocking and non-blocking copies: if the caller passes
//! CM_FASTCOPY_OPTION_BLOCKING as the option, the API returns only after the copy
//! operation has finished; otherwise it returns immediately without waiting for the GPU copy.
//! A CmEvent is generated each time a task is enqueued. The CmEvent can
//! be used to check whether the task has finished.
//! INPUT:
//!     1) Pointer to the CmSurface2D as the copy destination.
//!     2) Pointer to the host memory as the copy source.
//!     3) Width stride in bytes for the system memory.
//!     4) Height stride in rows for the system memory.
//!     5) Option: CM_FASTCOPY_OPTION_NONBLOCKING, CM_FASTCOPY_OPTION_BLOCKING or CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST.
//!     6) Reference to the pointer to CmEvent.
//! RETURNS:
//!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
//!     CM_OUT_OF_HOST_MEMORY if out of host memory;
//!     CM_FAILURE otherwise.
//!
CM_RT_API int32_t CmQueue_RT::EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
                                                            const unsigned char *sysMem,
                                                            const uint32_t widthStride,
                                                            const uint32_t heightStride,
                                                            const uint32_t option,
                                                            CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    return EnqueueCopy(surface, sysMem, widthStride, heightStride,
                       CM_FASTCOPY_CPU2GPU, option, event);
}

//!
//! Enqueues a task which contains one pre-defined kernel that
//! copies from tiled video memory to linear system memory.
//! This API supports both blocking and non-blocking copies: if the caller passes
//! CM_FASTCOPY_OPTION_BLOCKING as the option, the API returns only after the copy
//! operation has finished; otherwise it returns immediately without waiting for the GPU copy.
//! A CmEvent is generated each time a task is enqueued. The CmEvent can
//! be used to check whether the task has finished.
//! INPUT:
//!     1) Pointer to the CmSurface2D as the copy source.
//!     2) Pointer to the host memory as the copy destination.
//!     3) Width stride in bytes for the system memory.
//!     4) Height stride in rows for the system memory.
//!     5) Option: CM_FASTCOPY_OPTION_NONBLOCKING or CM_FASTCOPY_OPTION_BLOCKING.
//!     6) Reference to the pointer to CmEvent.
//! RETURNS:
//!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
//!     CM_OUT_OF_HOST_MEMORY if out of host memory;
//!     CM_FAILURE otherwise.
//!
CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
                                                            unsigned char *sysMem,
                                                            const uint32_t widthStride,
                                                            const uint32_t heightStride,
                                                            const uint32_t option,
                                                            CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    return EnqueueCopy(surface, sysMem, widthStride, heightStride,
                       CM_FASTCOPY_GPU2CPU, option, event);
}
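//! Example (illustrative only, not part of the original file): a blocking, strided
//! upload from a host image into a 2D surface using the entry point above. 'device',
//! 'surface', 'hostImage', 'pitchInBytes' and 'rowsInBuffer' are placeholders; the
//! width/height strides describe the host buffer layout, not the surface.
//!
//!     CmQueue *queue = nullptr;
//!     device->CreateQueue(queue);
//!
//!     CmEvent *event = nullptr;
//!     queue->EnqueueCopyCPUToGPUFullStride(surface, hostImage,
//!                                          pitchInBytes,   // bytes per host row
//!                                          rowsInBuffer,   // rows allocated in the host buffer
//!                                          CM_FASTCOPY_OPTION_BLOCKING,
//!                                          event);         // returns after the copy completes
//!     queue->DestroyEvent(event);
//!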
//!
//! Enqueues a task which contains one pre-defined kernel that
//! copies from linear system memory to tiled video memory.
//! This API supports both blocking and non-blocking copies: if the caller passes
//! CM_FASTCOPY_OPTION_BLOCKING as the option, the API returns only after the copy
//! operation has finished; otherwise it returns immediately without waiting for the GPU copy.
//! A CmEvent is generated each time a task is enqueued. The CmEvent can
//! be used to check whether the task has finished.
//! INPUT:
//!     1) Pointer to the CmSurface2D as the copy destination.
//!     2) Pointer to the host memory as the copy source.
//!     3) Width stride in bytes for the system memory.
//!     4) Height stride in rows for the system memory.
//!     5) Option: CM_FASTCOPY_OPTION_NONBLOCKING, CM_FASTCOPY_OPTION_BLOCKING or CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST.
//!     6) Reference to the pointer to CmEvent.
//! RETURNS:
//!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
//!     CM_OUT_OF_HOST_MEMORY if out of host memory;
//!     CM_FAILURE otherwise.
//!
CM_RT_API int32_t CmQueue_RT::EnqueueCopyCPUToGPUFullStrideDup(CmSurface2D *surface,
                                                               const unsigned char *sysMem,
                                                               const uint32_t widthStride,
                                                               const uint32_t heightStride,
                                                               const uint32_t option,
                                                               CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    return EnqueueCopy(surface, sysMem, widthStride, heightStride,
                       CM_FASTCOPY_CPU2GPU, option, event);
}

//!
//! Enqueues a task which contains one pre-defined kernel that
//! copies from tiled video memory to linear system memory.
//! This API supports both blocking and non-blocking copies: if the caller passes
//! CM_FASTCOPY_OPTION_BLOCKING as the option, the API returns only after the copy
//! operation has finished; otherwise it returns immediately without waiting for the GPU copy.
//! A CmEvent is generated each time a task is enqueued. The CmEvent can
//! be used to check whether the task has finished.
//! INPUT:
//!     1) Pointer to the CmSurface2D as the copy source.
//!     2) Pointer to the host memory as the copy destination.
//!     3) Width stride in bytes for the system memory.
//!     4) Height stride in rows for the system memory.
//!     5) Option: CM_FASTCOPY_OPTION_NONBLOCKING or CM_FASTCOPY_OPTION_BLOCKING.
//!     6) Reference to the pointer to CmEvent.
//! RETURNS:
//!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
//!     CM_OUT_OF_HOST_MEMORY if out of host memory;
//!     CM_FAILURE otherwise.
//!
CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPUFullStrideDup(CmSurface2D *surface,
                                                               unsigned char *sysMem,
                                                               const uint32_t widthStride,
                                                               const uint32_t heightStride,
                                                               const uint32_t option,
                                                               CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    return EnqueueCopy(surface, sysMem, widthStride, heightStride,
                       CM_FASTCOPY_GPU2CPU, option, event);
}

CM_RT_API int32_t CmQueue_RT::DestroyEvent(CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    if (event == nullptr)
    {
        return CM_FAILURE;
    }

    CM_DESTROYEVENT_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmEventHandle = event;

    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_DESTROYEVENT,
                                               &inParam, sizeof(inParam));
    CHK_FAILURE_RETURN(hr);
    CHK_FAILURE_RETURN(inParam.returnValue);
    event = nullptr;
    return CM_SUCCESS;
}

//!
//! Function to enqueue a task with a thread group space pointer.
//! Arguments:
//!     1. Pointer to CmTask, which can only contain one kernel.
//!     2. Reference to the pointer to CmEvent that is to be returned.
//!     3. Pointer to a CmThreadGroupSpace.
//! Return Value:
//!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated.
//!     CM_OUT_OF_HOST_MEMORY if out of host memory.
//!     CM_FAILURE otherwise.
//! Notes:
//!     If the kernel has per-thread args, the GPGPU object is to be used.
//!     If the kernel has no per-thread args, the GPGPU walker is used.
//!
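//! Example (illustrative only, not part of the original file): dispatching a single
//! GPGPU kernel over an 8x8 grid of 16x16 thread groups. It assumes the usual CMRT
//! host calls CmDevice::CreateThreadGroupSpace and CmDevice::CreateTask; 'device',
//! 'queue' and 'kernel' are placeholders created elsewhere and the dimensions are arbitrary.
//!
//!     CmThreadGroupSpace *tgs = nullptr;
//!     device->CreateThreadGroupSpace(16, 16,   // threads per group (width, height)
//!                                    8, 8,     // groups (width, height)
//!                                    tgs);
//!
//!     CmTask *task = nullptr;
//!     device->CreateTask(task);
//!     task->AddKernel(kernel);                 // exactly one kernel for this path
//!
//!     CmEvent *event = nullptr;
//!     queue->EnqueueWithGroup(task, event, tgs);
//!     event->WaitForTaskFinished();
//!     queue->DestroyEvent(event);
//!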
CM_RT_API int32_t CmQueue_RT::EnqueueWithGroup(CmTask *task,
                                               CmEvent *&event,
                                               const CmThreadGroupSpace *threadGroupSpace)
{
    INSERT_PROFILER_RECORD();
    if (task == nullptr)
    {
        CmAssert(0);
        CmDebugMessage(("Kernel array is NULL."));
        return CM_INVALID_ARG_VALUE;
    }
    m_criticalSection.Acquire();

    CM_ENQUEUEGROUP_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmTaskHandle = task;
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmTGrpSpaceHandle = (void *)threadGroupSpace;
    inParam.cmEventHandle = event;  // to support invisible events, this field is used for input/output.

    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHGROUP,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return CM_SUCCESS;
}

int32_t CmQueue_RT::EnqueueCopy(CmSurface2D *surface,
                                const unsigned char *sysMem,
                                const uint32_t widthStride,
                                const uint32_t heightStride,
                                CM_FASTCOPY_DIRECTION direction,
                                const uint32_t option,
                                CmEvent *&event)
{
    CM_ENQUEUE_GPUCOPY_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmSurface2d = surface;
    inParam.sysMem = (void *)sysMem;
    inParam.copyDir = direction;
    inParam.widthStride = widthStride;
    inParam.heightStride = heightStride;
    inParam.option = option;
    inParam.cmEventHandle = event;

    m_criticalSection.Acquire();
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY,
                                               &inParam, sizeof(inParam),
                                               nullptr, 0);
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return hr;
}

CM_RT_API int32_t CmQueue_RT::EnqueueInitSurface2D(CmSurface2D *surface,
                                                   const uint32_t initValue,
                                                   CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    CM_ENQUEUE_2DInit_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmEventHandle = event;
    inParam.cmSurface2d = surface;
    inParam.initValue = initValue;

    m_criticalSection.Acquire();
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUESURF2DINIT,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return hr;
}

CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
                                                  CmSurface2D *inputSurface,
                                                  uint32_t option,
                                                  CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    CM_ENQUEUE_GPUCOPY_V2V_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.option = option;
    inParam.cmEventHandle = event;
    inParam.cmDstSurface2d = outputSurface;
    inParam.cmSrcSurface2d = inputSurface;

    m_criticalSection.Acquire();
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_V2V,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return hr;
}
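//! Example (illustrative only, not part of the original file): clearing one surface
//! and then copying it to another on the GPU with the two entry points above.
//! 'queue', 'srcSurface' and 'dstSurface' are placeholders created elsewhere; the
//! fill value and option 0 are arbitrary.
//!
//!     CmEvent *event = nullptr;
//!     queue->EnqueueInitSurface2D(srcSurface, 0x80808080, event);    // fill with a constant
//!     event->WaitForTaskFinished();
//!     queue->DestroyEvent(event);
//!
//!     event = nullptr;
//!     queue->EnqueueCopyGPUToGPU(dstSurface, srcSurface, 0, event);  // dst <- src
//!     event->WaitForTaskFinished();
//!     queue->DestroyEvent(event);
//!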
CM_RT_API int32_t CmQueue_RT::EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
                                                  unsigned char *srcSysMem,
                                                  uint32_t size,
                                                  uint32_t option,
                                                  CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    CM_ENQUEUE_GPUCOPY_L2L_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.srcSysMem = srcSysMem;
    inParam.dstSysMem = dstSysMem;
    inParam.copySize = size;
    inParam.option = option;
    inParam.cmEventHandle = event;

    m_criticalSection.Acquire();
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_L2L,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return hr;
}

CM_RT_API int32_t CmQueue_RT::EnqueueVebox(CmVebox *vebox, CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    CM_ENQUEUE_VEBOX_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmVeboxHandle = vebox;
    inParam.cmEventHandle = event;

    m_criticalSection.Acquire();
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEVEBOX,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return hr;
}

CM_QUEUE_CREATE_OPTION CmQueue_RT::GetQueueOption()
{
    return m_queueOption;
}

CM_RT_API int32_t CmQueue_RT::EnqueueFast(CmTask *task,
                                          CmEvent *&event,
                                          const CmThreadSpace *threadSpace)
{
    INSERT_PROFILER_RECORD();
    if (task == nullptr)
    {
        CmAssert(0);
        CmDebugMessage(("Kernel array is NULL."));
        return CM_INVALID_ARG_VALUE;
    }
    m_criticalSection.Acquire();

    CM_ENQUEUE_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmTaskHandle = task;
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmThreadSpaceHandle = (void *)threadSpace;
    inParam.cmEventHandle = event;  // to support invisible events, this field is used for input/output.

    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEFAST,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return CM_SUCCESS;
}
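//! Note (added for clarity; not in the original file): EnqueueFast above and
//! EnqueueWithGroupFast below mirror Enqueue/EnqueueWithGroup but route through the
//! CM_FN_CMQUEUE_ENQUEUEFAST / CM_FN_CMQUEUE_ENQUEUEWITHGROUPFAST extension codes.
//! Pairing events from these fast-path calls with DestroyEventFast() rather than
//! DestroyEvent() is an assumption based on the parallel naming; this file itself
//! does not enforce it.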
CM_RT_API int32_t CmQueue_RT::EnqueueWithGroupFast(CmTask *task,
                                                   CmEvent *&event,
                                                   const CmThreadGroupSpace *threadGroupSpace)
{
    INSERT_PROFILER_RECORD();
    if (task == nullptr)
    {
        CmAssert(0);
        CmDebugMessage(("Kernel array is NULL."));
        return CM_INVALID_ARG_VALUE;
    }
    m_criticalSection.Acquire();

    CM_ENQUEUEGROUP_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmTaskHandle = task;
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmTGrpSpaceHandle = (void *)threadGroupSpace;
    inParam.cmEventHandle = event;  // to support invisible events, this field is used for input/output.

    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHGROUPFAST,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return CM_SUCCESS;
}

CM_RT_API int32_t CmQueue_RT::DestroyEventFast(CmEvent *&event)
{
    INSERT_PROFILER_RECORD();
    if (event == nullptr)
    {
        return CM_INVALID_ARG_VALUE;
    }

    CM_DESTROYEVENT_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.cmEventHandle = event;

    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_DESTROYEVENTFAST,
                                               &inParam, sizeof(inParam));
    CHK_FAILURE_RETURN(hr);
    CHK_FAILURE_RETURN(inParam.returnValue);
    event = nullptr;
    return CM_SUCCESS;
}

CM_RT_API int32_t CmQueue_RT::SetResidentGroupAndParallelThreadNum(uint32_t residentGroupNum,
                                                                   uint32_t parallelThreadNum)
{
    return CM_NOT_IMPLEMENTED;
}

CM_RT_API int32_t CmQueue_RT::EnqueueReadBuffer(CmBuffer *buffer,
                                                size_t offset,
                                                const unsigned char *sysMem,
                                                uint64_t sysMemSize,
                                                CmEvent *wait_event,
                                                CmEvent *&event,
                                                unsigned option)
{
    INSERT_PROFILER_RECORD();
    CM_ENQUEUE_COPY_BUFFER_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.buffer = buffer;
    inParam.sysMem = (void *)sysMem;
    inParam.copySize = sysMemSize;
    inParam.offset = offset;
    inParam.wait_event = wait_event;
    inParam.option = option;
    inParam.copyDir = CM_FASTCOPY_GPU2CPU;
    inParam.cmEventHandle = event;

    m_criticalSection.Acquire();
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_BUFFER,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return CM_SUCCESS;
}

CM_RT_API int32_t CmQueue_RT::EnqueueWriteBuffer(CmBuffer *buffer,
                                                 size_t offset,
                                                 const unsigned char *sysMem,
                                                 uint64_t sysMemSize,
                                                 CmEvent *wait_event,
                                                 CmEvent *&event,
                                                 unsigned option)
{
    INSERT_PROFILER_RECORD();
    CM_ENQUEUE_COPY_BUFFER_PARAM inParam;
    CmSafeMemSet(&inParam, 0, sizeof(inParam));
    inParam.cmQueueHandle = m_cmQueueHandle;
    inParam.buffer = buffer;
    inParam.sysMem = (void *)sysMem;
    inParam.copySize = sysMemSize;
    inParam.offset = offset;
    inParam.wait_event = wait_event;
    inParam.option = option;
    inParam.copyDir = CM_FASTCOPY_CPU2GPU;
    inParam.cmEventHandle = event;

    m_criticalSection.Acquire();
    int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_BUFFER,
                                               &inParam, sizeof(inParam));
    if (FAILED(hr))
    {
        CmAssert(0);
        m_criticalSection.Release();
        return hr;
    }
    if (inParam.returnValue != CM_SUCCESS)
    {
        m_criticalSection.Release();
        return inParam.returnValue;
    }

    event = static_cast<CmEvent *>(inParam.cmEventHandle);
    m_criticalSection.Release();
    return CM_SUCCESS;
}
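//! Example (illustrative only, not part of the original file): writing host data into a
//! CmBuffer and reading it back with the buffer-copy entry points above. 'queue',
//! 'buffer', 'src', 'dst' and 'sizeInBytes' are placeholders; passing nullptr for the
//! wait_event and 0 for offset/option are assumed to be acceptable defaults.
//!
//!     CmEvent *event = nullptr;
//!     queue->EnqueueWriteBuffer(buffer, 0, src, sizeInBytes, nullptr, event, 0);
//!     event->WaitForTaskFinished();
//!     queue->DestroyEvent(event);
//!
//!     event = nullptr;
//!     queue->EnqueueReadBuffer(buffer, 0, dst, sizeInBytes, nullptr, event, 0);
//!     event->WaitForTaskFinished();
//!     queue->DestroyEvent(event);
//!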