1 /* 2 * Copyright (c) 2017, Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 //! 23 //! \file cm_kernel.h 24 //! \brief Contains Class CmKernel definitions 25 //! 26 27 #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNEL_H_ 28 #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNEL_H_ 29 30 #include "cm_def.h" 31 32 namespace CMRT_UMD 33 { 34 class CmThreadSpace; 35 class CmThreadGroupSpace; 36 37 //! \details Before enqueuing a CmKernel, it has to be set up.To set up a 38 //! kernel the application needs to specify the number of kernel 39 //! threads to be created, the values of all arguments to the 40 //! kernel, and optionally the dependency mask specifying the 41 //! dependency among threads. Arguments of a kernel may be 42 //! specified as per kernel arguments or per thread arguments. 43 //! Per kernel arguments are arguments that have the same value 44 //! for all threads of a kernel, whereas per thread arguments may 45 //! have a different value for different threads. After a CmKernel 46 //! is set up, the kernel may be Enqueued for execution 47 //! once or multiple times. Kernel settings are preserved across 48 //! multiple Enqueue calls. After enqueuing a CmKernel the 49 //! application may explicitly calling CmKernel member functions 50 //! to modify the setup subject to implementation restrictions 51 //! described below.\n 52 //! Implementation Restrictions:\n 53 //! The thread count cannot be changed once it has been set. Also 54 //! an argument that has been previously set using SetKernelArg 55 //! cannot be subsequently set using SetThreadArg, and vice versa. 56 //! These are current restrictions imposed by the API 57 //! implementation and may be removed in a future release. 58 class CmKernel 59 { 60 public: 61 //! \brief Set the number of threads for this kernel. 62 //! \details This function specifies the number of threads to create for 63 //! execution of the kernel. For media object, the number of 64 //! threads of all kernels in a task should be no more than 65 //! CAP_USER_DEFINED_THREAD_COUNT_PER_TASK. For media walker, 66 //! the number of threads of the media walker kernel should be 67 //! no more than 261121(511x511) pre-SKL and 4190209(2047x2047) 68 //! SKL+. This function is not necessary if a thread space is 69 //! defined. The thread count set by calling this funciton will 70 //! be overwritten by the thread space dimension. 71 //! \param [in] count 72 //! number of threads. 73 //! \retval CM_SUCCESS if thread number is set successfully. 74 //! \retval CM_INVALID_ARG_VALUE if the thread count exceeds the maximum. 75 //! \note If this function is called more than once with different 76 //! count value, all argument values become invalidated, i.e. 77 //! application needs to call SetKernelArg or SetThreadArg again 78 //! for all arguments. 79 CM_RT_API virtual int32_t SetThreadCount(uint32_t count) = 0; 80 81 //! \brief Set a per-kernel argument. 82 //! \details The total size in bytes of all 83 //! per kernel arguments and per thread arguments should be 84 //! less than or equal to CAP_ARG_SIZE_PER_KERNEL. 85 //! Per kernel arguments are set by calling SetKernelArg. 86 //! Per thread arguments are set by calling SetThreadArg. 87 //! Calling SetThreadArg for a kernel triggers media object 88 //! command. Otherwise media object walker command is used. 89 //! \param [in] index 90 //! Index of argument in MDF kernel function. The index is 91 //! global for per kernel arguments and per thread arguments. 92 //! \param [in] size 93 //! Size of the argument. 94 //! \param [in] value 95 //! Pointer to argument value, could be CM_NULL_SURFACE if the 96 //! arg is not used in kernel. 97 //! \retval CM_SUCCESS if the per-kernel argument is set successfully. 98 //! \retval CM_KERNELPAYLOAD_PERKERNELARG_MUTEX_FAIL if the indirect data set 99 //! \retval CM_INVALID_ARG_INDEX if the argument 'index' is incorrect 100 //! \retval CM_INVALID_ARG_VALUE if the argument 'value' is incorrect 101 //! \retval CM_INVALID_ARG_SIZE if the argument 'size' is incorrect 102 //! \retval CM_FAILURE otherwise 103 CM_RT_API virtual int32_t SetKernelArg(uint32_t index, 104 size_t size, 105 const void *value) = 0; 106 107 //! \brief Set a per thread argument. 108 //! \details The total size in bytes of all 109 //! per kernel arguments and per thread arguments should be 110 //! less than or equal to CAP_ARG_SIZE_PER_KERNEL. 111 //! Per kernel arguments are set by calling SetKernelArg. 112 //! Per thread arguments are set by calling SetThreadArg. 113 //! Calling SetThreadArg for a kernel triggers media object 114 //! command. Otherwise media object walker command is used. 115 //! \param [in] threadId 116 //! Index of the thread. 117 //! \param [in] index 118 //! Index of argument in CM kernel function. The index is 119 //! global for per kernel arguments and per thread arguments. 120 //! \param [in] size 121 //! Size of the argument. 122 //! \param [in] value 123 //! Pointer to argument. Setting a value more than once for the 124 //! same threadId and index is allowed, but the sizes must be 125 //! the same. 126 //! \retval CM_SUCCESS if the per-thread argument is set successfully. 127 //! \retval CM_KERNELPAYLOAD_PERKERNELARG_MUTEX_FAIL if the indirect data set 128 //! \retval CM_INVALID_THREAD_INDEX if the argument 'threadId' is incorrect 129 //! \retval CM_INVALID_ARG_INDEX if the argument 'index' is incorrect 130 //! \retval CM_INVALID_ARG_VALUE if the argument 'value' is incorrect 131 //! \retval CM_INVALID_ARG_SIZE if the argument 'size' is incorrect 132 //! \retval CM_FAILURE otherwise 133 //! \note This API is not recommended to be used. Using threadspace to indicate 134 //! thread indexes is a better choice. 135 CM_RT_API virtual int32_t SetThreadArg(uint32_t threadId, 136 uint32_t index, 137 size_t size, 138 const void *value) = 0; 139 140 //! \brief Set a buffer to be a static buffer. 141 //! \details value points to the buffer's surface index. In CM host 142 //! program, currently we can have at most 4 static buffers 143 //! which are indexed as 0~3. These static buffers can be 144 //! accessed by all functions in the kernel. For SKL and newer 145 //! and newer platforms, these static buffers have surface 146 //! binding table index 1~4. For platforms older than SKL, 147 //! these static buffers are binded at index 0xf3~0xf6. 148 //! \param [in] index 149 //! Index of the global buffer, valid in 0~3. 150 //! \param [in] value 151 //! Pointer to the CmBuffer's SurfaceIndex. 152 //! \retval CM_SUCCESS is the static buffer is set successfully. 153 //! \retval CM_INVALID_GLOBAL_BUFFER_INDEX if the index is not in 0~3 154 //! \retval CM_INVALID_BUFFER_HANDLER if value is invalid 155 //! \retval CM_INVALID_ARG_INDEX if the surfaceIndex pointed by value is invalid 156 //! \retval CM_FAILURE otherwise 157 //! \note The print buffer occupies static buffer index one. If 158 //! CmDevice::InitPrintBuffer is called, static buffer 1 can't be used. 159 CM_RT_API virtual int32_t SetStaticBuffer(uint32_t index, 160 const void *value) = 0; 161 162 //! \brief Set the binding table index directly for a surface. 163 //! \details The assigned binding table index should be a valid value 164 //! for buffer, surface 2D, surface2D UP, sampler surface , 165 //! or sampler 8x8 surface, otherwise, this call will return failure. 166 //! By calling this fucntion, the surfce can be accessed by 167 //! kernel using the btIndex directly. No need to pass surface 168 //! from host to kernel via kernel argument. 169 //! \param [in] surface 170 //! The surface whose binding table index will be set. 171 //! \param [in] bti 172 //! The binding table index. 1~242 on Gen8 and 8~239 on Gen9+ platforms. 173 //! \retval CM_SUCCESS if the binding table index is set successfully. 174 //! \retval CM_KERNELPAYLOAD_SURFACE_INVALID_BTIINDEX if btIndex is invalid 175 //! \retval CM_FAILURE otherwise 176 CM_RT_API virtual int32_t SetSurfaceBTI(SurfaceIndex* surface, 177 uint32_t bti) = 0; 178 179 //! \brief Associate a thread space to this kernel. 180 //! \details This is per kernel thread space. 181 //! \param [in] threadSpace 182 //! the pointer to the CmThreadSpace. 183 //! \retval CM_SUCCESS if the association is successful. 184 //! \retval CM_INVALID_ARG_VALUE if threadSpace is invalid 185 //! \retval CM_INVALID_KERNEL_THREADSPACE if thread group space is set 186 //! \note It is exclusive with AssociateThreadGroupSpace(). 187 CM_RT_API virtual int32_t AssociateThreadSpace(CmThreadSpace* &threadSpace) = 0; 188 189 //! \brief Associates a thread group space with this kernel. 190 //! \details This is per kernel thread group space. Each kernel will 191 //! tri gger a gpgpu walker command. 192 //! \param [in] threadGroupSpace 193 //! A pointer ot the CmThreadGroupSpace. 194 //! \retval CM_SUCCESS if the association is successful. 195 //! \retval CM_INVALID_ARG_VALUE if threadSpace is invalid 196 //! \retval CM_INVALID_KERNEL_THREADSPACE if thread space is set 197 //! \note It is exclusive with AssociateThreadSpace(). 198 CM_RT_API virtual int32_t 199 AssociateThreadGroupSpace(CmThreadGroupSpace* &threadGroupSpace) = 0; 200 201 //! \brief Set sampler heap position by user. 202 //! \details Unlike surface state, each type of sampler state occupies 203 //! different size of space in the sampler heap. The offset in 204 //! the heap is the BTI index times the size of the sampler. 205 //! \param [in] sampler 206 //! The SamplerIndex whose binding table index will be set. 207 //! \param [in] nIndex 208 //! The binding table index. 209 //! \retval CM_SUCCESS if the setting is successful. 210 //! \retval CM_NULL_POINTER if sampler is nullptr 211 //! \retval CM_KERNELPAYLOAD_SAMPLER_INVALID_BTINDEX if nIndex is invalid 212 //! \retval CM_FAILURE otherwise 213 CM_RT_API virtual int32_t 214 SetSamplerBTI(SamplerIndex *sampler, uint32_t nIndex) = 0; 215 216 //! \brief De-associate the thread space from the kernel. 217 //! \details Coupled with AssociateThreadSpace(). 218 //! \param [in] threadSpace 219 //! The pointer to CmThreadSpace. 220 //! \retval CM_SUCCESS if the de-associate operation is successful. 221 //! \retval CM_NULL_POINTER if threadSpace is nullptr 222 //! \retval CM_INVALID_ARG_VALUE if threadSpace has not been set before 223 CM_RT_API virtual int32_t DeAssociateThreadSpace(CmThreadSpace* &threadSpace) = 0; 224 225 //! \brief De-associate the thread group space from the kernel. 226 //! \details Coupled with AssociateThreadGroupSpace(). 227 //! \param [in] threadGroupSpace 228 //! the pointer to CmThreadGroupSpace. 229 //! \retval CM_SUCCESS if the de-associate operation is successful. 230 //! \retval CM_NULL_POINTER if threadGroupSpace is nullptr 231 //! \retval CM_INVALID_ARG_VALUE if threadGroupSpace has not been set before 232 CM_RT_API virtual int32_t 233 DeAssociateThreadGroupSpace(CmThreadGroupSpace* &threadGroupSpace) = 0; 234 235 //! \brief Query the kernel spill memory size. 236 //! \details During Just-In-Time compilation of kernel, if compiler 237 //! detects that more registers are needed than allowed, 238 //! spill happens. This function is to return the spill size. 239 //! This function will return failure if JIT compilation 240 //! doesn't happen. 241 //! \param [out] spillMemorySize 242 //! The spill memory size in bytes. 243 //! \retval CM_SUCCESS if the query is successful. 244 //! \retval CM_FAILURE otherwise. 245 CM_RT_API virtual int32_t QuerySpillSize(uint32_t &spillMemorySize) = 0; 246 247 //! \brief Set SVM or stateless buffer pointer as per-kernel argument. 248 //! \details The total size in bytes of all 249 //! per kernel arguments and per thread arguments should be 250 //! less than or equal to CAP_ARG_SIZE_PER_KERNEL. 251 //! Per kernel arguments are set by calling SetKernelArg. 252 //! Per thread arguments are set by calling SetThreadArg. 253 //! Calling SetThreadArg for a kernel triggers media object 254 //! command. Otherwise media object walker command is used. 255 //! \param [in] index 256 //! Index of argument in MDF kernel function. The index is 257 //! global for per kernel arguments and per thread arguments. 258 //! \param [in] size 259 //! The size of kernel argument. 260 //! \param [in] value 261 //! The SVM or stateless buffer pointer that should be used as 262 //! the argument value for argument specified by index.The 263 //! SVM buffer pointer value specified as the argument 264 //! value can be the pointer returned by CreateBufferSVM(). And 265 //! the stateless buffer pointer value specified as the argument 266 //! value can be the pointer returned by CmBufferStateless:: 267 //! GetGfxAddress() or CmBufferStateless::GetSysAddress(). 268 //! Or can be a pointer + offset into the SVM and stateless 269 //! buffer region. 270 //! \retval CM_SUCCESS if the per-kernel argument is set successfully. 271 //! \retval CM_INVALID_ARG_INDEX if the argument 'index' is incorrect 272 //! \retval CM_INVALID_KERNEL_ARG_POINTER if the argument 'value' is incorrect 273 //! \retval CM_KERNELPAYLOAD_PERKERNELARG_MUTEX_FAIL if the indirect data set 274 //! \retval CM_FAILURE otherwise 275 CM_RT_API virtual int32_t SetKernelArgPointer(uint32_t index, 276 size_t size, 277 const void *value) = 0; 278 279 public: 280 //! \brief Get the kernel binary of this kernel. 281 //! \param [in,out] binary 282 //! Vector to store kernel binary. 283 //! \returns CM_SUCCESS. 284 //! \note This API is implemented for debug purpose. 285 //! 286 CMRT_UMD_API virtual int32_t GetBinary(std::vector<char> &binary) = 0; 287 288 //! \brief Replace the kernel binary of this kernel. 289 //! \param [in] binary 290 //! Vector to store kernel binary. 291 //! \retval CM_SUCCESS if the kernel binary is replaced successfully. 292 //! \retval CM_INVALID_ARG_VALUE if input argument is invalid. 293 //! \note This API is implemented for debug purpose. 294 //! 295 CMRT_UMD_API virtual int32_t ReplaceBinary(std::vector<char> &binary) = 0; 296 297 //! \brief Reset the kernel binary of this kernel. 298 //! \returns CM_SUCCESS. 299 //! \note This API is implemented for debug purpose. 300 //! 301 CMRT_UMD_API virtual int32_t ResetBinary() = 0; 302 }; 303 };//namespace 304 305 #endif // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNEL_H 306