xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/cm/cm_kernel.h (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_kernel.h
24 //! \brief     Contains Class CmKernel definitions
25 //!
26 
27 #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNEL_H_
28 #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNEL_H_
29 
30 #include "cm_def.h"
31 
32 namespace CMRT_UMD
33 {
34 class CmThreadSpace;       // Forward declaration: this header only uses it as CmThreadSpace*&.
35 class CmThreadGroupSpace;  // Forward declaration: this header only uses it as CmThreadGroupSpace*&.
36 
37 //! \details    Before enqueuing a CmKernel, it has to be set up. To set up a
38 //!             kernel the application needs to specify the number of kernel
39 //!             threads to be created, the values of all arguments to the
40 //!             kernel, and optionally the dependency mask specifying the
41 //!             dependency among threads. Arguments of a kernel may be
42 //!             specified as per kernel arguments or per thread arguments.
43 //!             Per kernel arguments are arguments that have the same value
44 //!             for all threads of a kernel, whereas per thread arguments may
45 //!             have a different value for different threads. After a CmKernel
46 //!             is set up, the kernel may be Enqueued for execution
47 //!             once or multiple times. Kernel settings are preserved across
48 //!             multiple Enqueue calls. After enqueuing a CmKernel the
49 //!             application may explicitly call CmKernel member functions
50 //!             to modify the setup subject to implementation restrictions
51 //!             described below.\n
52 //!             Implementation Restrictions:\n
53 //!             The thread count cannot be changed once it has been set. Also
54 //!             an argument that has been previously set using SetKernelArg
55 //!             cannot be subsequently set using SetThreadArg, and vice versa.
56 //!             These are current restrictions imposed by the API
57 //!             implementation and may be removed in a future release.
58 class CmKernel
59 {
60 public:
61     //! \brief      Set the number of threads for this kernel.
62     //! \details    This function specifies the number of threads to create for
63     //!             execution of the kernel. For media object, the number of
64     //!             threads of all kernels in a task should be no more than
65     //!             CAP_USER_DEFINED_THREAD_COUNT_PER_TASK. For media walker,
66     //!             the number of threads of the media walker kernel should be
67     //!             no more than 261121(511x511) pre-SKL and 4190209(2047x2047)
68     //!             SKL+. This function is not necessary if a thread space is
69     //!             defined. The thread count set by calling this function will
70     //!             be overwritten by the thread space dimension.
71     //! \param      [in] count
72     //!             Number of threads.
73     //! \retval     CM_SUCCESS if thread number is set successfully.
74     //! \retval     CM_INVALID_ARG_VALUE if the thread count exceeds the maximum.
75     //! \note       If this function is called more than once with different
76     //!             count values, all argument values become invalidated, i.e. the
77     //!             application needs to call SetKernelArg or SetThreadArg again
78     //!             for all arguments.
79     CM_RT_API virtual int32_t SetThreadCount(uint32_t count) = 0;
80
81     //! \brief      Set a per-kernel argument.
82     //! \details    The total size in bytes of all
83     //!             per kernel arguments and per thread arguments should be
84     //!             less than or equal to CAP_ARG_SIZE_PER_KERNEL.
85     //!             Per kernel arguments are set by calling SetKernelArg.
86     //!             Per thread arguments are set by calling SetThreadArg.
87     //!             Calling SetThreadArg for a kernel triggers media object
88     //!             command. Otherwise media object walker command is used.
89     //! \param      [in] index
90     //!             Index of argument in MDF kernel function. The index is
91     //!             global for per kernel arguments and per thread arguments.
92     //! \param      [in] size
93     //!             Size of the argument.
94     //! \param      [in] value
95     //!             Pointer to argument value, could be CM_NULL_SURFACE if the
96     //!             arg is not used in kernel.
97     //! \retval     CM_SUCCESS if the per-kernel argument is set successfully.
98     //! \retval     CM_KERNELPAYLOAD_PERKERNELARG_MUTEX_FAIL if the indirect data is set
99     //! \retval     CM_INVALID_ARG_INDEX if the argument 'index' is incorrect
100     //! \retval     CM_INVALID_ARG_VALUE if the argument 'value' is incorrect
101     //! \retval     CM_INVALID_ARG_SIZE if the argument 'size' is incorrect
102     //! \retval     CM_FAILURE otherwise
103     CM_RT_API virtual int32_t SetKernelArg(uint32_t index,
104                                            size_t size,
105                                            const void *value) = 0;
106
107     //! \brief      Set a per-thread argument.
108     //! \details    The total size in bytes of all
109     //!             per kernel arguments and per thread arguments should be
110     //!             less than or equal to CAP_ARG_SIZE_PER_KERNEL.
111     //!             Per kernel arguments are set by calling SetKernelArg.
112     //!             Per thread arguments are set by calling SetThreadArg.
113     //!             Calling SetThreadArg for a kernel triggers media object
114     //!             command. Otherwise media object walker command is used.
115     //! \param      [in] threadId
116     //!             Index of the thread.
117     //! \param      [in] index
118     //!             Index of argument in CM kernel function. The index is
119     //!             global for per kernel arguments and per thread arguments.
120     //! \param      [in] size
121     //!             Size of the argument.
122     //! \param      [in] value
123     //!             Pointer to argument. Setting a value more than once for the
124     //!             same threadId and index is allowed, but the sizes must be
125     //!             the same.
126     //! \retval     CM_SUCCESS if the per-thread argument is set successfully.
127     //! \retval     CM_KERNELPAYLOAD_PERKERNELARG_MUTEX_FAIL if the indirect data is set
128     //! \retval     CM_INVALID_THREAD_INDEX if the argument 'threadId' is incorrect
129     //! \retval     CM_INVALID_ARG_INDEX if the argument 'index' is incorrect
130     //! \retval     CM_INVALID_ARG_VALUE if the argument 'value' is incorrect
131     //! \retval     CM_INVALID_ARG_SIZE if the argument 'size' is incorrect
132     //! \retval     CM_FAILURE otherwise
133     //! \note       This API is not recommended to be used. Using threadspace to indicate
134     //!             thread indexes is a better choice.
135     CM_RT_API virtual int32_t SetThreadArg(uint32_t threadId,
136                                            uint32_t index,
137                                            size_t size,
138                                            const void *value) = 0;
139
140     //! \brief      Set a buffer to be a static buffer.
141     //! \details    value points to the buffer's surface index. In CM host
142     //!             program, currently we can have at most 4 static buffers
143     //!             which are indexed as 0~3. These static buffers can be
144     //!             accessed by all functions in the kernel. For SKL and newer
145     //!             platforms, these static buffers have surface
146     //!             binding table index 1~4. For platforms older than SKL,
147     //!             these static buffers are bound at index 0xf3~0xf6.
148     //! \param      [in] index
149     //!             Index of the global buffer, valid in 0~3.
150     //! \param      [in] value
151     //!             Pointer to the CmBuffer's SurfaceIndex.
152     //! \retval     CM_SUCCESS if the static buffer is set successfully.
153     //! \retval     CM_INVALID_GLOBAL_BUFFER_INDEX if the index is not in 0~3
154     //! \retval     CM_INVALID_BUFFER_HANDLER if value is invalid
155     //! \retval     CM_INVALID_ARG_INDEX if the surfaceIndex pointed to by value is invalid
156     //! \retval     CM_FAILURE otherwise
157     //! \note       The print buffer occupies static buffer index one. If
158     //!             CmDevice::InitPrintBuffer is called, static buffer 1 can't be used.
159     CM_RT_API virtual int32_t SetStaticBuffer(uint32_t index,
160                                               const void *value) = 0;
161
162     //! \brief      Set the binding table index directly for a surface.
163     //! \details    The assigned binding table index should be a valid value
164     //!             for buffer, surface 2D, surface2D UP, sampler surface,
165     //!             or sampler 8x8 surface, otherwise, this call will return failure.
166     //!             By calling this function, the surface can be accessed by
167     //!             kernel using the binding table index directly. No need to pass
168     //!             surface from host to kernel via kernel argument.
169     //! \param      [in] surface
170     //!             The surface whose binding table index will be set.
171     //! \param      [in] bti
172     //!             The binding table index. 1~242 on Gen8 and 8~239 on Gen9+ platforms.
173     //! \retval     CM_SUCCESS if the binding table index is set successfully.
174     //! \retval     CM_KERNELPAYLOAD_SURFACE_INVALID_BTIINDEX if bti is invalid
175     //! \retval     CM_FAILURE otherwise
176     CM_RT_API virtual int32_t SetSurfaceBTI(SurfaceIndex* surface,
177                                             uint32_t bti) = 0;
178
179     //! \brief      Associate a thread space with this kernel.
180     //! \details    This is per kernel thread space.
181     //! \param      [in] threadSpace
182     //!             The pointer to the CmThreadSpace.
183     //! \retval     CM_SUCCESS if the association is successful.
184     //! \retval     CM_INVALID_ARG_VALUE if threadSpace is invalid
185     //! \retval     CM_INVALID_KERNEL_THREADSPACE if thread group space is set
186     //! \note       It is exclusive with AssociateThreadGroupSpace().
187     CM_RT_API virtual int32_t AssociateThreadSpace(CmThreadSpace* &threadSpace) = 0;
188
189     //! \brief      Associate a thread group space with this kernel.
190     //! \details    This is per kernel thread group space. Each kernel will
191     //!             trigger a gpgpu walker command.
192     //! \param      [in] threadGroupSpace
193     //!             A pointer to the CmThreadGroupSpace.
194     //! \retval     CM_SUCCESS if the association is successful.
195     //! \retval     CM_INVALID_ARG_VALUE if threadGroupSpace is invalid
196     //! \retval     CM_INVALID_KERNEL_THREADSPACE if thread space is set
197     //! \note       It is exclusive with AssociateThreadSpace().
198     CM_RT_API virtual int32_t
199     AssociateThreadGroupSpace(CmThreadGroupSpace* &threadGroupSpace) = 0;
200
201     //! \brief      Set sampler heap position by user.
202     //! \details    Unlike surface state, each type of sampler state occupies
203     //!             different size of space in the sampler heap. The offset in
204     //!             the heap is the BTI index times the size of the sampler.
205     //! \param      [in] sampler
206     //!             The SamplerIndex whose binding table index will be set.
207     //! \param      [in] nIndex
208     //!             The binding table index.
209     //! \retval     CM_SUCCESS if the setting is successful.
210     //! \retval     CM_NULL_POINTER if sampler is nullptr
211     //! \retval     CM_KERNELPAYLOAD_SAMPLER_INVALID_BTINDEX if nIndex is invalid
212     //! \retval     CM_FAILURE otherwise
213     CM_RT_API virtual int32_t
214     SetSamplerBTI(SamplerIndex *sampler, uint32_t nIndex) = 0;
215
216     //! \brief      De-associate the thread space from the kernel.
217     //! \details    Coupled with AssociateThreadSpace().
218     //! \param      [in] threadSpace
219     //!             The pointer to CmThreadSpace.
220     //! \retval     CM_SUCCESS if the de-associate operation is successful.
221     //! \retval     CM_NULL_POINTER if threadSpace is nullptr
222     //! \retval     CM_INVALID_ARG_VALUE if threadSpace has not been set before
223     CM_RT_API virtual int32_t DeAssociateThreadSpace(CmThreadSpace* &threadSpace) = 0;
224
225     //! \brief      De-associate the thread group space from the kernel.
226     //! \details    Coupled with AssociateThreadGroupSpace().
227     //! \param      [in] threadGroupSpace
228     //!             The pointer to CmThreadGroupSpace.
229     //! \retval     CM_SUCCESS if the de-associate operation is successful.
230     //! \retval     CM_NULL_POINTER if threadGroupSpace is nullptr
231     //! \retval     CM_INVALID_ARG_VALUE if threadGroupSpace has not been set before
232     CM_RT_API virtual int32_t
233     DeAssociateThreadGroupSpace(CmThreadGroupSpace* &threadGroupSpace) = 0;
234
235     //! \brief      Query the kernel spill memory size.
236     //! \details    During Just-In-Time compilation of kernel, if compiler
237     //!             detects that more registers are needed than allowed,
238     //!             spill happens. This function is to return the spill size.
239     //!             This function will return failure if JIT compilation
240     //!             doesn't happen.
241     //! \param      [out] spillMemorySize
242     //!             The spill memory size in bytes.
243     //! \retval     CM_SUCCESS if the query is successful.
244     //! \retval     CM_FAILURE otherwise.
245     CM_RT_API virtual int32_t QuerySpillSize(uint32_t &spillMemorySize) = 0;
246
247     //! \brief      Set SVM or stateless buffer pointer as per-kernel argument.
248     //! \details    The total size in bytes of all
249     //!             per kernel arguments and per thread arguments should be
250     //!             less than or equal to CAP_ARG_SIZE_PER_KERNEL.
251     //!             Per kernel arguments are set by calling SetKernelArg.
252     //!             Per thread arguments are set by calling SetThreadArg.
253     //!             Calling SetThreadArg for a kernel triggers media object
254     //!             command. Otherwise media object walker command is used.
255     //! \param      [in] index
256     //!             Index of argument in MDF kernel function. The index is
257     //!             global for per kernel arguments and per thread arguments.
258     //! \param      [in] size
259     //!             The size of kernel argument.
260     //! \param      [in] value
261     //!             The SVM or stateless buffer pointer that should be used as
262     //!             the argument value for argument specified by index. The
263     //!             SVM buffer pointer value specified as the argument
264     //!             value can be the pointer returned by CreateBufferSVM(). And
265     //!             the stateless buffer pointer value specified as the argument
266     //!             value can be the pointer returned by CmBufferStateless::
267     //!             GetGfxAddress() or CmBufferStateless::GetSysAddress().
268     //!             Or can be a pointer + offset into the SVM and stateless
269     //!             buffer region.
270     //! \retval     CM_SUCCESS if the per-kernel argument is set successfully.
271     //! \retval     CM_INVALID_ARG_INDEX if the argument 'index' is incorrect
272     //! \retval     CM_INVALID_KERNEL_ARG_POINTER if the argument 'value' is incorrect
273     //! \retval     CM_KERNELPAYLOAD_PERKERNELARG_MUTEX_FAIL if the indirect data is set
274     //! \retval     CM_FAILURE otherwise
275     CM_RT_API virtual int32_t SetKernelArgPointer(uint32_t index,
276                                                   size_t size,
277                                                   const void *value) = 0;
278
279 public:  // The methods below are exported with CMRT_UMD_API instead of CM_RT_API.
280     //! \brief      Get the kernel binary of this kernel.
281     //! \param      [in,out] binary
282     //!             Vector to store kernel binary.
283     //! \returns    CM_SUCCESS.
284     //! \note       This API is implemented for debug purpose.
285     //!
286     CMRT_UMD_API virtual int32_t GetBinary(std::vector<char> &binary) = 0;
287
288     //! \brief      Replace the kernel binary of this kernel.
289     //! \param      [in] binary
290     //!             Vector containing the kernel binary to use as the replacement.
291     //! \retval     CM_SUCCESS if the kernel binary is replaced successfully.
292     //! \retval     CM_INVALID_ARG_VALUE if input argument is invalid.
293     //! \note       This API is implemented for debug purpose.
294     //!
295     CMRT_UMD_API virtual int32_t ReplaceBinary(std::vector<char> &binary) = 0;
296
297     //! \brief      Reset the kernel binary of this kernel.
298     //! \returns    CM_SUCCESS.
299     //! \note       This API is implemented for debug purpose.
300     //!
301     CMRT_UMD_API virtual int32_t ResetBinary() = 0;
302 };
303 };//namespace
304 
305 #endif  // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNEL_H_
306