1 /*
2 * Copyright (c) 2024, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
//! \file     vp_render_l0_fc_kernel.cpp
//! \brief    L0 FC render kernel object used by the media pipeline.
//! \details  Provides the structures and generates the kernel state (arguments, curbe, surface and walker settings) that the media pipeline uses.
26 //!
27
28 #include "vp_render_l0_fc_kernel.h"
29
30 using namespace vp;
31
VpRenderL0FcKernel(PVP_MHWINTERFACE hwInterface,VpKernelID kernelID,uint32_t kernelIndex,PVpAllocator allocator)32 VpRenderL0FcKernel::VpRenderL0FcKernel(PVP_MHWINTERFACE hwInterface, VpKernelID kernelID, uint32_t kernelIndex, PVpAllocator allocator) : VpRenderKernelObj(hwInterface, kernelID, kernelIndex, "", allocator)
33 {
34 m_renderHal = hwInterface ? hwInterface->m_renderHal : nullptr;
35 m_kernelIndex = kernelIndex;
36
37 switch (kernelID)
38 {
39 case kernelL0FcCommon:
40 m_kernelName = "FastComp_fc_common";
41 break;
42 case kernelL0FcFP:
43 m_kernelName = "FastExpress_fc_fp";
44 break;
45 case kernelL0Fc444PL3Input:
46 m_kernelName = "ImageRead_fc_444PL3_input";
47 break;
48 default:
49 m_kernelName.assign("");
50 VP_RENDER_ASSERTMESSAGE("Kernel ID cannot map to Kernel Name");
51 break;
52 }
53 m_isAdvKernel = true;
54 m_useIndependentSamplerGroup = true;
55 m_kernelBinaryID = VP_ADV_KERNEL_BINARY_ID(kernelID);
56 }
57
~VpRenderL0FcKernel()58 VpRenderL0FcKernel::~VpRenderL0FcKernel()
59 {
60 MOS_SafeFreeMemory(m_curbe);
61 m_curbe = nullptr;
62 }
63
Init(VpRenderKernel & kernel)64 MOS_STATUS VpRenderL0FcKernel::Init(VpRenderKernel &kernel)
65 {
66 VP_FUNC_CALL();
67
68 VP_RENDER_NORMALMESSAGE("Initializing L0 FC krn %s", kernel.GetKernelName().c_str());
69
70 m_kernelSize = kernel.GetKernelSize();
71
72 uint8_t *pKernelBin = (uint8_t *)kernel.GetKernelBinPointer();
73 VP_RENDER_CHK_NULL_RETURN(pKernelBin);
74
75 m_kernelBinary = pKernelBin + kernel.GetKernelBinOffset();
76
77 m_kernelArgs.clear();
78 for (auto &arg : kernel.GetKernelArgs())
79 {
80 arg.pData = nullptr;
81 m_kernelArgs.insert(std::make_pair(arg.uIndex,arg));
82 }
83
84 m_kernelBtis = kernel.GetKernelBtis();
85
86 m_kernelEnv = kernel.GetKernelExeEnv();
87
88 m_curbeSize = kernel.GetCurbeSize();
89
90 m_inlineData.resize(m_kernelEnv.uInlineDataPayloadSize);
91
92 return MOS_STATUS_SUCCESS;
93 }
94
SetSamplerStates(KERNEL_SAMPLER_STATE_GROUP & samplerStateGroup)95 MOS_STATUS VpRenderL0FcKernel::SetSamplerStates(KERNEL_SAMPLER_STATE_GROUP &samplerStateGroup)
96 {
97 VP_FUNC_CALL();
98
99 if (m_kernelEnv.bHasSample)
100 {
101 samplerStateGroup.clear();
102
103 MHW_SAMPLER_STATE_PARAM samplerStateParam = {};
104 samplerStateParam.Unorm.SamplerFilterMode = MHW_SAMPLER_FILTER_BILINEAR;
105 samplerStateParam.Unorm.MagFilter = MHW_GFX3DSTATE_MAPFILTER_LINEAR;
106 samplerStateParam.Unorm.MinFilter = MHW_GFX3DSTATE_MAPFILTER_LINEAR;
107 samplerStateParam.Unorm.AddressU = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
108 samplerStateParam.Unorm.AddressV = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
109 samplerStateParam.Unorm.AddressW = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
110 samplerStateParam.bInUse = true;
111 samplerStateParam.SamplerType = MHW_SAMPLER_TYPE_3D;
112 if (m_linearSamplerIndex >= 0)
113 {
114 VP_RENDER_NORMALMESSAGE("Bilinear Sampler Set on Sampler Index %d", m_linearSamplerIndex);
115 samplerStateGroup.insert(std::make_pair(m_linearSamplerIndex, samplerStateParam));
116 }
117 else
118 {
119 VP_RENDER_NORMALMESSAGE("Bilinear Sampler NOT SET for Invalid Index %d", m_linearSamplerIndex);
120 }
121
122 samplerStateParam = {};
123 samplerStateParam.Unorm.SamplerFilterMode = MHW_SAMPLER_FILTER_NEAREST;
124 samplerStateParam.Unorm.MagFilter = MHW_GFX3DSTATE_MAPFILTER_NEAREST;
125 samplerStateParam.Unorm.MinFilter = MHW_GFX3DSTATE_MAPFILTER_NEAREST;
126 samplerStateParam.Unorm.AddressU = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
127 samplerStateParam.Unorm.AddressV = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
128 samplerStateParam.Unorm.AddressW = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
129 samplerStateParam.bInUse = true;
130 samplerStateParam.SamplerType = MHW_SAMPLER_TYPE_3D;
131 if (m_nearestSamplerIndex >= 0)
132 {
133 VP_RENDER_NORMALMESSAGE("Nearest Sampler Set on Sampler Index %d", m_nearestSamplerIndex);
134 samplerStateGroup.insert(std::make_pair(m_nearestSamplerIndex, samplerStateParam));
135 }
136 else
137 {
138 VP_RENDER_NORMALMESSAGE("Nearest Sampler NOT SET for Invalid Index %d", m_nearestSamplerIndex);
139 }
140 }
141
142 return MOS_STATUS_SUCCESS;
143 }
144
SetKernelArgs(KERNEL_ARGS & kernelArgs,VP_PACKET_SHARED_CONTEXT * sharedContext)145 MOS_STATUS VpRenderL0FcKernel::SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext)
146 {
147 VP_FUNC_CALL();
148
149 //All pData will be free in VpL0FcFilter::Destroy so no need to free here
150 for (KRN_ARG &srcArg : kernelArgs)
151 {
152 auto handle = m_kernelArgs.find(srcArg.uIndex);
153
154 if (srcArg.eArgKind == ARG_KIND_GENERAL || srcArg.eArgKind == ARG_KIND_INLINE)
155 {
156 if (handle != m_kernelArgs.end())
157 {
158 KRN_ARG &dstArg = handle->second;
159 if (srcArg.pData == nullptr)
160 {
161 VP_RENDER_ASSERTMESSAGE("The Kernel Argument General Data is null! KernelID %d, argIndex %d", m_kernelId, dstArg.uIndex);
162 return MOS_STATUS_INVALID_PARAMETER;
163 }
164 else
165 {
166 dstArg.eArgKind = srcArg.eArgKind;
167 dstArg.pData = srcArg.pData;
168 srcArg.pData = nullptr;
169 }
170 }
171 }
172 else if (srcArg.eArgKind == ARG_KIND_SAMPLER)
173 {
174 if (handle != m_kernelArgs.end())
175 {
176 KRN_ARG &dstArg = handle->second;
177 if (srcArg.pData == nullptr)
178 {
179 VP_RENDER_ASSERTMESSAGE("The Kernel Argument Sampler Data is null! KernelID %d, argIndex %d", m_kernelId, dstArg.uIndex);
180 return MOS_STATUS_INVALID_PARAMETER;
181 }
182 else
183 {
184 if (*(uint32_t *)srcArg.pData == MHW_SAMPLER_FILTER_BILINEAR)
185 {
186 m_linearSamplerIndex = dstArg.uOffsetInPayload;
187 srcArg.pData = nullptr;
188 }
189 else if (*(uint32_t *)srcArg.pData == MHW_SAMPLER_FILTER_NEAREST)
190 {
191 m_nearestSamplerIndex = dstArg.uOffsetInPayload;
192 srcArg.pData = nullptr;
193 }
194 else
195 {
196 VP_RENDER_ASSERTMESSAGE("The Kernel Argument Sampler Data is INVALID TYPE! KernelID %d, argIndex %d, type %d", m_kernelId, dstArg.uIndex, *(uint32_t *)srcArg.pData);
197 return MOS_STATUS_INVALID_PARAMETER;
198 }
199 }
200 }
201 }
202
203 if (srcArg.pData != nullptr)
204 {
205 srcArg.pData = nullptr;
206 VP_RENDER_ASSERTMESSAGE("The Kernel Argument is set but not used. KernelID %d, argIndex %d", m_kernelId, srcArg.uIndex);
207 }
208 }
209
210 return MOS_STATUS_SUCCESS;
211 }
212
SetKernelStatefulSurfaces(KERNEL_ARG_INDEX_SURFACE_MAP & statefulSurfaces)213 MOS_STATUS VpRenderL0FcKernel::SetKernelStatefulSurfaces(KERNEL_ARG_INDEX_SURFACE_MAP& statefulSurfaces)
214 {
215 m_argIndexSurfMap = statefulSurfaces;
216 return MOS_STATUS_SUCCESS;
217 }
218
GetCurbeState(void * & curbe,uint32_t & curbeLength)219 MOS_STATUS VpRenderL0FcKernel::GetCurbeState(void *&curbe, uint32_t &curbeLength)
220 {
221 VP_FUNC_CALL();
222 curbeLength = m_curbeSize;
223
224 VP_RENDER_NORMALMESSAGE("KernelID %d, Curbe Size %d\n", m_kernelId, curbeLength);
225
226 if (curbeLength == 0)
227 {
228 return MOS_STATUS_INVALID_PARAMETER;
229 }
230
231 uint8_t *pCurbe = (uint8_t *)MOS_AllocAndZeroMemory(curbeLength);
232 VP_RENDER_CHK_NULL_RETURN(pCurbe);
233 MOS_FreeMemAndSetNull(m_curbe);
234 m_curbe = pCurbe;
235
236 for (auto &handle : m_kernelArgs)
237 {
238 KRN_ARG &arg = handle.second;
239 switch (arg.eArgKind)
240 {
241 case ARG_KIND_GENERAL:
242 case ARG_KIND_SURFACE:
243 if (arg.pData != nullptr)
244 {
245 MOS_SecureMemcpy(pCurbe + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize);
246 VP_RENDER_NORMALMESSAGE("Setting Curbe State KernelID %d, index %d , value %d, argKind %d", m_kernelId, arg.uIndex, *(uint32_t *)arg.pData, arg.eArgKind);
247 }
248 else
249 {
250 VP_RENDER_NORMALMESSAGE("KernelID %d, index %d, argKind %d is empty", m_kernelId, arg.uIndex, arg.eArgKind);
251 }
252 break;
253 case ARG_KIND_INLINE:
254 case ARG_KIND_SAMPLER:
255 break;
256 default:
257 VP_PUBLIC_CHK_STATUS_RETURN(MOS_STATUS_UNIMPLEMENTED);
258 }
259 }
260
261 curbe = pCurbe;
262
263 return MOS_STATUS_SUCCESS;
264 }
265
SetupSurfaceState()266 MOS_STATUS VpRenderL0FcKernel::SetupSurfaceState()
267 {
268 VP_FUNC_CALL();
269
270 KERNEL_SURFACE_STATE_PARAM kernelSurfaceParam;
271 m_surfaceState.clear();
272 for (auto it = m_kernelBtis.begin(); it != m_kernelBtis.end(); ++it)
273 {
274 uint32_t argIndex = it->first;
275 uint32_t bti = it->second;
276
277 VP_RENDER_NORMALMESSAGE("Setting Surface State for L0 FC. KernelID %d, layer %d, argIndex %d , bti %d", m_kernelId, m_kernelIndex, argIndex, bti);
278
279 MOS_ZeroMemory(&kernelSurfaceParam, sizeof(KERNEL_SURFACE_STATE_PARAM));
280 kernelSurfaceParam.surfaceOverwriteParams.updatedRenderSurfaces = true;
281 kernelSurfaceParam.surfaceOverwriteParams.bindedKernel = true;
282 PRENDERHAL_SURFACE_STATE_PARAMS pRenderSurfaceParams = &kernelSurfaceParam.surfaceOverwriteParams.renderSurfaceParams;
283 pRenderSurfaceParams->bAVS = false;
284 pRenderSurfaceParams->Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
285 pRenderSurfaceParams->b2PlaneNV12NeededByKernel = true;
286 pRenderSurfaceParams->forceCommonSurfaceMessage = true;
287 SurfaceType surfType = SurfaceTypeInvalid;
288 MOS_HW_RESOURCE_DEF resourceType = MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER;
289
290 auto surfHandle = m_argIndexSurfMap.find(argIndex);
291 VP_PUBLIC_CHK_NOT_FOUND_RETURN(surfHandle, &m_argIndexSurfMap);
292 if (surfHandle->second.combineChannelY)
293 {
294 pRenderSurfaceParams->combineChannelY = true;
295 }
296 surfType = surfHandle->second.surfType;
297 if (surfType == SurfaceTypeInvalid)
298 {
299 VP_RENDER_NORMALMESSAGE("Will skip surface argIndex %d, bti %d for it is set as invalid", argIndex, bti);
300 continue;
301 }
302 pRenderSurfaceParams->isOutput = surfHandle->second.isOutput;
303 if (m_surfaceState.find(surfType) != m_surfaceState.end())
304 {
305 UpdateCurbeBindingIndex(surfType, bti);
306 continue;
307 }
308 auto surf = m_surfaceGroup->find(surfType);
309 if (m_surfaceGroup->end() == surf)
310 {
311 VP_RENDER_ASSERTMESSAGE("surf was not found %d", surfType);
312 return MOS_STATUS_NULL_POINTER;
313 }
314 VP_RENDER_CHK_NULL_RETURN(surf->second);
315 VP_RENDER_CHK_NULL_RETURN(surf->second->osSurface);
316
317 pRenderSurfaceParams->MemObjCtl = (m_renderHal->pOsInterface->pfnCachePolicyGetMemoryObject(
318 resourceType,
319 m_renderHal->pOsInterface->pfnGetGmmClientContext(m_renderHal->pOsInterface)))
320 .DwordValue;
321 pRenderSurfaceParams->Component = COMPONENT_VPCommon;
322
323 if (m_kernelId == kernelL0FcCommon ||
324 m_kernelId == kernelL0FcFP)
325 {
326 kernelSurfaceParam.surfaceOverwriteParams.updatedSurfaceParams = true;
327 kernelSurfaceParam.surfaceOverwriteParams.format = surf->second->osSurface->Format;
328 kernelSurfaceParam.surfaceOverwriteParams.width = MOS_MIN(static_cast<uint16_t>(surf->second->osSurface->dwWidth), static_cast<uint16_t>(surf->second->rcSrc.right));
329 kernelSurfaceParam.surfaceOverwriteParams.height = MOS_MIN(static_cast<uint16_t>(surf->second->osSurface->dwHeight), static_cast<uint16_t>(surf->second->rcSrc.bottom));
330 }
331
332 if (surfHandle->second.needVerticalStirde)
333 {
334 switch (surf->second->SampleType)
335 {
336 case SAMPLE_INTERLEAVED_EVEN_FIRST_TOP_FIELD:
337 case SAMPLE_INTERLEAVED_ODD_FIRST_TOP_FIELD:
338 pRenderSurfaceParams->bVertStride = true;
339 pRenderSurfaceParams->bVertStrideOffs = 0;
340 break;
341 case SAMPLE_INTERLEAVED_EVEN_FIRST_BOTTOM_FIELD:
342 case SAMPLE_INTERLEAVED_ODD_FIRST_BOTTOM_FIELD:
343 pRenderSurfaceParams->bVertStride = true;
344 pRenderSurfaceParams->bVertStrideOffs = 1;
345 break;
346 default:
347 pRenderSurfaceParams->bVertStride = false;
348 pRenderSurfaceParams->bVertStrideOffs = 0;
349 break;
350 }
351 }
352
353 if (surf->second->SurfType == SURF_OUT_RENDERTARGET &&
354 (surf->second->osSurface->Format == Format_YUY2 ||
355 surf->second->osSurface->Format == Format_Y210 ||
356 surf->second->osSurface->Format == Format_Y216 ||
357 surf->second->osSurface->Format == Format_YUYV ||
358 surf->second->osSurface->Format == Format_YVYU ||
359 surf->second->osSurface->Format == Format_UYVY ||
360 surf->second->osSurface->Format == Format_VYUY))
361 {
362 pRenderSurfaceParams->bWidthInDword_Y = true;
363 }
364
365 if (surf->second->osSurface->Format == Format_Buffer)
366 {
367 kernelSurfaceParam.surfaceOverwriteParams.updatedSurfaceParams = true;
368 kernelSurfaceParam.surfaceOverwriteParams.bufferResource = true;
369 }
370
371 m_surfaceState.insert(std::make_pair(surfType, kernelSurfaceParam));
372
373 UpdateCurbeBindingIndex(surfType, bti);
374 }
375
376 return MOS_STATUS_SUCCESS;
377 }
378
GetWalkerSetting(KERNEL_WALKER_PARAMS & walkerParam,KERNEL_PACKET_RENDER_DATA & renderData)379 MOS_STATUS VpRenderL0FcKernel::GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData)
380 {
381 VP_FUNC_CALL();
382
383 walkerParam = m_walkerParam;
384
385 walkerParam.iBindingTable = renderData.bindingTable;
386 walkerParam.iMediaID = renderData.mediaID;
387 walkerParam.iCurbeOffset = renderData.iCurbeOffset;
388 // Should use renderData.iCurbeLength instead of kernelSettings.CURBE_Length.
389 // kernelSettings.CURBE_Length is 32 aligned with 5 bits shift.
390 // renderData.iCurbeLength is RENDERHAL_CURBE_BLOCK_ALIGN(64) aligned.
391 walkerParam.iCurbeLength = renderData.iCurbeLength;
392
393 return MOS_STATUS_SUCCESS;
394 }
395
396 // Only for Adv kernels.
SetWalkerSetting(KERNEL_THREAD_SPACE & threadSpace,bool bSyncFlag,bool flushL1)397 MOS_STATUS VpRenderL0FcKernel::SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1)
398 {
399 VP_FUNC_CALL();
400 MOS_ZeroMemory(&m_walkerParam, sizeof(KERNEL_WALKER_PARAMS));
401
402 m_walkerParam.iBlocksX = threadSpace.uWidth;
403 m_walkerParam.iBlocksY = threadSpace.uHeight;
404 m_walkerParam.threadWidth = threadSpace.uLocalWidth;
405 m_walkerParam.threadHeight = threadSpace.uLocalHeight;
406 m_walkerParam.threadDepth = 1;
407 m_walkerParam.isVerticalPattern = false;
408 m_walkerParam.bSyncFlag = bSyncFlag;
409
410 m_walkerParam.pipeControlParams.bUpdateNeeded = true;
411 m_walkerParam.pipeControlParams.bEnableDataPortFlush = true;
412 m_walkerParam.pipeControlParams.bUnTypedDataPortCacheFlush = true;
413 m_walkerParam.pipeControlParams.bFlushRenderTargetCache = false;
414 m_walkerParam.pipeControlParams.bInvalidateTextureCache = false;
415
416 for (auto &handle : m_kernelArgs)
417 {
418 KRN_ARG &arg = handle.second;
419 if (arg.eArgKind == ARG_KIND_INLINE)
420 {
421 if (arg.pData != nullptr)
422 {
423 MOS_SecureMemcpy(m_inlineData.data() + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize);
424 VP_RENDER_NORMALMESSAGE("Setting Inline Data KernelID %d, index %d , value %d, argKind %d", m_kernelId, arg.uIndex, *(uint32_t *)arg.pData, arg.eArgKind);
425 }
426 else
427 {
428 VP_RENDER_NORMALMESSAGE("KernelID %d, index %d, argKind %d is empty", m_kernelId, arg.uIndex, arg.eArgKind);
429 }
430 }
431 }
432 m_walkerParam.inlineDataLength = m_inlineData.size();
433 m_walkerParam.inlineData = m_inlineData.data();
434
435 m_walkerParam.slmSize = m_kernelEnv.uiSlmSize;
436 m_walkerParam.hasBarrier = (m_kernelEnv.uBarrierCount > 0);
437
438 if (m_kernelEnv.uSimdSize != 1)
439 {
440 m_walkerParam.isEmitInlineParameter = true;
441 m_walkerParam.isGenerateLocalID = true;
442 m_walkerParam.emitLocal = MHW_EMIT_LOCAL_XYZ;
443 }
444
445 return MOS_STATUS_SUCCESS;
446 }
447
SetKernelConfigs(KERNEL_CONFIGS & kernelConfigs)448 MOS_STATUS VpRenderL0FcKernel::SetKernelConfigs(KERNEL_CONFIGS& kernelConfigs)
449 {
450 VP_FUNC_CALL();
451
452 auto handle = kernelConfigs.find(m_kernelId);
453 VP_PUBLIC_CHK_NOT_FOUND_RETURN(handle, &kernelConfigs);
454
455 L0_FC_KERNEL_CONFIG *kernelConfig = (L0_FC_KERNEL_CONFIG *)handle->second;
456 VP_PUBLIC_CHK_NULL_RETURN(kernelConfig);
457
458 m_kernelConfig = *kernelConfig;
459
460 return MOS_STATUS_SUCCESS;
461 }
462
SetPerfTag()463 MOS_STATUS VpRenderL0FcKernel::SetPerfTag()
464 {
465 auto pOsInterface = m_hwInterface->m_osInterface;
466 VP_RENDER_CHK_NULL_RETURN(pOsInterface);
467 VP_RENDER_CHK_NULL_RETURN(pOsInterface->pfnSetPerfTag);
468
469 pOsInterface->pfnSetPerfTag(pOsInterface, m_kernelConfig.perfTag);
470
471 return MOS_STATUS_SUCCESS;
472 }