1 /*
2 * Copyright (c) 2024, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file vp_render_vebox_hdr_3dlut_l0_kernel.cpp
//! \brief    Render packet which is used by the media pipeline.
//! \details  The render packet provides the structures and generates the command buffer which the media pipeline will use.
26 //!
27 #include "vp_render_vebox_hdr_3dlut_l0_kernel.h"
28 #include "vp_dumper.h"
29 #include "vp_kernelset.h"
30
31 using namespace vp;
32
// 3x4 identity color-correction matrix: a 3x3 identity with a zero fourth column,
// stored as 12 consecutive floats.
static const float ccm_identity[12] = {
    1.0f, 0.0f, 0.0f, 0.0f,
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f};
// File-scope 3x4 result buffer written by CalcCCMMatrix() and read back by InitCoefSurface().
static float color_matrix_calculation[3][4] = {};
35
//!
//! \brief    Invert a 3x3 matrix stored as 9 consecutive row-major elements.
//! \details  The inverse is computed as adjugate / determinant with double precision
//!           intermediates and converted back to T on store. All nine inputs are read
//!           before any output is written, so m1 and m2 may refer to the same storage.
//!           No singularity check is done: a zero determinant yields non-finite results.
//! \param    [in] m1
//!           Pointer to the 9 elements of the matrix to invert.
//! \param    [out] m2
//!           Pointer to 9 elements that receive the inverse.
//!
template <typename T>
void mat_3by3_inv(T *m1, T *m2)
{
    const double a11 = m1[0];
    const double a12 = m1[1];
    const double a13 = m1[2];
    const double a21 = m1[3];
    const double a22 = m1[4];
    const double a23 = m1[5];
    const double a31 = m1[6];
    const double a32 = m1[7];
    const double a33 = m1[8];

    // Cofactor expansion along the first column.
    const double det = a11 * (a33 * a22 - a32 * a23) - a21 * (a33 * a12 - a32 * a13) + a31 * (a12 * a23 - a13 * a22);

    const double det_inv = 1 / det;

    // Store in the element type of the matrix; a fixed float cast would silently
    // truncate the result when T is double.
    m2[0] = static_cast<T>(det_inv * (a33 * a22 - a32 * a23));
    m2[1] = static_cast<T>(-det_inv * (a33 * a12 - a32 * a13));
    m2[2] = static_cast<T>(det_inv * (a12 * a23 - a13 * a22));
    m2[3] = static_cast<T>(-det_inv * (a33 * a21 - a31 * a23));
    m2[4] = static_cast<T>(det_inv * (a33 * a11 - a31 * a13));
    m2[5] = static_cast<T>(-det_inv * (a23 * a11 - a21 * a13));
    m2[6] = static_cast<T>(det_inv * (a32 * a21 - a31 * a22));
    m2[7] = static_cast<T>(-det_inv * (a32 * a11 - a31 * a12));
    m2[8] = static_cast<T>(det_inv * (a22 * a11 - a21 * a12));
}
66
//!
//! \brief    Multiply two row-major matrices: m3 = m1 * m2.
//! \details  m1 is m_size x n_size, m2 is n_size x l_size and m3 is m_size x l_size.
//!           Each output element is zeroed and then accumulated in place, so m3 must
//!           not overlap m1 or m2.
//! \param    [in] m1
//!           Left operand, m_size * n_size elements.
//! \param    [in] m2
//!           Right operand, n_size * l_size elements.
//! \param    [out] m3
//!           Product, m_size * l_size elements.
//! \param    [in] m_size
//!           Row count of m1 and m3.
//! \param    [in] n_size
//!           Column count of m1 and row count of m2.
//! \param    [in] l_size
//!           Column count of m2 and m3.
//!
template <typename T>
void mat_mul_float(T *m1, T *m2, T *m3, short m_size, short n_size, short l_size)
{
    for (int row = 0; row < m_size; ++row)
    {
        // Pointers are typed on T (not float) so the template is usable for any element type.
        const T *lhsRow = m1 + row * n_size;
        T       *dstRow = m3 + row * l_size;

        for (int col = 0; col < l_size; ++col)
        {
            T *dst = dstRow + col;
            *dst   = 0;

            for (int k = 0; k < n_size; ++k)
            {
                *dst += lhsRow[k] * m2[k * l_size + col];
            }
        }
    }
}
CalcCCMMatrix()93 static void CalcCCMMatrix()
94 {
95 float mat_rgb2xyz_src[3][3], mat_rgb2xyz_dst[3][3];
96 float mat_xyz[3][3], mat_invxyz[3][3];
97 float matW[3];
98 float matE[3] = {};
99
100 float fccmSrcZR, fccmSrcZG, fccmSrcZB, fccmSrcZW;
101 float fccmDstZR, fccmDstZG, fccmDstZB, fccmDstZW;
102
103 const float fccmSrcXR = 0.708000000000000f;
104 const float fccmSrcYR = 0.292000000000000f;
105 const float fccmSrcXG = 0.170000000000000f;
106 const float fccmSrcYG = 0.797000000000000f;
107 const float fccmSrcXB = 0.131000000000000f;
108 const float fccmSrcYB = 0.046000000000000f;
109 const float fccmSrcXW = 0.312700000000000f;
110 const float fccmSrcYW = 0.329000000000000f;
111 const float fccmDstXR = 0.640000000000000f;
112 const float fccmDstYR = 0.330000000000000f;
113 const float fccmDstXG = 0.300000000000000f;
114 const float fccmDstYG = 0.600000000000000f;
115 const float fccmDstXB = 0.150000000000000f;
116 const float fccmDstYB = 0.060000000000000f;
117 const float fccmDstXW = 0.312700000000000f;
118 const float FccmDstYW = 0.329000000000000f;
119
120 fccmSrcZR = 1 - fccmSrcXR - fccmSrcYR;
121 fccmSrcZG = 1 - fccmSrcXG - fccmSrcYG;
122 fccmSrcZB = 1 - fccmSrcXB - fccmSrcYB;
123 fccmSrcZW = 1 - fccmSrcXW - fccmSrcYW;
124
125 mat_xyz[0][0] = fccmSrcXR;
126 mat_xyz[0][1] = fccmSrcXG;
127 mat_xyz[0][2] = fccmSrcXB;
128 mat_xyz[1][0] = fccmSrcYR;
129 mat_xyz[1][1] = fccmSrcYG;
130 mat_xyz[1][2] = fccmSrcYB;
131 mat_xyz[2][0] = fccmSrcZR;
132 mat_xyz[2][1] = fccmSrcZG;
133 mat_xyz[2][2] = fccmSrcZB;
134
135 mat_3by3_inv(mat_xyz[0], mat_invxyz[0]);
136
137 matW[0] = fccmSrcXW / fccmSrcYW;
138 matW[1] = 1;
139 matW[2] = fccmSrcZW / fccmSrcYW;
140
141 mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1);
142
143 mat_rgb2xyz_src[0][0] = matE[0] * fccmSrcXR;
144 mat_rgb2xyz_src[1][0] = matE[0] * fccmSrcYR;
145 mat_rgb2xyz_src[2][0] = matE[0] * fccmSrcZR;
146 mat_rgb2xyz_src[0][1] = matE[1] * fccmSrcXG;
147 mat_rgb2xyz_src[1][1] = matE[1] * fccmSrcYG;
148 mat_rgb2xyz_src[2][1] = matE[1] * fccmSrcZG;
149 mat_rgb2xyz_src[0][2] = matE[2] * fccmSrcXB;
150 mat_rgb2xyz_src[1][2] = matE[2] * fccmSrcYB;
151 mat_rgb2xyz_src[2][2] = matE[2] * fccmSrcZB;
152
153 fccmDstZR = 1 - fccmDstXR - fccmDstYR;
154 fccmDstZG = 1 - fccmDstXG - fccmDstYG;
155 fccmDstZB = 1 - fccmDstXB - fccmDstYB;
156 fccmDstZW = 1 - fccmDstXW - FccmDstYW;
157
158 mat_xyz[0][0] = fccmDstXR;
159 mat_xyz[0][1] = fccmDstXG;
160 mat_xyz[0][2] = fccmDstXB;
161 mat_xyz[1][0] = fccmDstYR;
162 mat_xyz[1][1] = fccmDstYG;
163 mat_xyz[1][2] = fccmDstYB;
164 mat_xyz[2][0] = fccmDstZR;
165 mat_xyz[2][1] = fccmDstZG;
166 mat_xyz[2][2] = fccmDstZB;
167
168 mat_3by3_inv(mat_xyz[0], mat_invxyz[0]);
169
170 matW[0] = fccmDstXW / FccmDstYW;
171 matW[1] = 1;
172 matW[2] = fccmDstZW / FccmDstYW;
173
174 mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1);
175
176 mat_rgb2xyz_dst[0][0] = matE[0] * fccmDstXR;
177 mat_rgb2xyz_dst[1][0] = matE[0] * fccmDstYR;
178 mat_rgb2xyz_dst[2][0] = matE[0] * fccmDstZR;
179 mat_rgb2xyz_dst[0][1] = matE[1] * fccmDstXG;
180 mat_rgb2xyz_dst[1][1] = matE[1] * fccmDstYG;
181 mat_rgb2xyz_dst[2][1] = matE[1] * fccmDstZG;
182 mat_rgb2xyz_dst[0][2] = matE[2] * fccmDstXB;
183 mat_rgb2xyz_dst[1][2] = matE[2] * fccmDstYB;
184 mat_rgb2xyz_dst[2][2] = matE[2] * fccmDstZB;
185
186 float mat_invrgb2xyz_dst[3][3];
187 mat_3by3_inv(mat_rgb2xyz_dst[0], mat_invrgb2xyz_dst[0]);
188
189 float CCMmat[3][3];
190 mat_mul_float(mat_invrgb2xyz_dst[0], mat_rgb2xyz_src[0], CCMmat[0], 3, 3, 3);
191
192 color_matrix_calculation[0][0] = CCMmat[0][0];
193 color_matrix_calculation[0][1] = CCMmat[0][1];
194 color_matrix_calculation[0][2] = CCMmat[0][2];
195 color_matrix_calculation[0][3] = 0.0f;
196 color_matrix_calculation[1][0] = CCMmat[1][0];
197 color_matrix_calculation[1][1] = CCMmat[1][1];
198 color_matrix_calculation[1][2] = CCMmat[1][2];
199 color_matrix_calculation[1][3] = 0.0f;
200 color_matrix_calculation[2][0] = CCMmat[2][0];
201 color_matrix_calculation[2][1] = CCMmat[2][1];
202 color_matrix_calculation[2][2] = CCMmat[2][2];
203 color_matrix_calculation[2][3] = 0.0f;
204 }
205
206 // Used by L0 kernel
VpRenderHdr3DLutL0Kernel(PVP_MHWINTERFACE hwInterface,PVpAllocator allocator)207 VpRenderHdr3DLutL0Kernel::VpRenderHdr3DLutL0Kernel(PVP_MHWINTERFACE hwInterface, PVpAllocator allocator) :
208 VpRenderKernelObj(hwInterface, (VpKernelID)kernelHdr3DLutCalcL0, 0, VP_HDR_KERNEL_NAME_L0_3DLUT, allocator)
209 {
210 VP_FUNC_CALL();
211 m_kernelBinaryID = VP_ADV_KERNEL_BINARY_ID(kernelHdr3DLutCalcL0);
212 m_isAdvKernel = true;
213 }
214
~VpRenderHdr3DLutL0Kernel()215 VpRenderHdr3DLutL0Kernel::~VpRenderHdr3DLutL0Kernel()
216 {
217 MOS_SafeFreeMemory(m_curbe);
218 m_curbe = nullptr;
219 }
220
Init(VpRenderKernel & kernel)221 MOS_STATUS VpRenderHdr3DLutL0Kernel::Init(VpRenderKernel &kernel)
222 {
223 VP_FUNC_CALL();
224
225 VP_RENDER_NORMALMESSAGE("Initializing SR krn %s", kernel.GetKernelName().c_str());
226
227 m_kernelSize = kernel.GetKernelSize();
228
229 uint8_t *pKernelBin = (uint8_t *)kernel.GetKernelBinPointer();
230 VP_RENDER_CHK_NULL_RETURN(pKernelBin);
231
232 m_kernelBinary = pKernelBin + kernel.GetKernelBinOffset();
233
234 m_kernelArgs = kernel.GetKernelArgs();
235
236 for (auto arg : m_kernelArgs)
237 {
238 arg.pData = nullptr;
239 }
240
241 m_kernelBtis = kernel.GetKernelBtis();
242
243 m_kernelEnv = kernel.GetKernelExeEnv();
244
245 m_curbeSize = kernel.GetCurbeSize();
246
247 return MOS_STATUS_SUCCESS;
248 }
249
250
//!
//! \brief    Prepare the surface contents required by the kernel.
//! \details  Fills the coefficient surface through InitCoefSurface() using the
//!           cached m_maxDisplayLum, m_maxContentLevelLum and m_hdrMode values.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when InitCoefSurface() succeeds; a failure is
//!           handled by VP_RENDER_CHK_STATUS_RETURN (presumably by returning that
//!           status to the caller).
//!
MOS_STATUS VpRenderHdr3DLutL0Kernel::SetupSurfaceState()
{
    VP_FUNC_CALL();
    VP_RENDER_CHK_STATUS_RETURN(InitCoefSurface(m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode));

    return MOS_STATUS_SUCCESS;
}
258
CpPrepareResources()259 MOS_STATUS VpRenderHdr3DLutL0Kernel::CpPrepareResources()
260 {
261 VP_FUNC_CALL();
262
263 PMOS_RESOURCE source[VPHAL_MAX_SOURCES] = {nullptr};
264 PMOS_RESOURCE target[VPHAL_MAX_TARGETS] = {nullptr};
265
266 if ((nullptr != m_hwInterface->m_osInterface) &&
267 (nullptr != m_hwInterface->m_osInterface->osCpInterface))
268 {
269 auto it = m_surfaceGroup->find(SurfaceType3DLutCoef);
270 VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
271 VP_RENDER_CHK_NULL_RETURN(surf);
272 source[0] = &(surf->osSurface->OsResource);
273
274 it = m_surfaceGroup->find(SurfaceType3DLut);
275 surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
276 VP_RENDER_CHK_NULL_RETURN(surf);
277
278 target[0] = &(surf->osSurface->OsResource);
279 m_hwInterface->m_osInterface->osCpInterface->PrepareResources((void **)source, 1, (void **)target, 1);
280 }
281 return MOS_STATUS_SUCCESS;
282 }
283
//!
//! \brief    Rebuild the stateless buffer table used by this kernel.
//! \details  Clears m_statelessArray and then calls SetupStatelessBufferResource()
//!           for SurfaceType3DLutCoef and SurfaceType3DLut, in that order.
//!           GetCurbeState() later looks surfaces up in m_statelessArray.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when both calls succeed; a failure is handled by
//!           VP_RENDER_CHK_STATUS_RETURN (presumably by returning that status).
//!
MOS_STATUS VpRenderHdr3DLutL0Kernel::SetupStatelessBuffer()
{
    VP_FUNC_CALL();
    // Start from an empty table so entries of a previous run are not reused.
    m_statelessArray.clear();
    VP_RENDER_CHK_STATUS_RETURN(SetupStatelessBufferResource(SurfaceType3DLutCoef));
    VP_RENDER_CHK_STATUS_RETURN(SetupStatelessBufferResource(SurfaceType3DLut));
    return MOS_STATUS_SUCCESS;
}
292
GetCurbeState(void * & curbe,uint32_t & curbeLength)293 MOS_STATUS VpRenderHdr3DLutL0Kernel::GetCurbeState(void *&curbe, uint32_t &curbeLength)
294 {
295 VP_FUNC_CALL();
296 curbeLength = m_curbeSize;
297
298 VP_RENDER_NORMALMESSAGE("KernelID %d, Curbe Size %d\n", m_kernelId, curbeLength);
299 if (curbeLength == 0)
300 {
301 return MOS_STATUS_INVALID_PARAMETER;
302 }
303
304 uint8_t *pCurbe = (uint8_t *)MOS_AllocAndZeroMemory(curbeLength);
305 VP_RENDER_CHK_NULL_RETURN(pCurbe);
306 MOS_FreeMemAndSetNull(m_curbe);
307 m_curbe = pCurbe;
308
309 for (auto &arg : m_kernelArgs)
310 {
311 if (arg.eArgKind == ARG_KIND_GENERAL)
312 {
313 if (arg.pData != nullptr)
314 {
315 MOS_SecureMemcpy(pCurbe + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize);
316 VP_RENDER_NORMALMESSAGE("Setting Curbe State KernelID %d, index %d , value %d, argKind %d", m_kernelId, arg.uIndex, *(uint32_t *)arg.pData, arg.eArgKind);
317 }
318 else
319 {
320 VP_RENDER_NORMALMESSAGE("KernelID %d, index %d, argKind %d is empty", m_kernelId, arg.uIndex, arg.eArgKind);
321 }
322 }
323 else if (arg.eArgKind == ARG_KIND_SURFACE)
324 {
325 if (arg.addressMode == AddressingModeStateless && arg.pData != nullptr)
326 {
327 for (uint32_t idx = 0; idx < arg.uSize / sizeof(SurfaceType); idx++)
328 {
329 uint32_t *pSurfaceindex = (uint32_t *)(arg.pData) + idx;
330 SurfaceType surf = (SurfaceType)*pSurfaceindex;
331
332 if (surf != SurfaceTypeInvalid)
333 {
334 auto it = m_statelessArray.find(surf);
335 uint64_t ui64GfxAddress = (m_statelessArray.end() != it) ? it->second : 0xFFFF;
336 *((uint64_t *)(pCurbe + arg.uOffsetInPayload) + idx) = ui64GfxAddress;
337 break;
338 }
339 else
340 {
341 *((uint64_t *)(pCurbe + arg.uOffsetInPayload) + idx) = 0xFFFF;
342 }
343 }
344 }
345 }
346 else if (arg.eArgKind == ARG_KIND_INLINE)
347 {
348 VP_RENDER_NORMALMESSAGE("Skip inline data here");
349 }
350 else
351 {
352 return MOS_STATUS_UNIMPLEMENTED;
353 }
354 }
355
356 curbe = pCurbe;
357
358 return MOS_STATUS_SUCCESS;
359 }
360
361
GetWalkerSetting(KERNEL_WALKER_PARAMS & walkerParam,KERNEL_PACKET_RENDER_DATA & renderData)362 MOS_STATUS VpRenderHdr3DLutL0Kernel::GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData)
363 {
364
365 VP_FUNC_CALL();
366
367 walkerParam = m_walkerParam;
368 walkerParam.iBindingTable = renderData.bindingTable;
369 walkerParam.iMediaID = renderData.mediaID;
370 walkerParam.iCurbeOffset = renderData.iCurbeOffset;
371 // Should use renderData.iCurbeLength instead of kernelSettings.CURBE_Length.
372 // kernelSettings.CURBE_Length is 32 aligned with 5 bits shift.
373 // renderData.iCurbeLength is RENDERHAL_CURBE_BLOCK_ALIGN(64) aligned.
374 walkerParam.iCurbeLength = renderData.iCurbeLength;
375 return MOS_STATUS_SUCCESS;
376 }
377
378 // Only for Adv kernels.
SetWalkerSetting(KERNEL_THREAD_SPACE & threadSpace,bool bSyncFlag,bool flushL1)379 MOS_STATUS VpRenderHdr3DLutL0Kernel::SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1)
380 {
381 VP_FUNC_CALL();
382 MOS_ZeroMemory(&m_walkerParam, sizeof(KERNEL_WALKER_PARAMS));
383
384 m_walkerParam.iBlocksX = threadSpace.uWidth;
385 m_walkerParam.iBlocksY = threadSpace.uHeight;
386 m_walkerParam.threadWidth = threadSpace.uLocalWidth;
387 m_walkerParam.threadHeight = threadSpace.uLocalHeight;
388 m_walkerParam.threadDepth = 1;
389 m_walkerParam.isVerticalPattern = false;
390 m_walkerParam.bSyncFlag = bSyncFlag;
391
392 m_walkerParam.pipeControlParams.bUpdateNeeded = true;
393 m_walkerParam.pipeControlParams.bEnableDataPortFlush = true;
394 m_walkerParam.pipeControlParams.bUnTypedDataPortCacheFlush = true;
395 m_walkerParam.pipeControlParams.bFlushRenderTargetCache = false;
396 m_walkerParam.pipeControlParams.bInvalidateTextureCache = false;
397
398 for (auto &arg : m_kernelArgs)
399 {
400 if (arg.eArgKind == ARG_KIND_INLINE)
401 {
402 if (arg.pData != nullptr)
403 {
404 MOS_SecureMemcpy(m_inlineData + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize);
405 VP_RENDER_NORMALMESSAGE("Setting Inline Data KernelID %d, index %d , value %d, argKind %d", m_kernelId, arg.uIndex, *(uint32_t *)arg.pData, arg.eArgKind);
406 }
407 else
408 {
409 VP_RENDER_NORMALMESSAGE("KernelID %d, index %d, argKind %d is empty", m_kernelId, arg.uIndex, arg.eArgKind);
410 }
411 }
412 }
413 m_walkerParam.inlineDataLength = sizeof(m_inlineData);
414 m_walkerParam.inlineData = m_inlineData;
415
416 if (m_kernelEnv.uSimdSize != 1 &&
417 (m_kernelEnv.uiWorkGroupWalkOrderDimensions[0] != 0 ||
418 m_kernelEnv.uiWorkGroupWalkOrderDimensions[1] != 0 ||
419 m_kernelEnv.uiWorkGroupWalkOrderDimensions[2] != 0))
420 {
421 m_walkerParam.isEmitInlineParameter = true;
422 m_walkerParam.isGenerateLocalID = true;
423 m_walkerParam.emitLocal = MHW_EMIT_LOCAL_XYZ;
424 }
425
426 return MOS_STATUS_SUCCESS;
427 }
428
InitCoefSurface(const uint32_t maxDLL,const uint32_t maxCLL,const VPHAL_HDR_MODE hdrMode)429 MOS_STATUS VpRenderHdr3DLutL0Kernel::InitCoefSurface(const uint32_t maxDLL, const uint32_t maxCLL, const VPHAL_HDR_MODE hdrMode)
430 {
431 VP_FUNC_CALL();
432 float *hdrcoefBuffer = nullptr;
433 int32_t oetfCurve = 0, tmMode = 0, tmSrcType = 0;
434 float *ccmMatrix = m_ccmMatrix;
435 float tmMaxCLL = 0.0f, tmMaxDLL = 0.0f;
436
437 MOS_ZeroMemory(m_ccmMatrix, sizeof(m_ccmMatrix));
438
439 // Get surface addr
440 auto it = m_surfaceGroup->find(SurfaceType3DLutCoef);
441 VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
442 VP_RENDER_CHK_NULL_RETURN(surf);
443
444 tmMaxCLL = (float)maxCLL;
445 tmMaxDLL = (float)maxDLL;
446
447 // Lock surface
448 uint8_t *lockedAddr = (uint8_t *)m_allocator->LockResourceForWrite(&surf->osSurface->OsResource);
449
450 VP_RENDER_CHK_NULL_RETURN(lockedAddr);
451
452 hdrcoefBuffer = (float *)lockedAddr;
453
454 if (hdrMode == VPHAL_HDR_MODE_TONE_MAPPING) // H2S
455 {
456 CalcCCMMatrix();
457 MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, color_matrix_calculation, sizeof(float) * 12);
458
459 tmMode = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2S;
460 oetfCurve = (OETF_CURVE_TYPE)OETF_SRGB;
461 tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709;
462 }
463 else // H2H
464 {
465 MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, ccm_identity, sizeof(float) * 12);
466
467 tmMode = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2H;
468 oetfCurve = (OETF_CURVE_TYPE)OETF_CURVE_HDR_2084;
469 tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709;
470 }
471
472 // Fill Coefficient Surface: Media kernel define the layout of coefficients. Please don't change it.
473 const uint32_t pos_coef[17] = {7, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 54, 55, 62, 63};
474
475 // OETF curve
476 ((int *)hdrcoefBuffer)[pos_coef[0]] = oetfCurve;
477 // CCM
478 for (uint32_t i = 0; i < VP_CCM_MATRIX_SIZE; ++i)
479 {
480 hdrcoefBuffer[pos_coef[i + 1]] = ccmMatrix[i];
481 }
482 // TM Source Type
483 ((int *)hdrcoefBuffer)[pos_coef[13]] = tmSrcType;
484 // TM Mode
485 ((int *)hdrcoefBuffer)[pos_coef[14]] = tmMode;
486 // Max CLL and DLL
487 hdrcoefBuffer[pos_coef[15]] = tmMaxCLL;
488 hdrcoefBuffer[pos_coef[16]] = tmMaxDLL;
489
490 //Unlock
491 VP_RENDER_CHK_STATUS_RETURN(m_allocator->UnLock(&surf->osSurface->OsResource));
492
493 return MOS_STATUS_SUCCESS;
494 }
495
SetKernelConfigs(KERNEL_CONFIGS & kernelConfigs)496 MOS_STATUS VpRenderHdr3DLutL0Kernel::SetKernelConfigs(KERNEL_CONFIGS &kernelConfigs)
497 {
498 VP_FUNC_CALL();
499 auto it = kernelConfigs.find((VpKernelID)kernelHdr3DLutCalcL0);
500
501 if (kernelConfigs.end() == it || nullptr == it->second)
502 {
503 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
504 }
505
506 PRENDER_HDR_3DLUT_CAL_PARAMS params = (PRENDER_HDR_3DLUT_CAL_PARAMS)it->second;
507
508 if (m_maxDisplayLum == params->maxDisplayLum && m_maxContentLevelLum == params->maxContentLevelLum &&
509 m_hdrMode == params->hdrMode && m_hdrLutSize == params->threadWidth)
510 {
511 // For such case, 3DLut calculation should be skipped in Policy::GetHdrExecutionCaps.
512 VP_RENDER_ASSERTMESSAGE("No change in 3D Lut parameters!");
513 }
514 else
515 {
516 m_maxDisplayLum = params->maxDisplayLum;
517 m_maxContentLevelLum = params->maxContentLevelLum;
518 m_hdrMode = params->hdrMode;
519 m_hdrLutSize = params->threadWidth;
520 VP_RENDER_NORMALMESSAGE("Maximum Display Luminance %d, Maximum Content Level Luminance %d, HDR mode %d, Lut size %d",
521 m_maxDisplayLum,
522 m_maxContentLevelLum,
523 m_hdrMode,
524 m_hdrLutSize);
525 }
526
527 return MOS_STATUS_SUCCESS;
528 }
529
530
531
SetKernelArgs(KERNEL_ARGS & kernelArgs,VP_PACKET_SHARED_CONTEXT * sharedContext)532 MOS_STATUS VpRenderHdr3DLutL0Kernel::SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext)
533 {
534 VP_FUNC_CALL();
535
536 //All pData will be free in VpSrFilter::Destroy so no need to free here
537 for (KRN_ARG &srcArg : kernelArgs)
538 {
539 for (KRN_ARG &dstArg : m_kernelArgs)
540 {
541 if (srcArg.uIndex == dstArg.uIndex)
542 {
543 if (dstArg.eArgKind == ARG_KIND_GENERAL || dstArg.eArgKind == ARG_KIND_INLINE || srcArg.eArgKind == ARG_KIND_SURFACE)
544 {
545 if (srcArg.pData == nullptr)
546 {
547 VP_RENDER_ASSERTMESSAGE("The Kernel Argument General Data is null! KernelID %d, argIndex %d", m_kernelId, dstArg.uIndex);
548 return MOS_STATUS_INVALID_PARAMETER;
549 }
550 else
551 {
552 dstArg.eArgKind = srcArg.eArgKind;
553 dstArg.pData = srcArg.pData;
554 srcArg.pData = nullptr;
555 }
556 }
557 }
558 }
559
560 if (srcArg.pData != nullptr)
561 {
562 srcArg.pData = nullptr;
563 VP_RENDER_ASSERTMESSAGE("The Kernel Argument is set but not used. KernelID %d, argIndex %d", m_kernelId, srcArg.uIndex);
564 }
565 }
566
567 return MOS_STATUS_SUCCESS;
568 }
569
DumpSurfaces()570 void VpRenderHdr3DLutL0Kernel::DumpSurfaces()
571 {
572 VP_FUNC_CALL();
573 for (auto &arg : m_kernelArgs)
574 {
575 if (arg.eArgKind == ARG_KIND_SURFACE)
576 {
577 for (uint32_t idx = 0; idx < arg.uSize / (sizeof(SurfaceType) * 2); idx++)
578 {
579 uint32_t *pSurfaceindex = (uint32_t *)(arg.pData) + idx;
580 SurfaceType surfType = (SurfaceType)*pSurfaceindex;
581 if (surfType == SurfaceTypeInvalid)
582 {
583 VP_RENDER_ASSERTMESSAGE("Surf type was invalid");
584 return;
585 }
586 auto surf = m_surfaceGroup->find(surfType);
587 if (m_surfaceGroup->end() == surf)
588 {
589 VP_RENDER_ASSERTMESSAGE("Surf was not found");
590 return;
591 }
592
593 char bufName[MAX_PATH] = {};
594
595 MOS_SecureStringPrint(
596 bufName,
597 MAX_PATH,
598 sizeof(bufName),
599 "k_%d_%s_argi_%d",
600 m_kernelIndex,
601 m_kernelName.c_str(),
602 idx);
603
604 if (surf->second == nullptr)
605 {
606 return;
607 }
608 if (surf->second->osSurface == nullptr)
609 {
610 return;
611 }
612
613 DumpSurface(surf->second, bufName);
614
615 }
616 }
617 }
618
619 return;
620 }