1 /*
2 * Copyright (c) 2024, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     vp_render_vebox_hdr_3dlut_l0_kernel.cpp
//! \brief    Render packet used by the media pipeline.
//! \details  The render packet provides the structures and generates the command buffer that the media pipeline uses.
26 //!
27 #include "vp_render_vebox_hdr_3dlut_l0_kernel.h"
28 #include "vp_dumper.h"
29 #include "vp_kernelset.h"
30 
31 using namespace vp;
32 
// Identity color-correction matrix: three rows of four coefficients
// (a 3x3 identity with a zero fourth column), stored row-major.
static const float ccm_identity[12] = {
    1.0f, 0.0f, 0.0f, 0.0f,
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f};

// Zero-initialized 3x4 result matrix; CalcCCMMatrix() rewrites every element on each call.
static float color_matrix_calculation[3][4] = {};
35 
//!
//! \brief    Invert a 3x3 matrix stored row-major in nine consecutive elements.
//! \details  The inverse is computed as adjugate / determinant using double
//!           intermediates. All nine inputs are read into locals before any
//!           output is written, so m2 may point to the same storage as m1.
//!           There is no singularity check: for floating-point T a zero
//!           determinant produces inf/NaN results.
//! \param    [in] m1
//!           Pointer to the nine source elements.
//! \param    [out] m2
//!           Pointer to the nine destination elements.
//!
template <typename T>
void mat_3by3_inv(T *m1, T *m2)
{
    const double a11 = m1[0];
    const double a12 = m1[1];
    const double a13 = m1[2];
    const double a21 = m1[3];
    const double a22 = m1[4];
    const double a23 = m1[5];
    const double a31 = m1[6];
    const double a32 = m1[7];
    const double a33 = m1[8];

    // 2x2 minors shared by the determinant and the first row of the inverse.
    const double minor11 = a33 * a22 - a32 * a23;
    const double minor21 = a33 * a12 - a32 * a13;
    const double minor31 = a12 * a23 - a13 * a22;

    // Expansion along the first column.
    const double det     = a11 * minor11 - a21 * minor21 + a31 * minor31;
    const double det_inv = 1 / det;

    // Convert to the element type T (previously a hard-coded float cast,
    // which truncated results when T was double).
    m2[0] = static_cast<T>(det_inv * minor11);
    m2[1] = static_cast<T>(-det_inv * minor21);
    m2[2] = static_cast<T>(det_inv * minor31);
    m2[3] = static_cast<T>(-det_inv * (a33 * a21 - a31 * a23));
    m2[4] = static_cast<T>(det_inv * (a33 * a11 - a31 * a13));
    m2[5] = static_cast<T>(-det_inv * (a23 * a11 - a21 * a13));
    m2[6] = static_cast<T>(det_inv * (a32 * a21 - a31 * a22));
    m2[7] = static_cast<T>(-det_inv * (a32 * a11 - a31 * a12));
    m2[8] = static_cast<T>(det_inv * (a22 * a11 - a21 * a12));
}
66 
//!
//! \brief    Multiply two row-major matrices: m3 = m1 * m2.
//! \details  m1 is m_size x n_size, m2 is n_size x l_size and m3 is
//!           m_size x l_size. Each output element is accumulated in a local of
//!           type T and stored once, so m3 must not overlap m1 or m2.
//!           The working pointers use T (they were previously hard-coded to
//!           float, which prevented instantiation for any other element type).
//! \param    [in] m1
//!           Left operand, m_size * n_size elements.
//! \param    [in] m2
//!           Right operand, n_size * l_size elements.
//! \param    [out] m3
//!           Result, m_size * l_size elements.
//! \param    [in] m_size
//!           Row count of m1 and m3.
//! \param    [in] n_size
//!           Column count of m1 and row count of m2.
//! \param    [in] l_size
//!           Column count of m2 and m3.
//!
template <typename T>
void mat_mul_float(T *m1, T *m2, T *m3, short m_size, short n_size, short l_size)
{
    for (int row = 0; row < m_size; row++)
    {
        T *lhs_row = m1 + row * n_size;
        T *out_row = m3 + row * l_size;

        for (int col = 0; col < l_size; col++)
        {
            T acc = 0;
            for (int inner = 0; inner < n_size; inner++)
            {
                acc += lhs_row[inner] * m2[inner * l_size + col];
            }
            out_row[col] = acc;
        }
    }
}
CalcCCMMatrix()93 static void CalcCCMMatrix()
94 {
95     float mat_rgb2xyz_src[3][3], mat_rgb2xyz_dst[3][3];
96     float mat_xyz[3][3], mat_invxyz[3][3];
97     float matW[3];
98     float matE[3] = {};
99 
100     float fccmSrcZR, fccmSrcZG, fccmSrcZB, fccmSrcZW;
101     float fccmDstZR, fccmDstZG, fccmDstZB, fccmDstZW;
102 
103     const float fccmSrcXR = 0.708000000000000f;
104     const float fccmSrcYR = 0.292000000000000f;
105     const float fccmSrcXG = 0.170000000000000f;
106     const float fccmSrcYG = 0.797000000000000f;
107     const float fccmSrcXB = 0.131000000000000f;
108     const float fccmSrcYB = 0.046000000000000f;
109     const float fccmSrcXW = 0.312700000000000f;
110     const float fccmSrcYW = 0.329000000000000f;
111     const float fccmDstXR = 0.640000000000000f;
112     const float fccmDstYR = 0.330000000000000f;
113     const float fccmDstXG = 0.300000000000000f;
114     const float fccmDstYG = 0.600000000000000f;
115     const float fccmDstXB = 0.150000000000000f;
116     const float fccmDstYB = 0.060000000000000f;
117     const float fccmDstXW = 0.312700000000000f;
118     const float FccmDstYW = 0.329000000000000f;
119 
120     fccmSrcZR = 1 - fccmSrcXR - fccmSrcYR;
121     fccmSrcZG = 1 - fccmSrcXG - fccmSrcYG;
122     fccmSrcZB = 1 - fccmSrcXB - fccmSrcYB;
123     fccmSrcZW = 1 - fccmSrcXW - fccmSrcYW;
124 
125     mat_xyz[0][0] = fccmSrcXR;
126     mat_xyz[0][1] = fccmSrcXG;
127     mat_xyz[0][2] = fccmSrcXB;
128     mat_xyz[1][0] = fccmSrcYR;
129     mat_xyz[1][1] = fccmSrcYG;
130     mat_xyz[1][2] = fccmSrcYB;
131     mat_xyz[2][0] = fccmSrcZR;
132     mat_xyz[2][1] = fccmSrcZG;
133     mat_xyz[2][2] = fccmSrcZB;
134 
135     mat_3by3_inv(mat_xyz[0], mat_invxyz[0]);
136 
137     matW[0] = fccmSrcXW / fccmSrcYW;
138     matW[1] = 1;
139     matW[2] = fccmSrcZW / fccmSrcYW;
140 
141     mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1);
142 
143     mat_rgb2xyz_src[0][0] = matE[0] * fccmSrcXR;
144     mat_rgb2xyz_src[1][0] = matE[0] * fccmSrcYR;
145     mat_rgb2xyz_src[2][0] = matE[0] * fccmSrcZR;
146     mat_rgb2xyz_src[0][1] = matE[1] * fccmSrcXG;
147     mat_rgb2xyz_src[1][1] = matE[1] * fccmSrcYG;
148     mat_rgb2xyz_src[2][1] = matE[1] * fccmSrcZG;
149     mat_rgb2xyz_src[0][2] = matE[2] * fccmSrcXB;
150     mat_rgb2xyz_src[1][2] = matE[2] * fccmSrcYB;
151     mat_rgb2xyz_src[2][2] = matE[2] * fccmSrcZB;
152 
153     fccmDstZR = 1 - fccmDstXR - fccmDstYR;
154     fccmDstZG = 1 - fccmDstXG - fccmDstYG;
155     fccmDstZB = 1 - fccmDstXB - fccmDstYB;
156     fccmDstZW = 1 - fccmDstXW - FccmDstYW;
157 
158     mat_xyz[0][0] = fccmDstXR;
159     mat_xyz[0][1] = fccmDstXG;
160     mat_xyz[0][2] = fccmDstXB;
161     mat_xyz[1][0] = fccmDstYR;
162     mat_xyz[1][1] = fccmDstYG;
163     mat_xyz[1][2] = fccmDstYB;
164     mat_xyz[2][0] = fccmDstZR;
165     mat_xyz[2][1] = fccmDstZG;
166     mat_xyz[2][2] = fccmDstZB;
167 
168     mat_3by3_inv(mat_xyz[0], mat_invxyz[0]);
169 
170     matW[0] = fccmDstXW / FccmDstYW;
171     matW[1] = 1;
172     matW[2] = fccmDstZW / FccmDstYW;
173 
174     mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1);
175 
176     mat_rgb2xyz_dst[0][0] = matE[0] * fccmDstXR;
177     mat_rgb2xyz_dst[1][0] = matE[0] * fccmDstYR;
178     mat_rgb2xyz_dst[2][0] = matE[0] * fccmDstZR;
179     mat_rgb2xyz_dst[0][1] = matE[1] * fccmDstXG;
180     mat_rgb2xyz_dst[1][1] = matE[1] * fccmDstYG;
181     mat_rgb2xyz_dst[2][1] = matE[1] * fccmDstZG;
182     mat_rgb2xyz_dst[0][2] = matE[2] * fccmDstXB;
183     mat_rgb2xyz_dst[1][2] = matE[2] * fccmDstYB;
184     mat_rgb2xyz_dst[2][2] = matE[2] * fccmDstZB;
185 
186     float mat_invrgb2xyz_dst[3][3];
187     mat_3by3_inv(mat_rgb2xyz_dst[0], mat_invrgb2xyz_dst[0]);
188 
189     float CCMmat[3][3];
190     mat_mul_float(mat_invrgb2xyz_dst[0], mat_rgb2xyz_src[0], CCMmat[0], 3, 3, 3);
191 
192     color_matrix_calculation[0][0] = CCMmat[0][0];
193     color_matrix_calculation[0][1] = CCMmat[0][1];
194     color_matrix_calculation[0][2] = CCMmat[0][2];
195     color_matrix_calculation[0][3] = 0.0f;
196     color_matrix_calculation[1][0] = CCMmat[1][0];
197     color_matrix_calculation[1][1] = CCMmat[1][1];
198     color_matrix_calculation[1][2] = CCMmat[1][2];
199     color_matrix_calculation[1][3] = 0.0f;
200     color_matrix_calculation[2][0] = CCMmat[2][0];
201     color_matrix_calculation[2][1] = CCMmat[2][1];
202     color_matrix_calculation[2][2] = CCMmat[2][2];
203     color_matrix_calculation[2][3] = 0.0f;
204 }
205 
206 // Used by L0 kernel
VpRenderHdr3DLutL0Kernel(PVP_MHWINTERFACE hwInterface,PVpAllocator allocator)207 VpRenderHdr3DLutL0Kernel::VpRenderHdr3DLutL0Kernel(PVP_MHWINTERFACE hwInterface, PVpAllocator allocator) :
208     VpRenderKernelObj(hwInterface, (VpKernelID)kernelHdr3DLutCalcL0, 0, VP_HDR_KERNEL_NAME_L0_3DLUT, allocator)
209 {
210     VP_FUNC_CALL();
211     m_kernelBinaryID = VP_ADV_KERNEL_BINARY_ID(kernelHdr3DLutCalcL0);
212     m_isAdvKernel    = true;
213 }
214 
~VpRenderHdr3DLutL0Kernel()215 VpRenderHdr3DLutL0Kernel::~VpRenderHdr3DLutL0Kernel()
216 {
217     MOS_SafeFreeMemory(m_curbe);
218     m_curbe = nullptr;
219 }
220 
Init(VpRenderKernel & kernel)221 MOS_STATUS VpRenderHdr3DLutL0Kernel::Init(VpRenderKernel &kernel)
222 {
223     VP_FUNC_CALL();
224 
225     VP_RENDER_NORMALMESSAGE("Initializing SR krn %s", kernel.GetKernelName().c_str());
226 
227     m_kernelSize = kernel.GetKernelSize();
228 
229     uint8_t *pKernelBin = (uint8_t *)kernel.GetKernelBinPointer();
230     VP_RENDER_CHK_NULL_RETURN(pKernelBin);
231 
232     m_kernelBinary = pKernelBin + kernel.GetKernelBinOffset();
233 
234     m_kernelArgs = kernel.GetKernelArgs();
235 
236     for (auto arg : m_kernelArgs)
237     {
238         arg.pData = nullptr;
239     }
240 
241     m_kernelBtis = kernel.GetKernelBtis();
242 
243     m_kernelEnv = kernel.GetKernelExeEnv();
244 
245     m_curbeSize = kernel.GetCurbeSize();
246 
247     return MOS_STATUS_SUCCESS;
248 }
249 
250 
SetupSurfaceState()251 MOS_STATUS VpRenderHdr3DLutL0Kernel::SetupSurfaceState()
252 {
253     VP_FUNC_CALL();
254     VP_RENDER_CHK_STATUS_RETURN(InitCoefSurface(m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode));
255 
256     return MOS_STATUS_SUCCESS;
257 }
258 
CpPrepareResources()259 MOS_STATUS VpRenderHdr3DLutL0Kernel::CpPrepareResources()
260 {
261     VP_FUNC_CALL();
262 
263     PMOS_RESOURCE source[VPHAL_MAX_SOURCES] = {nullptr};
264     PMOS_RESOURCE target[VPHAL_MAX_TARGETS] = {nullptr};
265 
266     if ((nullptr != m_hwInterface->m_osInterface) &&
267         (nullptr != m_hwInterface->m_osInterface->osCpInterface))
268     {
269         auto        it   = m_surfaceGroup->find(SurfaceType3DLutCoef);
270         VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
271         VP_RENDER_CHK_NULL_RETURN(surf);
272         source[0] = &(surf->osSurface->OsResource);
273 
274         it   = m_surfaceGroup->find(SurfaceType3DLut);
275         surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
276         VP_RENDER_CHK_NULL_RETURN(surf);
277 
278         target[0] = &(surf->osSurface->OsResource);
279         m_hwInterface->m_osInterface->osCpInterface->PrepareResources((void **)source, 1, (void **)target, 1);
280     }
281     return MOS_STATUS_SUCCESS;
282 }
283 
SetupStatelessBuffer()284 MOS_STATUS VpRenderHdr3DLutL0Kernel::SetupStatelessBuffer()
285 {
286     VP_FUNC_CALL();
287     m_statelessArray.clear();
288     VP_RENDER_CHK_STATUS_RETURN(SetupStatelessBufferResource(SurfaceType3DLutCoef));
289     VP_RENDER_CHK_STATUS_RETURN(SetupStatelessBufferResource(SurfaceType3DLut));
290     return MOS_STATUS_SUCCESS;
291 }
292 
GetCurbeState(void * & curbe,uint32_t & curbeLength)293 MOS_STATUS VpRenderHdr3DLutL0Kernel::GetCurbeState(void *&curbe, uint32_t &curbeLength)
294 {
295     VP_FUNC_CALL();
296     curbeLength = m_curbeSize;
297 
298     VP_RENDER_NORMALMESSAGE("KernelID %d, Curbe Size %d\n", m_kernelId, curbeLength);
299     if (curbeLength == 0)
300     {
301         return MOS_STATUS_INVALID_PARAMETER;
302     }
303 
304     uint8_t *pCurbe = (uint8_t *)MOS_AllocAndZeroMemory(curbeLength);
305     VP_RENDER_CHK_NULL_RETURN(pCurbe);
306     MOS_FreeMemAndSetNull(m_curbe);
307     m_curbe = pCurbe;
308 
309     for (auto &arg : m_kernelArgs)
310     {
311         if (arg.eArgKind == ARG_KIND_GENERAL)
312         {
313             if (arg.pData != nullptr)
314             {
315                 MOS_SecureMemcpy(pCurbe + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize);
316                 VP_RENDER_NORMALMESSAGE("Setting Curbe State KernelID %d, index %d , value %d, argKind %d", m_kernelId, arg.uIndex, *(uint32_t *)arg.pData, arg.eArgKind);
317             }
318             else
319             {
320                 VP_RENDER_NORMALMESSAGE("KernelID %d, index %d, argKind %d is empty", m_kernelId, arg.uIndex, arg.eArgKind);
321             }
322         }
323         else if (arg.eArgKind == ARG_KIND_SURFACE)
324         {
325             if (arg.addressMode == AddressingModeStateless && arg.pData != nullptr)
326             {
327                 for (uint32_t idx = 0; idx < arg.uSize / sizeof(SurfaceType); idx++)
328                 {
329                     uint32_t   *pSurfaceindex  = (uint32_t *)(arg.pData) + idx;
330                     SurfaceType surf           = (SurfaceType)*pSurfaceindex;
331 
332                     if (surf != SurfaceTypeInvalid)
333                     {
334                         auto it = m_statelessArray.find(surf);
335                         uint64_t ui64GfxAddress                              = (m_statelessArray.end() != it) ? it->second : 0xFFFF;
336                         *((uint64_t *)(pCurbe + arg.uOffsetInPayload) + idx) = ui64GfxAddress;
337                         break;
338                     }
339                     else
340                     {
341                         *((uint64_t *)(pCurbe + arg.uOffsetInPayload) + idx) = 0xFFFF;
342                     }
343                 }
344             }
345         }
346         else if (arg.eArgKind == ARG_KIND_INLINE)
347         {
348             VP_RENDER_NORMALMESSAGE("Skip inline data here");
349         }
350         else
351         {
352             return MOS_STATUS_UNIMPLEMENTED;
353         }
354     }
355 
356     curbe = pCurbe;
357 
358     return MOS_STATUS_SUCCESS;
359 }
360 
361 
GetWalkerSetting(KERNEL_WALKER_PARAMS & walkerParam,KERNEL_PACKET_RENDER_DATA & renderData)362 MOS_STATUS VpRenderHdr3DLutL0Kernel::GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData)
363 {
364 
365     VP_FUNC_CALL();
366 
367     walkerParam = m_walkerParam;
368     walkerParam.iBindingTable = renderData.bindingTable;
369     walkerParam.iMediaID      = renderData.mediaID;
370     walkerParam.iCurbeOffset  = renderData.iCurbeOffset;
371     // Should use renderData.iCurbeLength instead of kernelSettings.CURBE_Length.
372     // kernelSettings.CURBE_Length is 32 aligned with 5 bits shift.
373     // renderData.iCurbeLength is RENDERHAL_CURBE_BLOCK_ALIGN(64) aligned.
374     walkerParam.iCurbeLength = renderData.iCurbeLength;
375     return MOS_STATUS_SUCCESS;
376 }
377 
378 // Only for Adv kernels.
SetWalkerSetting(KERNEL_THREAD_SPACE & threadSpace,bool bSyncFlag,bool flushL1)379 MOS_STATUS VpRenderHdr3DLutL0Kernel::SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1)
380 {
381     VP_FUNC_CALL();
382     MOS_ZeroMemory(&m_walkerParam, sizeof(KERNEL_WALKER_PARAMS));
383 
384     m_walkerParam.iBlocksX          = threadSpace.uWidth;
385     m_walkerParam.iBlocksY          = threadSpace.uHeight;
386     m_walkerParam.threadWidth       = threadSpace.uLocalWidth;
387     m_walkerParam.threadHeight      = threadSpace.uLocalHeight;
388     m_walkerParam.threadDepth       = 1;
389     m_walkerParam.isVerticalPattern = false;
390     m_walkerParam.bSyncFlag         = bSyncFlag;
391 
392     m_walkerParam.pipeControlParams.bUpdateNeeded              = true;
393     m_walkerParam.pipeControlParams.bEnableDataPortFlush       = true;
394     m_walkerParam.pipeControlParams.bUnTypedDataPortCacheFlush = true;
395     m_walkerParam.pipeControlParams.bFlushRenderTargetCache    = false;
396     m_walkerParam.pipeControlParams.bInvalidateTextureCache    = false;
397 
398     for (auto &arg : m_kernelArgs)
399     {
400         if (arg.eArgKind == ARG_KIND_INLINE)
401         {
402             if (arg.pData != nullptr)
403             {
404                 MOS_SecureMemcpy(m_inlineData + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize);
405                 VP_RENDER_NORMALMESSAGE("Setting Inline Data KernelID %d, index %d , value %d, argKind %d", m_kernelId, arg.uIndex, *(uint32_t *)arg.pData, arg.eArgKind);
406             }
407             else
408             {
409                 VP_RENDER_NORMALMESSAGE("KernelID %d, index %d, argKind %d is empty", m_kernelId, arg.uIndex, arg.eArgKind);
410             }
411         }
412     }
413     m_walkerParam.inlineDataLength = sizeof(m_inlineData);
414     m_walkerParam.inlineData       = m_inlineData;
415 
416     if (m_kernelEnv.uSimdSize != 1 &&
417         (m_kernelEnv.uiWorkGroupWalkOrderDimensions[0] != 0 ||
418             m_kernelEnv.uiWorkGroupWalkOrderDimensions[1] != 0 ||
419             m_kernelEnv.uiWorkGroupWalkOrderDimensions[2] != 0))
420     {
421         m_walkerParam.isEmitInlineParameter = true;
422         m_walkerParam.isGenerateLocalID     = true;
423         m_walkerParam.emitLocal             = MHW_EMIT_LOCAL_XYZ;
424     }
425 
426     return MOS_STATUS_SUCCESS;
427 }
428 
InitCoefSurface(const uint32_t maxDLL,const uint32_t maxCLL,const VPHAL_HDR_MODE hdrMode)429 MOS_STATUS VpRenderHdr3DLutL0Kernel::InitCoefSurface(const uint32_t maxDLL, const uint32_t maxCLL, const VPHAL_HDR_MODE hdrMode)
430 {
431     VP_FUNC_CALL();
432     float  *hdrcoefBuffer = nullptr;
433     int32_t oetfCurve = 0, tmMode = 0, tmSrcType = 0;
434     float   *ccmMatrix = m_ccmMatrix;
435     float   tmMaxCLL = 0.0f, tmMaxDLL = 0.0f;
436 
437     MOS_ZeroMemory(m_ccmMatrix, sizeof(m_ccmMatrix));
438 
439     // Get surface addr
440     auto        it   = m_surfaceGroup->find(SurfaceType3DLutCoef);
441     VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
442     VP_RENDER_CHK_NULL_RETURN(surf);
443 
444     tmMaxCLL = (float)maxCLL;
445     tmMaxDLL = (float)maxDLL;
446 
447     // Lock surface
448     uint8_t *lockedAddr = (uint8_t *)m_allocator->LockResourceForWrite(&surf->osSurface->OsResource);
449 
450     VP_RENDER_CHK_NULL_RETURN(lockedAddr);
451 
452     hdrcoefBuffer = (float *)lockedAddr;
453 
454     if (hdrMode == VPHAL_HDR_MODE_TONE_MAPPING)  // H2S
455     {
456         CalcCCMMatrix();
457         MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, color_matrix_calculation, sizeof(float) * 12);
458 
459         tmMode    = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2S;
460         oetfCurve = (OETF_CURVE_TYPE)OETF_SRGB;
461         tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709;
462     }
463     else  // H2H
464     {
465         MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, ccm_identity, sizeof(float) * 12);
466 
467         tmMode    = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2H;
468         oetfCurve = (OETF_CURVE_TYPE)OETF_CURVE_HDR_2084;
469         tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709;
470     }
471 
472     // Fill Coefficient Surface: Media kernel define the layout of coefficients. Please don't change it.
473     const uint32_t pos_coef[17] = {7, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 54, 55, 62, 63};
474 
475     // OETF curve
476     ((int *)hdrcoefBuffer)[pos_coef[0]] = oetfCurve;
477     // CCM
478     for (uint32_t i = 0; i < VP_CCM_MATRIX_SIZE; ++i)
479     {
480         hdrcoefBuffer[pos_coef[i + 1]] = ccmMatrix[i];
481     }
482     // TM Source Type
483     ((int *)hdrcoefBuffer)[pos_coef[13]] = tmSrcType;
484     // TM Mode
485     ((int *)hdrcoefBuffer)[pos_coef[14]] = tmMode;
486     // Max CLL and DLL
487     hdrcoefBuffer[pos_coef[15]] = tmMaxCLL;
488     hdrcoefBuffer[pos_coef[16]] = tmMaxDLL;
489 
490     //Unlock
491     VP_RENDER_CHK_STATUS_RETURN(m_allocator->UnLock(&surf->osSurface->OsResource));
492 
493     return MOS_STATUS_SUCCESS;
494 }
495 
SetKernelConfigs(KERNEL_CONFIGS & kernelConfigs)496 MOS_STATUS VpRenderHdr3DLutL0Kernel::SetKernelConfigs(KERNEL_CONFIGS &kernelConfigs)
497 {
498     VP_FUNC_CALL();
499     auto it = kernelConfigs.find((VpKernelID)kernelHdr3DLutCalcL0);
500 
501     if (kernelConfigs.end() == it || nullptr == it->second)
502     {
503         VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
504     }
505 
506     PRENDER_HDR_3DLUT_CAL_PARAMS params = (PRENDER_HDR_3DLUT_CAL_PARAMS)it->second;
507 
508     if (m_maxDisplayLum == params->maxDisplayLum && m_maxContentLevelLum == params->maxContentLevelLum &&
509         m_hdrMode == params->hdrMode && m_hdrLutSize == params->threadWidth)
510     {
511         // For such case, 3DLut calculation should be skipped in Policy::GetHdrExecutionCaps.
512         VP_RENDER_ASSERTMESSAGE("No change in 3D Lut parameters!");
513     }
514     else
515     {
516         m_maxDisplayLum      = params->maxDisplayLum;
517         m_maxContentLevelLum = params->maxContentLevelLum;
518         m_hdrMode            = params->hdrMode;
519         m_hdrLutSize         = params->threadWidth;
520         VP_RENDER_NORMALMESSAGE("Maximum Display Luminance %d, Maximum Content Level Luminance %d, HDR mode %d, Lut size %d",
521             m_maxDisplayLum,
522             m_maxContentLevelLum,
523             m_hdrMode,
524             m_hdrLutSize);
525     }
526 
527     return MOS_STATUS_SUCCESS;
528 }
529 
530 
531 
SetKernelArgs(KERNEL_ARGS & kernelArgs,VP_PACKET_SHARED_CONTEXT * sharedContext)532 MOS_STATUS VpRenderHdr3DLutL0Kernel::SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext)
533 {
534     VP_FUNC_CALL();
535 
536     //All pData will be free in VpSrFilter::Destroy so no need to free here
537     for (KRN_ARG &srcArg : kernelArgs)
538     {
539         for (KRN_ARG &dstArg : m_kernelArgs)
540         {
541             if (srcArg.uIndex == dstArg.uIndex)
542             {
543                 if (dstArg.eArgKind == ARG_KIND_GENERAL || dstArg.eArgKind == ARG_KIND_INLINE || srcArg.eArgKind == ARG_KIND_SURFACE)
544                 {
545                     if (srcArg.pData == nullptr)
546                     {
547                         VP_RENDER_ASSERTMESSAGE("The Kernel Argument General Data is null! KernelID %d, argIndex %d", m_kernelId, dstArg.uIndex);
548                         return MOS_STATUS_INVALID_PARAMETER;
549                     }
550                     else
551                     {
552                         dstArg.eArgKind = srcArg.eArgKind;
553                         dstArg.pData    = srcArg.pData;
554                         srcArg.pData    = nullptr;
555                     }
556                 }
557             }
558         }
559 
560         if (srcArg.pData != nullptr)
561         {
562             srcArg.pData = nullptr;
563             VP_RENDER_ASSERTMESSAGE("The Kernel Argument is set but not used. KernelID %d, argIndex %d", m_kernelId, srcArg.uIndex);
564         }
565     }
566 
567     return MOS_STATUS_SUCCESS;
568 }
569 
DumpSurfaces()570 void VpRenderHdr3DLutL0Kernel::DumpSurfaces()
571 {
572     VP_FUNC_CALL();
573     for (auto &arg : m_kernelArgs)
574     {
575         if (arg.eArgKind == ARG_KIND_SURFACE)
576         {
577             for (uint32_t idx = 0; idx < arg.uSize / (sizeof(SurfaceType) * 2); idx++)
578             {
579                 uint32_t   *pSurfaceindex = (uint32_t *)(arg.pData) + idx;
580                 SurfaceType surfType      = (SurfaceType)*pSurfaceindex;
581                 if (surfType == SurfaceTypeInvalid)
582                 {
583                     VP_RENDER_ASSERTMESSAGE("Surf type was invalid");
584                     return;
585                 }
586                 auto surf = m_surfaceGroup->find(surfType);
587                 if (m_surfaceGroup->end() == surf)
588                 {
589                     VP_RENDER_ASSERTMESSAGE("Surf was not found");
590                     return;
591                 }
592 
593                 char bufName[MAX_PATH] = {};
594 
595                 MOS_SecureStringPrint(
596                     bufName,
597                     MAX_PATH,
598                     sizeof(bufName),
599                     "k_%d_%s_argi_%d",
600                     m_kernelIndex,
601                     m_kernelName.c_str(),
602                     idx);
603 
604                 if (surf->second == nullptr)
605                 {
606                     return;
607                 }
608                 if (surf->second->osSurface == nullptr)
609                 {
610                     return;
611                 }
612 
613                 DumpSurface(surf->second, bufName);
614 
615             }
616         }
617     }
618 
619     return;
620 }