1 /*
2 * Copyright (c) 2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     vp_render_vebox_hdr_3dlut_kernel.cpp
//! \brief    Render packet used by the media pipeline.
//! \details  The render packet provides the structures and generates the command buffer that the media pipeline will use.
26 //!
27 #include "vp_render_vebox_hdr_3dlut_kernel.h"
28 #include "vp_dumper.h"
29 #include "vp_kernelset.h"
30 
31 using namespace vp;
32 
//!
//! \brief Binding Table Index for HDR 3DLut kernel
//!        (used by VpRenderHdr3DLutKernel::SetupSurfaceState)
//!
#define BI_VEBOX_HDR_3DLUT_3DLUT 1
#define BI_VEBOX_HDR_3DLUT_COEF  2

//!
//! \brief Binding Table Index for HDR 3DLut kernel, CM variant
//!        (used by VpRenderHdr3DLutKernelCM::SetupSurfaceState)
//!
#define BI_VEBOX_HDR_3DLUT_3DLUT_CM 0
#define BI_VEBOX_HDR_3DLUT_COEF_CM  1

//! \brief 3x4 identity colour-correction matrix, stored row by row.
static const float ccm_identity[12] = {
    1.0f, 0.0f, 0.0f, 0.0f,
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f};

//! \brief Scratch 3x4 matrix filled in by CalcCCMMatrix(); zero until the first call.
static float color_matrix_calculation[3][4] = {};
44 
//!
//! \brief    Invert a 3x3 matrix stored row-major in 9 contiguous elements
//! \details  Computes adjugate / determinant in double precision and stores
//!           the result converted to T. All inputs are read before any output
//!           is written. No singularity check is done: a zero determinant
//!           yields inf/NaN entries.
//! \param    [in] m1
//!           Pointer to the 9 input elements
//! \param    [out] m2
//!           Pointer to the 9 output elements
//!
template <typename T>
void mat_3by3_inv(T *m1, T *m2)
{
    const double a11 = m1[0], a12 = m1[1], a13 = m1[2];
    const double a21 = m1[3], a22 = m1[4], a23 = m1[5];
    const double a31 = m1[6], a32 = m1[7], a33 = m1[8];

    const double det = a11 * (a33 * a22 - a32 * a23) - a21 * (a33 * a12 - a32 * a13) + a31 * (a12 * a23 - a13 * a22);

    const double det_inv = 1 / det;

    // Convert to T rather than a hard-coded float so that a double
    // instantiation keeps double precision.
    m2[0] = static_cast<T>(det_inv * (a33 * a22 - a32 * a23));
    m2[1] = static_cast<T>(-det_inv * (a33 * a12 - a32 * a13));
    m2[2] = static_cast<T>(det_inv * (a12 * a23 - a13 * a22));
    m2[3] = static_cast<T>(-det_inv * (a33 * a21 - a31 * a23));
    m2[4] = static_cast<T>(det_inv * (a33 * a11 - a31 * a13));
    m2[5] = static_cast<T>(-det_inv * (a23 * a11 - a21 * a13));
    m2[6] = static_cast<T>(det_inv * (a32 * a21 - a31 * a22));
    m2[7] = static_cast<T>(-det_inv * (a32 * a11 - a31 * a12));
    m2[8] = static_cast<T>(det_inv * (a22 * a11 - a21 * a12));
}
75 
//!
//! \brief    Row-major matrix product m3 = m1 * m2
//! \details  m1 is m_size x n_size, m2 is n_size x l_size and m3 is
//!           m_size x l_size. Each output element is zeroed and then
//!           accumulated in place, so m3 must not alias m1 or m2.
//! \param    [in] m1
//!           Left operand, m_size * n_size elements
//! \param    [in] m2
//!           Right operand, n_size * l_size elements
//! \param    [out] m3
//!           Result, m_size * l_size elements
//! \param    [in] m_size
//!           Number of rows of m1 and m3
//! \param    [in] n_size
//!           Number of columns of m1 / rows of m2
//! \param    [in] l_size
//!           Number of columns of m2 and m3
//!
template <typename T>
void mat_mul_float(T *m1, T *m2, T *m3, short m_size, short n_size, short l_size)
{
    for (int row = 0; row < m_size; ++row)
    {
        // Cursors are typed on T (not float) so the template works for any element type.
        const T *lhsRow = m1 + row * n_size;
        T       *outRow = m3 + row * l_size;

        for (int col = 0; col < l_size; ++col)
        {
            T &dst = outRow[col];
            dst    = 0;
            for (int k = 0; k < n_size; ++k)
            {
                dst += lhsRow[k] * m2[k * l_size + col];
            }
        }
    }
}
CalcCCMMatrix()102 static void CalcCCMMatrix()
103 {
104     float mat_rgb2xyz_src[3][3], mat_rgb2xyz_dst[3][3];
105     float mat_xyz[3][3], mat_invxyz[3][3];
106     float matW[3];
107     float matE[3] = {};
108 
109     float fccmSrcZR, fccmSrcZG, fccmSrcZB, fccmSrcZW;
110     float fccmDstZR, fccmDstZG, fccmDstZB, fccmDstZW;
111 
112     const float fccmSrcXR = 0.708000000000000f;
113     const float fccmSrcYR = 0.292000000000000f;
114     const float fccmSrcXG = 0.170000000000000f;
115     const float fccmSrcYG = 0.797000000000000f;
116     const float fccmSrcXB = 0.131000000000000f;
117     const float fccmSrcYB = 0.046000000000000f;
118     const float fccmSrcXW = 0.312700000000000f;
119     const float fccmSrcYW = 0.329000000000000f;
120     const float fccmDstXR = 0.640000000000000f;
121     const float fccmDstYR = 0.330000000000000f;
122     const float fccmDstXG = 0.300000000000000f;
123     const float fccmDstYG = 0.600000000000000f;
124     const float fccmDstXB = 0.150000000000000f;
125     const float fccmDstYB = 0.060000000000000f;
126     const float fccmDstXW = 0.312700000000000f;
127     const float FccmDstYW = 0.329000000000000f;
128 
129     fccmSrcZR = 1 - fccmSrcXR - fccmSrcYR;
130     fccmSrcZG = 1 - fccmSrcXG - fccmSrcYG;
131     fccmSrcZB = 1 - fccmSrcXB - fccmSrcYB;
132     fccmSrcZW = 1 - fccmSrcXW - fccmSrcYW;
133 
134     mat_xyz[0][0] = fccmSrcXR;
135     mat_xyz[0][1] = fccmSrcXG;
136     mat_xyz[0][2] = fccmSrcXB;
137     mat_xyz[1][0] = fccmSrcYR;
138     mat_xyz[1][1] = fccmSrcYG;
139     mat_xyz[1][2] = fccmSrcYB;
140     mat_xyz[2][0] = fccmSrcZR;
141     mat_xyz[2][1] = fccmSrcZG;
142     mat_xyz[2][2] = fccmSrcZB;
143 
144     mat_3by3_inv(mat_xyz[0], mat_invxyz[0]);
145 
146     matW[0] = fccmSrcXW / fccmSrcYW;
147     matW[1] = 1;
148     matW[2] = fccmSrcZW / fccmSrcYW;
149 
150     mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1);
151 
152     mat_rgb2xyz_src[0][0] = matE[0] * fccmSrcXR;
153     mat_rgb2xyz_src[1][0] = matE[0] * fccmSrcYR;
154     mat_rgb2xyz_src[2][0] = matE[0] * fccmSrcZR;
155     mat_rgb2xyz_src[0][1] = matE[1] * fccmSrcXG;
156     mat_rgb2xyz_src[1][1] = matE[1] * fccmSrcYG;
157     mat_rgb2xyz_src[2][1] = matE[1] * fccmSrcZG;
158     mat_rgb2xyz_src[0][2] = matE[2] * fccmSrcXB;
159     mat_rgb2xyz_src[1][2] = matE[2] * fccmSrcYB;
160     mat_rgb2xyz_src[2][2] = matE[2] * fccmSrcZB;
161 
162     fccmDstZR = 1 - fccmDstXR - fccmDstYR;
163     fccmDstZG = 1 - fccmDstXG - fccmDstYG;
164     fccmDstZB = 1 - fccmDstXB - fccmDstYB;
165     fccmDstZW = 1 - fccmDstXW - FccmDstYW;
166 
167     mat_xyz[0][0] = fccmDstXR;
168     mat_xyz[0][1] = fccmDstXG;
169     mat_xyz[0][2] = fccmDstXB;
170     mat_xyz[1][0] = fccmDstYR;
171     mat_xyz[1][1] = fccmDstYG;
172     mat_xyz[1][2] = fccmDstYB;
173     mat_xyz[2][0] = fccmDstZR;
174     mat_xyz[2][1] = fccmDstZG;
175     mat_xyz[2][2] = fccmDstZB;
176 
177     mat_3by3_inv(mat_xyz[0], mat_invxyz[0]);
178 
179     matW[0] = fccmDstXW / FccmDstYW;
180     matW[1] = 1;
181     matW[2] = fccmDstZW / FccmDstYW;
182 
183     mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1);
184 
185     mat_rgb2xyz_dst[0][0] = matE[0] * fccmDstXR;
186     mat_rgb2xyz_dst[1][0] = matE[0] * fccmDstYR;
187     mat_rgb2xyz_dst[2][0] = matE[0] * fccmDstZR;
188     mat_rgb2xyz_dst[0][1] = matE[1] * fccmDstXG;
189     mat_rgb2xyz_dst[1][1] = matE[1] * fccmDstYG;
190     mat_rgb2xyz_dst[2][1] = matE[1] * fccmDstZG;
191     mat_rgb2xyz_dst[0][2] = matE[2] * fccmDstXB;
192     mat_rgb2xyz_dst[1][2] = matE[2] * fccmDstYB;
193     mat_rgb2xyz_dst[2][2] = matE[2] * fccmDstZB;
194 
195     float mat_invrgb2xyz_dst[3][3];
196     mat_3by3_inv(mat_rgb2xyz_dst[0], mat_invrgb2xyz_dst[0]);
197 
198     float CCMmat[3][3];
199     mat_mul_float(mat_invrgb2xyz_dst[0], mat_rgb2xyz_src[0], CCMmat[0], 3, 3, 3);
200 
201     color_matrix_calculation[0][0] = CCMmat[0][0];
202     color_matrix_calculation[0][1] = CCMmat[0][1];
203     color_matrix_calculation[0][2] = CCMmat[0][2];
204     color_matrix_calculation[0][3] = 0.0f;
205     color_matrix_calculation[1][0] = CCMmat[1][0];
206     color_matrix_calculation[1][1] = CCMmat[1][1];
207     color_matrix_calculation[1][2] = CCMmat[1][2];
208     color_matrix_calculation[1][3] = 0.0f;
209     color_matrix_calculation[2][0] = CCMmat[2][0];
210     color_matrix_calculation[2][1] = CCMmat[2][1];
211     color_matrix_calculation[2][2] = CCMmat[2][2];
212     color_matrix_calculation[2][3] = 0.0f;
213 }
214 
215 // Used by ISA kernel
VpRenderHdr3DLutKernel(PVP_MHWINTERFACE hwInterface,VpKernelID kernelId,uint32_t kernelIndex,std::string kernelName,PVpAllocator allocator)216 VpRenderHdr3DLutKernel::VpRenderHdr3DLutKernel(PVP_MHWINTERFACE hwInterface, VpKernelID kernelId, uint32_t kernelIndex, std::string kernelName, PVpAllocator allocator) :
217     VpRenderKernelObj(hwInterface, kernelId, kernelIndex, VP_HDR_KERNEL_NAME, allocator)
218 {
219     VP_FUNC_CALL();
220 }
221 
222 // Used by L0 kernel
VpRenderHdr3DLutKernel(PVP_MHWINTERFACE hwInterface,PVpAllocator allocator)223 VpRenderHdr3DLutKernel::VpRenderHdr3DLutKernel(PVP_MHWINTERFACE hwInterface, PVpAllocator allocator) :
224     VpRenderKernelObj(hwInterface, (VpKernelID)kernelHdr3DLutCalc, 0, VP_HDR_KERNEL_NAME_L0, allocator)
225 {
226     VP_FUNC_CALL();
227     m_kernelBinaryID = VP_ADV_KERNEL_BINARY_ID(kernelHdr3DLutCalc);
228 }
229 
~VpRenderHdr3DLutKernel()230 VpRenderHdr3DLutKernel::~VpRenderHdr3DLutKernel()
231 {
232 }
233 
SetupSurfaceState()234 MOS_STATUS VpRenderHdr3DLutKernel::SetupSurfaceState()
235 {
236     VP_FUNC_CALL();
237     VP_RENDER_CHK_NULL_RETURN(m_surfaceGroup);
238     VP_RENDER_CHK_NULL_RETURN(m_hwInterface);
239 
240     PRENDERHAL_INTERFACE renderHal   = m_hwInterface->m_renderHal;
241     PMOS_INTERFACE       osInterface = m_hwInterface->m_osInterface;
242     m_surfaceBindingIndex.clear();
243     m_surfaceState.clear();
244     KERNEL_SURFACE_STATE_PARAM kernelSurfaceParam            = {};
245     // Only need to specify binding index in surface parameters.
246     kernelSurfaceParam.surfaceOverwriteParams.updatedSurfaceParams = true;
247     kernelSurfaceParam.surfaceOverwriteParams.bindedKernel   = true;
248     kernelSurfaceParam.surfaceOverwriteParams.bufferResource = true;
249 
250     UpdateCurbeBindingIndex(SurfaceType3DLut, BI_VEBOX_HDR_3DLUT_3DLUT);
251     kernelSurfaceParam.isOutput                         = true;
252     m_surfaceState.insert(std::make_pair(SurfaceType3DLut, kernelSurfaceParam));
253     UpdateCurbeBindingIndex(SurfaceType3DLutCoef, BI_VEBOX_HDR_3DLUT_COEF);
254     kernelSurfaceParam.isOutput                         = false;
255     m_surfaceState.insert(std::make_pair(SurfaceType3DLutCoef, kernelSurfaceParam));
256 
257     VP_RENDER_CHK_STATUS_RETURN(InitCoefSurface(m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode));
258 
259     return MOS_STATUS_SUCCESS;
260 }
261 
CpPrepareResources()262 MOS_STATUS VpRenderHdr3DLutKernel::CpPrepareResources()
263 {
264     VP_FUNC_CALL();
265 
266     PMOS_RESOURCE source[VPHAL_MAX_SOURCES] = {nullptr};
267     PMOS_RESOURCE target[VPHAL_MAX_TARGETS] = {nullptr};
268 
269     if ((nullptr != m_hwInterface->m_osInterface) &&
270         (nullptr != m_hwInterface->m_osInterface->osCpInterface))
271     {
272         auto        it   = m_surfaceGroup->find(SurfaceType3DLutCoef);
273         VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
274         VP_RENDER_CHK_NULL_RETURN(surf);
275         source[0] = &(surf->osSurface->OsResource);
276 
277         it   = m_surfaceGroup->find(SurfaceType3DLut);
278         surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
279         VP_RENDER_CHK_NULL_RETURN(surf);
280 
281         target[0] = &(surf->osSurface->OsResource);
282         m_hwInterface->m_osInterface->osCpInterface->PrepareResources((void **)source, 1, (void **)target, 1);
283     }
284     return MOS_STATUS_SUCCESS;
285 }
286 
GetCurbeState(void * & curbe,uint32_t & curbeLength)287 MOS_STATUS VpRenderHdr3DLutKernel::GetCurbeState(void *&curbe, uint32_t &curbeLength)
288 {
289     VP_FUNC_CALL();
290     // init the hdr 3dlut static data
291     auto bindingMap3DLut = GetSurfaceBindingIndex(SurfaceType3DLut);
292     auto bindingMapCoef  = GetSurfaceBindingIndex(SurfaceType3DLutCoef);
293     if (bindingMap3DLut.empty() || bindingMapCoef.empty())
294     {
295         VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
296     }
297 
298     MOS_ZeroMemory(&m_curbe, sizeof(m_curbe));
299     m_curbe.DW02.hdr3DLutSurface       = *bindingMap3DLut.begin();
300     m_curbe.DW04.hdrCoefSurface        = *bindingMapCoef.begin();
301     m_curbe.DW06.hdr3DLutSurfaceWidth  = m_hdrLutSize;
302     m_curbe.DW06.hdr3DLutSurfaceHeight = m_hdrLutSize;
303 
304     curbeLength = sizeof(VEBOX_HDR_3DLUT_STATIC_DATA);
305     curbe = (uint8_t *) & m_curbe;
306     VP_RENDER_NORMALMESSAGE("HDR 3DLut Kernel curbelength %d", curbeLength);
307     return MOS_STATUS_SUCCESS;
308 }
309 
SetKernelConfigs(KERNEL_CONFIGS & kernelConfigs)310 MOS_STATUS VpRenderHdr3DLutKernel::SetKernelConfigs(KERNEL_CONFIGS &kernelConfigs)
311 {
312     VP_FUNC_CALL();
313     auto it = kernelConfigs.find((VpKernelID)kernelHdr3DLutCalc);
314 
315     if (kernelConfigs.end() == it || nullptr == it->second)
316     {
317         VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
318     }
319 
320     PRENDER_HDR_3DLUT_CAL_PARAMS params = (PRENDER_HDR_3DLUT_CAL_PARAMS)it->second;
321 
322     if (m_maxDisplayLum == params->maxDisplayLum && m_maxContentLevelLum == params->maxContentLevelLum &&
323         m_hdrMode == params->hdrMode && m_hdrLutSize == params->threadWidth)
324     {
325         // For such case, 3DLut calculation should be skipped in Policy::GetHdrExecutionCaps.
326         VP_RENDER_ASSERTMESSAGE("No change in 3D Lut parameters!");
327     }
328     else
329     {
330         m_maxDisplayLum      = params->maxDisplayLum;
331         m_maxContentLevelLum = params->maxContentLevelLum;
332         m_hdrMode            = params->hdrMode;
333         m_hdrLutSize         = params->threadWidth;
334         VP_RENDER_NORMALMESSAGE("Maximum Display Luminance %d, Maximum Content Level Luminance %d, HDR mode %d, Lut size %d",
335             m_maxDisplayLum,
336             m_maxContentLevelLum,
337             m_hdrMode,
338             m_hdrLutSize);
339     }
340 
341     return MOS_STATUS_SUCCESS;
342 }
343 
344 
GetWalkerSetting(KERNEL_WALKER_PARAMS & walkerParam,KERNEL_PACKET_RENDER_DATA & renderData)345 MOS_STATUS VpRenderHdr3DLutKernel::GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData)
346 {
347 
348     VP_FUNC_CALL();
349     RENDERHAL_KERNEL_PARAM kernelSettings;
350 
351     VP_RENDER_CHK_STATUS_RETURN(GetKernelSettings(kernelSettings));
352     MOS_ZeroMemory(&walkerParam, sizeof(KERNEL_WALKER_PARAMS));
353 
354     VP_RENDER_CHK_STATUS_RETURN(VpRenderKernelObj::GetWalkerSetting(m_walkerParam, renderData));
355     m_walkerParam.iBlocksX = m_hdrLutSize;
356     m_walkerParam.iBlocksY = m_hdrLutSize;
357     m_walkerParam.isVerticalPattern = false;
358     m_walkerParam.bSyncFlag         = true;
359     walkerParam = m_walkerParam;
360     return MOS_STATUS_SUCCESS;
361 }
362 
InitCoefSurface(const uint32_t maxDLL,const uint32_t maxCLL,const VPHAL_HDR_MODE hdrMode)363 MOS_STATUS VpRenderHdr3DLutKernel::InitCoefSurface(const uint32_t maxDLL, const uint32_t maxCLL, const VPHAL_HDR_MODE hdrMode)
364 {
365     VP_FUNC_CALL();
366     float  *hdrcoefBuffer = nullptr;
367     int32_t oetfCurve = 0, tmMode = 0, tmSrcType = 0;
368     float   *ccmMatrix = m_ccmMatrix;
369     float   tmMaxCLL = 0.0f, tmMaxDLL = 0.0f;
370 
371     MOS_ZeroMemory(m_ccmMatrix, sizeof(m_ccmMatrix));
372 
373     // Get surface addr
374     auto        it   = m_surfaceGroup->find(SurfaceType3DLutCoef);
375     VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
376     VP_RENDER_CHK_NULL_RETURN(surf);
377 
378     tmMaxCLL = (float)maxCLL;
379     tmMaxDLL = (float)maxDLL;
380 
381     // Lock surface
382     uint8_t *lockedAddr = (uint8_t *)m_allocator->LockResourceForWrite(&surf->osSurface->OsResource);
383 
384     VP_RENDER_CHK_NULL_RETURN(lockedAddr);
385 
386     hdrcoefBuffer = (float *)lockedAddr;
387 
388     if (hdrMode == VPHAL_HDR_MODE_TONE_MAPPING)  // H2S
389     {
390         CalcCCMMatrix();
391         MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, color_matrix_calculation, sizeof(float) * 12);
392 
393         tmMode    = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2S;
394         oetfCurve = (OETF_CURVE_TYPE)OETF_SRGB;
395         tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709;
396     }
397     else  // H2H
398     {
399         MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, ccm_identity, sizeof(float) * 12);
400 
401         tmMode    = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2H;
402         oetfCurve = (OETF_CURVE_TYPE)OETF_CURVE_HDR_2084;
403         tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709;
404     }
405 
406     // Fill Coefficient Surface: Media kernel define the layout of coefficients. Please don't change it.
407     const uint32_t pos_coef[17] = {7, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 54, 55, 62, 63};
408 
409     // OETF curve
410     ((int *)hdrcoefBuffer)[pos_coef[0]] = oetfCurve;
411     // CCM
412     for (uint32_t i = 0; i < VP_CCM_MATRIX_SIZE; ++i)
413     {
414         hdrcoefBuffer[pos_coef[i + 1]] = ccmMatrix[i];
415     }
416     // TM Source Type
417     ((int *)hdrcoefBuffer)[pos_coef[13]] = tmSrcType;
418     // TM Mode
419     ((int *)hdrcoefBuffer)[pos_coef[14]] = tmMode;
420     // Max CLL and DLL
421     hdrcoefBuffer[pos_coef[15]] = tmMaxCLL;
422     hdrcoefBuffer[pos_coef[16]] = tmMaxDLL;
423 
424     //Unlock
425     VP_RENDER_CHK_STATUS_RETURN(m_allocator->UnLock(&surf->osSurface->OsResource));
426 
427     return MOS_STATUS_SUCCESS;
428 }
429 
VpRenderHdr3DLutKernelCM(PVP_MHWINTERFACE hwInterface,VpKernelID kernelID,uint32_t kernelIndex,PVpAllocator allocator)430 VpRenderHdr3DLutKernelCM::VpRenderHdr3DLutKernelCM(PVP_MHWINTERFACE hwInterface, VpKernelID kernelID, uint32_t kernelIndex, PVpAllocator allocator) :
431     VpRenderHdr3DLutKernel(hwInterface, kernelID, kernelIndex, VP_HDR_KERNEL_NAME, allocator)
432 {
433     m_kernelBinaryID = VP_ADV_KERNEL_BINARY_ID(kernelID);
434     m_isAdvKernel    = true;
435 }
436 
~VpRenderHdr3DLutKernelCM()437 VpRenderHdr3DLutKernelCM::~VpRenderHdr3DLutKernelCM()
438 {
439     // No need to destroy dstArg.pData, which points to the local variable
440     // in VpHdrFilter.
441 }
442 
Init(VpRenderKernel & kernel)443 MOS_STATUS VpRenderHdr3DLutKernelCM::Init(VpRenderKernel &kernel)
444 {
445     VP_FUNC_CALL();
446     m_kernelSize = kernel.GetKernelSize() + KERNEL_BINARY_PADDING_SIZE;
447 
448     uint8_t *pKernelBin = (uint8_t *)kernel.GetKernelBinPointer();
449     VP_RENDER_CHK_NULL_RETURN(pKernelBin);
450 
451     m_kernelBinary = pKernelBin + kernel.GetKernelBinOffset();
452     m_kernelArgs = kernel.GetKernelArgs();
453 
454     return MOS_STATUS_SUCCESS;
455 }
456 
GetWalkerSetting(KERNEL_WALKER_PARAMS & walkerParam,KERNEL_PACKET_RENDER_DATA & renderData)457 MOS_STATUS VpRenderHdr3DLutKernelCM::GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData)
458 {
459     VP_FUNC_CALL();
460 
461     VP_RENDER_CHK_STATUS_RETURN(VpRenderKernelObj::GetWalkerSetting(m_walkerParam, renderData));
462 
463     walkerParam = m_walkerParam;
464     return MOS_STATUS_SUCCESS;
465 }
466 
467 // Only for Adv kernels.
SetWalkerSetting(KERNEL_THREAD_SPACE & threadSpace,bool bSyncFlag,bool flushL1)468 MOS_STATUS VpRenderHdr3DLutKernelCM::SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1)
469 {
470     VP_FUNC_CALL();
471     MOS_ZeroMemory(&m_walkerParam, sizeof(KERNEL_WALKER_PARAMS));
472 
473     m_walkerParam.iBlocksX          = threadSpace.uWidth;
474     m_walkerParam.iBlocksY          = threadSpace.uHeight;
475     m_walkerParam.isVerticalPattern = false;
476     m_walkerParam.bSyncFlag         = bSyncFlag;
477 
478     return MOS_STATUS_SUCCESS;
479 }
480 
SetKernelArgs(KERNEL_ARGS & kernelArgs,VP_PACKET_SHARED_CONTEXT * sharedContext)481 MOS_STATUS VpRenderHdr3DLutKernelCM::SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext)
482 {
483     VP_FUNC_CALL();
484     if (kernelArgs.size() != m_kernelArgs.size())
485     {
486         VP_RENDER_ASSERTMESSAGE("The Kernel Arguments is not aligned!");
487         return MOS_STATUS_INVALID_PARAMETER;
488     }
489 
490     for (uint32_t i = 0; i < m_kernelArgs.size(); ++i)
491     {
492         if (i >= kernelArgs.size())
493         {
494             VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
495         }
496         KRN_ARG &srcArg = kernelArgs[i];
497         KRN_ARG &dstArg = m_kernelArgs[i];
498 
499         if (srcArg.uIndex   != dstArg.uIndex    ||
500             srcArg.uSize    != dstArg.uSize     ||
501             srcArg.eArgKind != dstArg.eArgKind  &&
502             dstArg.eArgKind != (srcArg.eArgKind & ~SURFACE_MASK)    ||
503             srcArg.pData == nullptr)
504         {
505             VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
506         }
507         dstArg.eArgKind = srcArg.eArgKind;
508         dstArg.pData    = srcArg.pData;
509         srcArg.pData    = nullptr;
510     }
511 
512     return MOS_STATUS_SUCCESS;
513 }
514 
GetCurbeState(void * & curbe,uint32_t & curbeLength)515 MOS_STATUS VpRenderHdr3DLutKernelCM::GetCurbeState(void *&curbe, uint32_t &curbeLength)
516 {
517     VP_FUNC_CALL();
518     curbeLength = 0;
519     for (auto arg : m_kernelArgs)
520     {
521         curbeLength += arg.uSize;
522     }
523 
524     if (sizeof(m_curbe) != curbeLength)
525     {
526         VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
527     }
528 
529     uint8_t *data = (uint8_t *)&m_curbe;
530 
531     for (auto &arg : m_kernelArgs)
532     {
533         if (arg.eArgKind == ARG_KIND_SURFACE)
534         {
535             // Resource need be added.
536             uint32_t *pSurfaceindex = static_cast<uint32_t *>(arg.pData);
537             auto      bindingMap    = GetSurfaceBindingIndex((SurfaceType)*pSurfaceindex);
538             if (bindingMap.empty())
539             {
540                 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
541             }
542             *((uint32_t *)(data + arg.uOffsetInPayload)) = *bindingMap.begin();
543         }
544         else if (arg.eArgKind == ARG_KIND_GENERAL)
545         {
546             MOS_SecureMemcpy(data + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize);
547         }
548         else
549         {
550             VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_UNIMPLEMENTED);
551         }
552     }
553 
554     curbe       = data;
555     VP_RENDER_NORMALMESSAGE("HDR 3DLut Kernel curbelength %d", curbeLength);
556     return MOS_STATUS_SUCCESS;
557 }
558 
SetupSurfaceState()559 MOS_STATUS VpRenderHdr3DLutKernelCM::SetupSurfaceState()
560 {
561     VP_FUNC_CALL();
562     VP_RENDER_CHK_NULL_RETURN(m_surfaceGroup);
563     VP_RENDER_CHK_NULL_RETURN(m_hwInterface);
564 
565     PRENDERHAL_INTERFACE renderHal = m_hwInterface->m_renderHal;
566     PMOS_INTERFACE osInterface = m_hwInterface->m_osInterface;
567     m_surfaceBindingIndex.clear();
568 
569     KERNEL_SURFACE_STATE_PARAM kernelSurfaceParam            = {};
570     kernelSurfaceParam.surfaceOverwriteParams.bindedKernel   = true;
571     kernelSurfaceParam.surfaceOverwriteParams.bufferResource = true;
572 
573     UpdateCurbeBindingIndex(SurfaceType3DLut, BI_VEBOX_HDR_3DLUT_3DLUT_CM);
574     kernelSurfaceParam.isOutput                         = true;
575     m_surfaceState.insert(std::make_pair(SurfaceType3DLut, kernelSurfaceParam));
576     UpdateCurbeBindingIndex(SurfaceType3DLutCoef, BI_VEBOX_HDR_3DLUT_COEF_CM);
577     kernelSurfaceParam.isOutput                         = false;
578     m_surfaceState.insert(std::make_pair(SurfaceType3DLutCoef, kernelSurfaceParam));
579 
580     VP_RENDER_CHK_STATUS_RETURN(InitCoefSurface(m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode));
581 
582     return MOS_STATUS_SUCCESS;
583 }
584 
CpPrepareResources()585 MOS_STATUS VpRenderHdr3DLutKernelCM::CpPrepareResources()
586 {
587     VP_FUNC_CALL();
588 
589     PMOS_RESOURCE source[VPHAL_MAX_SOURCES] = {nullptr};
590     PMOS_RESOURCE target[VPHAL_MAX_TARGETS] = {nullptr};
591 
592     if ((nullptr != m_hwInterface->m_osInterface) &&
593         (nullptr != m_hwInterface->m_osInterface->osCpInterface))
594     {
595         auto        it   = m_surfaceGroup->find(SurfaceType3DLutCoef);
596         VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
597         VP_RENDER_CHK_NULL_RETURN(surf);
598         source[0] = &(surf->osSurface->OsResource);
599 
600         it   = m_surfaceGroup->find(SurfaceType3DLut);
601         surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
602         VP_RENDER_CHK_NULL_RETURN(surf);
603 
604         target[0] = &(surf->osSurface->OsResource);
605         m_hwInterface->m_osInterface->osCpInterface->PrepareResources((void **)source, 1, (void **)target, 1);
606     }
607     return MOS_STATUS_SUCCESS;
608 }
609 
SetKernelConfigs(KERNEL_CONFIGS & kernelConfigs)610 MOS_STATUS VpRenderHdr3DLutKernelCM::SetKernelConfigs(KERNEL_CONFIGS& kernelConfigs)
611 {
612     VP_FUNC_CALL();
613     auto it = kernelConfigs.find((VpKernelID)kernelHdr3DLutCalc);
614 
615     if (kernelConfigs.end() == it || nullptr == it->second)
616     {
617         VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
618     }
619 
620     PRENDER_HDR_3DLUT_CAL_PARAMS params = (PRENDER_HDR_3DLUT_CAL_PARAMS)it->second;
621 
622     if (m_maxDisplayLum == params->maxDisplayLum && m_maxContentLevelLum == params->maxContentLevelLum &&
623         m_hdrMode == params->hdrMode)
624     {
625         // For such case, 3DLut calculation should be skipped in Policy::GetHdrExecutionCaps.
626         VP_RENDER_ASSERTMESSAGE("No change in 3D Lut parameters!");
627     }
628     else
629     {
630         m_maxDisplayLum         = params->maxDisplayLum;
631         m_maxContentLevelLum    = params->maxContentLevelLum;
632         m_hdrMode               = params->hdrMode;
633         VP_RENDER_NORMALMESSAGE("Maximum Display Luminance %d, Maximum Content Level Luminance %d, HDR mode %d",
634             m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode);
635     }
636 
637     return MOS_STATUS_SUCCESS;
638 }
639