1 /*
2 * Copyright (c) 2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file vp_render_vebox_hdr_3dlut_kernel.cpp
//! \brief    Render packet used by the media pipeline.
//! \details  The render packet provides the structures and generates the command buffer that the media pipeline will use.
26 //!
27 #include "vp_render_vebox_hdr_3dlut_kernel.h"
28 #include "vp_dumper.h"
29 #include "vp_kernelset.h"
30
31 using namespace vp;
32
//!
//! \brief    Binding table indices used by VpRenderHdr3DLutKernel
//!
#define BI_VEBOX_HDR_3DLUT_3DLUT 1
#define BI_VEBOX_HDR_3DLUT_COEF 2

//!
//! \brief    Binding table indices used by VpRenderHdr3DLutKernelCM
//!
#define BI_VEBOX_HDR_3DLUT_3DLUT_CM 0
#define BI_VEBOX_HDR_3DLUT_COEF_CM 1

//!
//! \brief    Identity colour correction matrix: 12 floats, laid out as
//!           3 rows of 4 (same layout as color_matrix_calculation).
//!
static const float ccm_identity[12] = {
    1.0f, 0.0f, 0.0f, 0.0f,
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f};

//!
//! \brief    3x4 matrix written by CalcCCMMatrix() and copied out by its caller.
//! \note     File-scope mutable state: concurrent callers share this storage.
//!
static float color_matrix_calculation[3][4] = {};
44
//!
//! \brief    Invert a 3x3 matrix stored as 9 contiguous row-major elements
//! \details  The inverse is computed as adjugate / determinant. All
//!           intermediate arithmetic is done in double and each result is
//!           converted to T (previously it was always narrowed to float,
//!           which lost precision for wider element types).
//!           The input is fully read before the output is written, so m2 may
//!           point to the same storage as m1.
//!           No singularity check is made: a zero determinant yields
//!           non-finite results.
//! \param    [in] m1
//!           Pointer to the 9 elements of the matrix to invert
//! \param    [out] m2
//!           Pointer to the 9 elements receiving the inverse
//!
template <typename T>
void mat_3by3_inv(const T *m1, T *m2)
{
    // Snapshot the input first so that in-place inversion stays safe.
    const double a11 = m1[0];
    const double a12 = m1[1];
    const double a13 = m1[2];
    const double a21 = m1[3];
    const double a22 = m1[4];
    const double a23 = m1[5];
    const double a31 = m1[6];
    const double a32 = m1[7];
    const double a33 = m1[8];

    // Cofactor expansion along the first column.
    const double det = a11 * (a33 * a22 - a32 * a23) - a21 * (a33 * a12 - a32 * a13) + a31 * (a12 * a23 - a13 * a22);

    const double det_inv = 1 / det;

    m2[0] = static_cast<T>(det_inv * (a33 * a22 - a32 * a23));
    m2[1] = static_cast<T>(-det_inv * (a33 * a12 - a32 * a13));
    m2[2] = static_cast<T>(det_inv * (a12 * a23 - a13 * a22));
    m2[3] = static_cast<T>(-det_inv * (a33 * a21 - a31 * a23));
    m2[4] = static_cast<T>(det_inv * (a33 * a11 - a31 * a13));
    m2[5] = static_cast<T>(-det_inv * (a23 * a11 - a21 * a13));
    m2[6] = static_cast<T>(det_inv * (a32 * a21 - a31 * a22));
    m2[7] = static_cast<T>(-det_inv * (a32 * a11 - a31 * a12));
    m2[8] = static_cast<T>(det_inv * (a22 * a11 - a21 * a12));
}
75
//!
//! \brief    Multiply two dense row-major matrices: m3 = m1 * m2
//! \details  m1 is m_size x n_size, m2 is n_size x l_size and m3 is
//!           m_size x l_size. Each output element is zeroed and then
//!           accumulated in place, so m3 must not overlap m1 or m2.
//!           Internal pointers now use T (they were float*, which made the
//!           template unusable for any other element type).
//! \param    [in] m1
//!           Left operand, m_size * n_size elements
//! \param    [in] m2
//!           Right operand, n_size * l_size elements
//! \param    [out] m3
//!           Result, m_size * l_size elements
//! \param    [in] m_size
//!           Row count of m1 and m3
//! \param    [in] n_size
//!           Column count of m1 / row count of m2
//! \param    [in] l_size
//!           Column count of m2 and m3
//!
template <typename T>
void mat_mul_float(const T *m1, const T *m2, T *m3, short m_size, short n_size, short l_size)
{
    for (int row = 0; row < m_size; ++row)
    {
        const T *lhsRow = m1 + row * n_size;
        T       *outRow = m3 + row * l_size;

        for (int col = 0; col < l_size; ++col)
        {
            T &cell = outRow[col];
            cell    = 0;
            for (int inner = 0; inner < n_size; ++inner)
            {
                cell += lhsRow[inner] * m2[inner * l_size + col];
            }
        }
    }
}
CalcCCMMatrix()102 static void CalcCCMMatrix()
103 {
104 float mat_rgb2xyz_src[3][3], mat_rgb2xyz_dst[3][3];
105 float mat_xyz[3][3], mat_invxyz[3][3];
106 float matW[3];
107 float matE[3] = {};
108
109 float fccmSrcZR, fccmSrcZG, fccmSrcZB, fccmSrcZW;
110 float fccmDstZR, fccmDstZG, fccmDstZB, fccmDstZW;
111
112 const float fccmSrcXR = 0.708000000000000f;
113 const float fccmSrcYR = 0.292000000000000f;
114 const float fccmSrcXG = 0.170000000000000f;
115 const float fccmSrcYG = 0.797000000000000f;
116 const float fccmSrcXB = 0.131000000000000f;
117 const float fccmSrcYB = 0.046000000000000f;
118 const float fccmSrcXW = 0.312700000000000f;
119 const float fccmSrcYW = 0.329000000000000f;
120 const float fccmDstXR = 0.640000000000000f;
121 const float fccmDstYR = 0.330000000000000f;
122 const float fccmDstXG = 0.300000000000000f;
123 const float fccmDstYG = 0.600000000000000f;
124 const float fccmDstXB = 0.150000000000000f;
125 const float fccmDstYB = 0.060000000000000f;
126 const float fccmDstXW = 0.312700000000000f;
127 const float FccmDstYW = 0.329000000000000f;
128
129 fccmSrcZR = 1 - fccmSrcXR - fccmSrcYR;
130 fccmSrcZG = 1 - fccmSrcXG - fccmSrcYG;
131 fccmSrcZB = 1 - fccmSrcXB - fccmSrcYB;
132 fccmSrcZW = 1 - fccmSrcXW - fccmSrcYW;
133
134 mat_xyz[0][0] = fccmSrcXR;
135 mat_xyz[0][1] = fccmSrcXG;
136 mat_xyz[0][2] = fccmSrcXB;
137 mat_xyz[1][0] = fccmSrcYR;
138 mat_xyz[1][1] = fccmSrcYG;
139 mat_xyz[1][2] = fccmSrcYB;
140 mat_xyz[2][0] = fccmSrcZR;
141 mat_xyz[2][1] = fccmSrcZG;
142 mat_xyz[2][2] = fccmSrcZB;
143
144 mat_3by3_inv(mat_xyz[0], mat_invxyz[0]);
145
146 matW[0] = fccmSrcXW / fccmSrcYW;
147 matW[1] = 1;
148 matW[2] = fccmSrcZW / fccmSrcYW;
149
150 mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1);
151
152 mat_rgb2xyz_src[0][0] = matE[0] * fccmSrcXR;
153 mat_rgb2xyz_src[1][0] = matE[0] * fccmSrcYR;
154 mat_rgb2xyz_src[2][0] = matE[0] * fccmSrcZR;
155 mat_rgb2xyz_src[0][1] = matE[1] * fccmSrcXG;
156 mat_rgb2xyz_src[1][1] = matE[1] * fccmSrcYG;
157 mat_rgb2xyz_src[2][1] = matE[1] * fccmSrcZG;
158 mat_rgb2xyz_src[0][2] = matE[2] * fccmSrcXB;
159 mat_rgb2xyz_src[1][2] = matE[2] * fccmSrcYB;
160 mat_rgb2xyz_src[2][2] = matE[2] * fccmSrcZB;
161
162 fccmDstZR = 1 - fccmDstXR - fccmDstYR;
163 fccmDstZG = 1 - fccmDstXG - fccmDstYG;
164 fccmDstZB = 1 - fccmDstXB - fccmDstYB;
165 fccmDstZW = 1 - fccmDstXW - FccmDstYW;
166
167 mat_xyz[0][0] = fccmDstXR;
168 mat_xyz[0][1] = fccmDstXG;
169 mat_xyz[0][2] = fccmDstXB;
170 mat_xyz[1][0] = fccmDstYR;
171 mat_xyz[1][1] = fccmDstYG;
172 mat_xyz[1][2] = fccmDstYB;
173 mat_xyz[2][0] = fccmDstZR;
174 mat_xyz[2][1] = fccmDstZG;
175 mat_xyz[2][2] = fccmDstZB;
176
177 mat_3by3_inv(mat_xyz[0], mat_invxyz[0]);
178
179 matW[0] = fccmDstXW / FccmDstYW;
180 matW[1] = 1;
181 matW[2] = fccmDstZW / FccmDstYW;
182
183 mat_mul_float(mat_invxyz[0], matW, matE, 3, 3, 1);
184
185 mat_rgb2xyz_dst[0][0] = matE[0] * fccmDstXR;
186 mat_rgb2xyz_dst[1][0] = matE[0] * fccmDstYR;
187 mat_rgb2xyz_dst[2][0] = matE[0] * fccmDstZR;
188 mat_rgb2xyz_dst[0][1] = matE[1] * fccmDstXG;
189 mat_rgb2xyz_dst[1][1] = matE[1] * fccmDstYG;
190 mat_rgb2xyz_dst[2][1] = matE[1] * fccmDstZG;
191 mat_rgb2xyz_dst[0][2] = matE[2] * fccmDstXB;
192 mat_rgb2xyz_dst[1][2] = matE[2] * fccmDstYB;
193 mat_rgb2xyz_dst[2][2] = matE[2] * fccmDstZB;
194
195 float mat_invrgb2xyz_dst[3][3];
196 mat_3by3_inv(mat_rgb2xyz_dst[0], mat_invrgb2xyz_dst[0]);
197
198 float CCMmat[3][3];
199 mat_mul_float(mat_invrgb2xyz_dst[0], mat_rgb2xyz_src[0], CCMmat[0], 3, 3, 3);
200
201 color_matrix_calculation[0][0] = CCMmat[0][0];
202 color_matrix_calculation[0][1] = CCMmat[0][1];
203 color_matrix_calculation[0][2] = CCMmat[0][2];
204 color_matrix_calculation[0][3] = 0.0f;
205 color_matrix_calculation[1][0] = CCMmat[1][0];
206 color_matrix_calculation[1][1] = CCMmat[1][1];
207 color_matrix_calculation[1][2] = CCMmat[1][2];
208 color_matrix_calculation[1][3] = 0.0f;
209 color_matrix_calculation[2][0] = CCMmat[2][0];
210 color_matrix_calculation[2][1] = CCMmat[2][1];
211 color_matrix_calculation[2][2] = CCMmat[2][2];
212 color_matrix_calculation[2][3] = 0.0f;
213 }
214
215 // Used by ISA kernel
VpRenderHdr3DLutKernel(PVP_MHWINTERFACE hwInterface,VpKernelID kernelId,uint32_t kernelIndex,std::string kernelName,PVpAllocator allocator)216 VpRenderHdr3DLutKernel::VpRenderHdr3DLutKernel(PVP_MHWINTERFACE hwInterface, VpKernelID kernelId, uint32_t kernelIndex, std::string kernelName, PVpAllocator allocator) :
217 VpRenderKernelObj(hwInterface, kernelId, kernelIndex, VP_HDR_KERNEL_NAME, allocator)
218 {
219 VP_FUNC_CALL();
220 }
221
222 // Used by L0 kernel
VpRenderHdr3DLutKernel(PVP_MHWINTERFACE hwInterface,PVpAllocator allocator)223 VpRenderHdr3DLutKernel::VpRenderHdr3DLutKernel(PVP_MHWINTERFACE hwInterface, PVpAllocator allocator) :
224 VpRenderKernelObj(hwInterface, (VpKernelID)kernelHdr3DLutCalc, 0, VP_HDR_KERNEL_NAME_L0, allocator)
225 {
226 VP_FUNC_CALL();
227 m_kernelBinaryID = VP_ADV_KERNEL_BINARY_ID(kernelHdr3DLutCalc);
228 }
229
~VpRenderHdr3DLutKernel()230 VpRenderHdr3DLutKernel::~VpRenderHdr3DLutKernel()
231 {
232 }
233
SetupSurfaceState()234 MOS_STATUS VpRenderHdr3DLutKernel::SetupSurfaceState()
235 {
236 VP_FUNC_CALL();
237 VP_RENDER_CHK_NULL_RETURN(m_surfaceGroup);
238 VP_RENDER_CHK_NULL_RETURN(m_hwInterface);
239
240 PRENDERHAL_INTERFACE renderHal = m_hwInterface->m_renderHal;
241 PMOS_INTERFACE osInterface = m_hwInterface->m_osInterface;
242 m_surfaceBindingIndex.clear();
243 m_surfaceState.clear();
244 KERNEL_SURFACE_STATE_PARAM kernelSurfaceParam = {};
245 // Only need to specify binding index in surface parameters.
246 kernelSurfaceParam.surfaceOverwriteParams.updatedSurfaceParams = true;
247 kernelSurfaceParam.surfaceOverwriteParams.bindedKernel = true;
248 kernelSurfaceParam.surfaceOverwriteParams.bufferResource = true;
249
250 UpdateCurbeBindingIndex(SurfaceType3DLut, BI_VEBOX_HDR_3DLUT_3DLUT);
251 kernelSurfaceParam.isOutput = true;
252 m_surfaceState.insert(std::make_pair(SurfaceType3DLut, kernelSurfaceParam));
253 UpdateCurbeBindingIndex(SurfaceType3DLutCoef, BI_VEBOX_HDR_3DLUT_COEF);
254 kernelSurfaceParam.isOutput = false;
255 m_surfaceState.insert(std::make_pair(SurfaceType3DLutCoef, kernelSurfaceParam));
256
257 VP_RENDER_CHK_STATUS_RETURN(InitCoefSurface(m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode));
258
259 return MOS_STATUS_SUCCESS;
260 }
261
CpPrepareResources()262 MOS_STATUS VpRenderHdr3DLutKernel::CpPrepareResources()
263 {
264 VP_FUNC_CALL();
265
266 PMOS_RESOURCE source[VPHAL_MAX_SOURCES] = {nullptr};
267 PMOS_RESOURCE target[VPHAL_MAX_TARGETS] = {nullptr};
268
269 if ((nullptr != m_hwInterface->m_osInterface) &&
270 (nullptr != m_hwInterface->m_osInterface->osCpInterface))
271 {
272 auto it = m_surfaceGroup->find(SurfaceType3DLutCoef);
273 VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
274 VP_RENDER_CHK_NULL_RETURN(surf);
275 source[0] = &(surf->osSurface->OsResource);
276
277 it = m_surfaceGroup->find(SurfaceType3DLut);
278 surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
279 VP_RENDER_CHK_NULL_RETURN(surf);
280
281 target[0] = &(surf->osSurface->OsResource);
282 m_hwInterface->m_osInterface->osCpInterface->PrepareResources((void **)source, 1, (void **)target, 1);
283 }
284 return MOS_STATUS_SUCCESS;
285 }
286
GetCurbeState(void * & curbe,uint32_t & curbeLength)287 MOS_STATUS VpRenderHdr3DLutKernel::GetCurbeState(void *&curbe, uint32_t &curbeLength)
288 {
289 VP_FUNC_CALL();
290 // init the hdr 3dlut static data
291 auto bindingMap3DLut = GetSurfaceBindingIndex(SurfaceType3DLut);
292 auto bindingMapCoef = GetSurfaceBindingIndex(SurfaceType3DLutCoef);
293 if (bindingMap3DLut.empty() || bindingMapCoef.empty())
294 {
295 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
296 }
297
298 MOS_ZeroMemory(&m_curbe, sizeof(m_curbe));
299 m_curbe.DW02.hdr3DLutSurface = *bindingMap3DLut.begin();
300 m_curbe.DW04.hdrCoefSurface = *bindingMapCoef.begin();
301 m_curbe.DW06.hdr3DLutSurfaceWidth = m_hdrLutSize;
302 m_curbe.DW06.hdr3DLutSurfaceHeight = m_hdrLutSize;
303
304 curbeLength = sizeof(VEBOX_HDR_3DLUT_STATIC_DATA);
305 curbe = (uint8_t *) & m_curbe;
306 VP_RENDER_NORMALMESSAGE("HDR 3DLut Kernel curbelength %d", curbeLength);
307 return MOS_STATUS_SUCCESS;
308 }
309
SetKernelConfigs(KERNEL_CONFIGS & kernelConfigs)310 MOS_STATUS VpRenderHdr3DLutKernel::SetKernelConfigs(KERNEL_CONFIGS &kernelConfigs)
311 {
312 VP_FUNC_CALL();
313 auto it = kernelConfigs.find((VpKernelID)kernelHdr3DLutCalc);
314
315 if (kernelConfigs.end() == it || nullptr == it->second)
316 {
317 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
318 }
319
320 PRENDER_HDR_3DLUT_CAL_PARAMS params = (PRENDER_HDR_3DLUT_CAL_PARAMS)it->second;
321
322 if (m_maxDisplayLum == params->maxDisplayLum && m_maxContentLevelLum == params->maxContentLevelLum &&
323 m_hdrMode == params->hdrMode && m_hdrLutSize == params->threadWidth)
324 {
325 // For such case, 3DLut calculation should be skipped in Policy::GetHdrExecutionCaps.
326 VP_RENDER_ASSERTMESSAGE("No change in 3D Lut parameters!");
327 }
328 else
329 {
330 m_maxDisplayLum = params->maxDisplayLum;
331 m_maxContentLevelLum = params->maxContentLevelLum;
332 m_hdrMode = params->hdrMode;
333 m_hdrLutSize = params->threadWidth;
334 VP_RENDER_NORMALMESSAGE("Maximum Display Luminance %d, Maximum Content Level Luminance %d, HDR mode %d, Lut size %d",
335 m_maxDisplayLum,
336 m_maxContentLevelLum,
337 m_hdrMode,
338 m_hdrLutSize);
339 }
340
341 return MOS_STATUS_SUCCESS;
342 }
343
344
GetWalkerSetting(KERNEL_WALKER_PARAMS & walkerParam,KERNEL_PACKET_RENDER_DATA & renderData)345 MOS_STATUS VpRenderHdr3DLutKernel::GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData)
346 {
347
348 VP_FUNC_CALL();
349 RENDERHAL_KERNEL_PARAM kernelSettings;
350
351 VP_RENDER_CHK_STATUS_RETURN(GetKernelSettings(kernelSettings));
352 MOS_ZeroMemory(&walkerParam, sizeof(KERNEL_WALKER_PARAMS));
353
354 VP_RENDER_CHK_STATUS_RETURN(VpRenderKernelObj::GetWalkerSetting(m_walkerParam, renderData));
355 m_walkerParam.iBlocksX = m_hdrLutSize;
356 m_walkerParam.iBlocksY = m_hdrLutSize;
357 m_walkerParam.isVerticalPattern = false;
358 m_walkerParam.bSyncFlag = true;
359 walkerParam = m_walkerParam;
360 return MOS_STATUS_SUCCESS;
361 }
362
InitCoefSurface(const uint32_t maxDLL,const uint32_t maxCLL,const VPHAL_HDR_MODE hdrMode)363 MOS_STATUS VpRenderHdr3DLutKernel::InitCoefSurface(const uint32_t maxDLL, const uint32_t maxCLL, const VPHAL_HDR_MODE hdrMode)
364 {
365 VP_FUNC_CALL();
366 float *hdrcoefBuffer = nullptr;
367 int32_t oetfCurve = 0, tmMode = 0, tmSrcType = 0;
368 float *ccmMatrix = m_ccmMatrix;
369 float tmMaxCLL = 0.0f, tmMaxDLL = 0.0f;
370
371 MOS_ZeroMemory(m_ccmMatrix, sizeof(m_ccmMatrix));
372
373 // Get surface addr
374 auto it = m_surfaceGroup->find(SurfaceType3DLutCoef);
375 VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
376 VP_RENDER_CHK_NULL_RETURN(surf);
377
378 tmMaxCLL = (float)maxCLL;
379 tmMaxDLL = (float)maxDLL;
380
381 // Lock surface
382 uint8_t *lockedAddr = (uint8_t *)m_allocator->LockResourceForWrite(&surf->osSurface->OsResource);
383
384 VP_RENDER_CHK_NULL_RETURN(lockedAddr);
385
386 hdrcoefBuffer = (float *)lockedAddr;
387
388 if (hdrMode == VPHAL_HDR_MODE_TONE_MAPPING) // H2S
389 {
390 CalcCCMMatrix();
391 MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, color_matrix_calculation, sizeof(float) * 12);
392
393 tmMode = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2S;
394 oetfCurve = (OETF_CURVE_TYPE)OETF_SRGB;
395 tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709;
396 }
397 else // H2H
398 {
399 MOS_SecureMemcpy(ccmMatrix, sizeof(float) * 12, ccm_identity, sizeof(float) * 12);
400
401 tmMode = (TONE_MAPPING_MODE)TONE_MAPPING_MODE_H2H;
402 oetfCurve = (OETF_CURVE_TYPE)OETF_CURVE_HDR_2084;
403 tmSrcType = (TONE_MAPPING_SOURCE_TYPE)TONE_MAPPING_SOURCE_PSEUDO_Y_BT709;
404 }
405
406 // Fill Coefficient Surface: Media kernel define the layout of coefficients. Please don't change it.
407 const uint32_t pos_coef[17] = {7, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 54, 55, 62, 63};
408
409 // OETF curve
410 ((int *)hdrcoefBuffer)[pos_coef[0]] = oetfCurve;
411 // CCM
412 for (uint32_t i = 0; i < VP_CCM_MATRIX_SIZE; ++i)
413 {
414 hdrcoefBuffer[pos_coef[i + 1]] = ccmMatrix[i];
415 }
416 // TM Source Type
417 ((int *)hdrcoefBuffer)[pos_coef[13]] = tmSrcType;
418 // TM Mode
419 ((int *)hdrcoefBuffer)[pos_coef[14]] = tmMode;
420 // Max CLL and DLL
421 hdrcoefBuffer[pos_coef[15]] = tmMaxCLL;
422 hdrcoefBuffer[pos_coef[16]] = tmMaxDLL;
423
424 //Unlock
425 VP_RENDER_CHK_STATUS_RETURN(m_allocator->UnLock(&surf->osSurface->OsResource));
426
427 return MOS_STATUS_SUCCESS;
428 }
429
VpRenderHdr3DLutKernelCM(PVP_MHWINTERFACE hwInterface,VpKernelID kernelID,uint32_t kernelIndex,PVpAllocator allocator)430 VpRenderHdr3DLutKernelCM::VpRenderHdr3DLutKernelCM(PVP_MHWINTERFACE hwInterface, VpKernelID kernelID, uint32_t kernelIndex, PVpAllocator allocator) :
431 VpRenderHdr3DLutKernel(hwInterface, kernelID, kernelIndex, VP_HDR_KERNEL_NAME, allocator)
432 {
433 m_kernelBinaryID = VP_ADV_KERNEL_BINARY_ID(kernelID);
434 m_isAdvKernel = true;
435 }
436
~VpRenderHdr3DLutKernelCM()437 VpRenderHdr3DLutKernelCM::~VpRenderHdr3DLutKernelCM()
438 {
439 // No need to destroy dstArg.pData, which points to the local variable
440 // in VpHdrFilter.
441 }
442
Init(VpRenderKernel & kernel)443 MOS_STATUS VpRenderHdr3DLutKernelCM::Init(VpRenderKernel &kernel)
444 {
445 VP_FUNC_CALL();
446 m_kernelSize = kernel.GetKernelSize() + KERNEL_BINARY_PADDING_SIZE;
447
448 uint8_t *pKernelBin = (uint8_t *)kernel.GetKernelBinPointer();
449 VP_RENDER_CHK_NULL_RETURN(pKernelBin);
450
451 m_kernelBinary = pKernelBin + kernel.GetKernelBinOffset();
452 m_kernelArgs = kernel.GetKernelArgs();
453
454 return MOS_STATUS_SUCCESS;
455 }
456
GetWalkerSetting(KERNEL_WALKER_PARAMS & walkerParam,KERNEL_PACKET_RENDER_DATA & renderData)457 MOS_STATUS VpRenderHdr3DLutKernelCM::GetWalkerSetting(KERNEL_WALKER_PARAMS &walkerParam, KERNEL_PACKET_RENDER_DATA &renderData)
458 {
459 VP_FUNC_CALL();
460
461 VP_RENDER_CHK_STATUS_RETURN(VpRenderKernelObj::GetWalkerSetting(m_walkerParam, renderData));
462
463 walkerParam = m_walkerParam;
464 return MOS_STATUS_SUCCESS;
465 }
466
467 // Only for Adv kernels.
SetWalkerSetting(KERNEL_THREAD_SPACE & threadSpace,bool bSyncFlag,bool flushL1)468 MOS_STATUS VpRenderHdr3DLutKernelCM::SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1)
469 {
470 VP_FUNC_CALL();
471 MOS_ZeroMemory(&m_walkerParam, sizeof(KERNEL_WALKER_PARAMS));
472
473 m_walkerParam.iBlocksX = threadSpace.uWidth;
474 m_walkerParam.iBlocksY = threadSpace.uHeight;
475 m_walkerParam.isVerticalPattern = false;
476 m_walkerParam.bSyncFlag = bSyncFlag;
477
478 return MOS_STATUS_SUCCESS;
479 }
480
SetKernelArgs(KERNEL_ARGS & kernelArgs,VP_PACKET_SHARED_CONTEXT * sharedContext)481 MOS_STATUS VpRenderHdr3DLutKernelCM::SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext)
482 {
483 VP_FUNC_CALL();
484 if (kernelArgs.size() != m_kernelArgs.size())
485 {
486 VP_RENDER_ASSERTMESSAGE("The Kernel Arguments is not aligned!");
487 return MOS_STATUS_INVALID_PARAMETER;
488 }
489
490 for (uint32_t i = 0; i < m_kernelArgs.size(); ++i)
491 {
492 if (i >= kernelArgs.size())
493 {
494 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
495 }
496 KRN_ARG &srcArg = kernelArgs[i];
497 KRN_ARG &dstArg = m_kernelArgs[i];
498
499 if (srcArg.uIndex != dstArg.uIndex ||
500 srcArg.uSize != dstArg.uSize ||
501 srcArg.eArgKind != dstArg.eArgKind &&
502 dstArg.eArgKind != (srcArg.eArgKind & ~SURFACE_MASK) ||
503 srcArg.pData == nullptr)
504 {
505 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
506 }
507 dstArg.eArgKind = srcArg.eArgKind;
508 dstArg.pData = srcArg.pData;
509 srcArg.pData = nullptr;
510 }
511
512 return MOS_STATUS_SUCCESS;
513 }
514
GetCurbeState(void * & curbe,uint32_t & curbeLength)515 MOS_STATUS VpRenderHdr3DLutKernelCM::GetCurbeState(void *&curbe, uint32_t &curbeLength)
516 {
517 VP_FUNC_CALL();
518 curbeLength = 0;
519 for (auto arg : m_kernelArgs)
520 {
521 curbeLength += arg.uSize;
522 }
523
524 if (sizeof(m_curbe) != curbeLength)
525 {
526 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
527 }
528
529 uint8_t *data = (uint8_t *)&m_curbe;
530
531 for (auto &arg : m_kernelArgs)
532 {
533 if (arg.eArgKind == ARG_KIND_SURFACE)
534 {
535 // Resource need be added.
536 uint32_t *pSurfaceindex = static_cast<uint32_t *>(arg.pData);
537 auto bindingMap = GetSurfaceBindingIndex((SurfaceType)*pSurfaceindex);
538 if (bindingMap.empty())
539 {
540 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
541 }
542 *((uint32_t *)(data + arg.uOffsetInPayload)) = *bindingMap.begin();
543 }
544 else if (arg.eArgKind == ARG_KIND_GENERAL)
545 {
546 MOS_SecureMemcpy(data + arg.uOffsetInPayload, arg.uSize, arg.pData, arg.uSize);
547 }
548 else
549 {
550 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_UNIMPLEMENTED);
551 }
552 }
553
554 curbe = data;
555 VP_RENDER_NORMALMESSAGE("HDR 3DLut Kernel curbelength %d", curbeLength);
556 return MOS_STATUS_SUCCESS;
557 }
558
SetupSurfaceState()559 MOS_STATUS VpRenderHdr3DLutKernelCM::SetupSurfaceState()
560 {
561 VP_FUNC_CALL();
562 VP_RENDER_CHK_NULL_RETURN(m_surfaceGroup);
563 VP_RENDER_CHK_NULL_RETURN(m_hwInterface);
564
565 PRENDERHAL_INTERFACE renderHal = m_hwInterface->m_renderHal;
566 PMOS_INTERFACE osInterface = m_hwInterface->m_osInterface;
567 m_surfaceBindingIndex.clear();
568
569 KERNEL_SURFACE_STATE_PARAM kernelSurfaceParam = {};
570 kernelSurfaceParam.surfaceOverwriteParams.bindedKernel = true;
571 kernelSurfaceParam.surfaceOverwriteParams.bufferResource = true;
572
573 UpdateCurbeBindingIndex(SurfaceType3DLut, BI_VEBOX_HDR_3DLUT_3DLUT_CM);
574 kernelSurfaceParam.isOutput = true;
575 m_surfaceState.insert(std::make_pair(SurfaceType3DLut, kernelSurfaceParam));
576 UpdateCurbeBindingIndex(SurfaceType3DLutCoef, BI_VEBOX_HDR_3DLUT_COEF_CM);
577 kernelSurfaceParam.isOutput = false;
578 m_surfaceState.insert(std::make_pair(SurfaceType3DLutCoef, kernelSurfaceParam));
579
580 VP_RENDER_CHK_STATUS_RETURN(InitCoefSurface(m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode));
581
582 return MOS_STATUS_SUCCESS;
583 }
584
CpPrepareResources()585 MOS_STATUS VpRenderHdr3DLutKernelCM::CpPrepareResources()
586 {
587 VP_FUNC_CALL();
588
589 PMOS_RESOURCE source[VPHAL_MAX_SOURCES] = {nullptr};
590 PMOS_RESOURCE target[VPHAL_MAX_TARGETS] = {nullptr};
591
592 if ((nullptr != m_hwInterface->m_osInterface) &&
593 (nullptr != m_hwInterface->m_osInterface->osCpInterface))
594 {
595 auto it = m_surfaceGroup->find(SurfaceType3DLutCoef);
596 VP_SURFACE *surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
597 VP_RENDER_CHK_NULL_RETURN(surf);
598 source[0] = &(surf->osSurface->OsResource);
599
600 it = m_surfaceGroup->find(SurfaceType3DLut);
601 surf = (m_surfaceGroup->end() != it) ? it->second : nullptr;
602 VP_RENDER_CHK_NULL_RETURN(surf);
603
604 target[0] = &(surf->osSurface->OsResource);
605 m_hwInterface->m_osInterface->osCpInterface->PrepareResources((void **)source, 1, (void **)target, 1);
606 }
607 return MOS_STATUS_SUCCESS;
608 }
609
SetKernelConfigs(KERNEL_CONFIGS & kernelConfigs)610 MOS_STATUS VpRenderHdr3DLutKernelCM::SetKernelConfigs(KERNEL_CONFIGS& kernelConfigs)
611 {
612 VP_FUNC_CALL();
613 auto it = kernelConfigs.find((VpKernelID)kernelHdr3DLutCalc);
614
615 if (kernelConfigs.end() == it || nullptr == it->second)
616 {
617 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
618 }
619
620 PRENDER_HDR_3DLUT_CAL_PARAMS params = (PRENDER_HDR_3DLUT_CAL_PARAMS)it->second;
621
622 if (m_maxDisplayLum == params->maxDisplayLum && m_maxContentLevelLum == params->maxContentLevelLum &&
623 m_hdrMode == params->hdrMode)
624 {
625 // For such case, 3DLut calculation should be skipped in Policy::GetHdrExecutionCaps.
626 VP_RENDER_ASSERTMESSAGE("No change in 3D Lut parameters!");
627 }
628 else
629 {
630 m_maxDisplayLum = params->maxDisplayLum;
631 m_maxContentLevelLum = params->maxContentLevelLum;
632 m_hdrMode = params->hdrMode;
633 VP_RENDER_NORMALMESSAGE("Maximum Display Luminance %d, Maximum Content Level Luminance %d, HDR mode %d",
634 m_maxDisplayLum, m_maxContentLevelLum, m_hdrMode);
635 }
636
637 return MOS_STATUS_SUCCESS;
638 }
639