1 /*
2 * Copyright (c) 2018-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file vphal_render_fast1ton.cpp
24 //! \brief Multi scaling output Surface alignment as 16 or non-16 bytes
25 //! \details Unified VP HAL multi scaling output Surface 16 or non-16 bytes alignment module interfaces
26 //!
27
28 #include "vphal_render_fast1ton.h"
29 #include "vphal_debug.h"
30 #include "vpkrnheader.h"
31 #include "vphal_render_composite.h"
32 #include "vphal_render_ief.h"
33 #include "vphal_renderer.h"
34
// Values written to the CURBE Sampler_Index0/1/2 fields, one per output.
#define AVS_SAMPLE_INDEX0           1
#define AVS_SAMPLE_INDEX1           3
#define AVS_SAMPLE_INDEX2           5

// Value written to the CURBE Src_Index field.
#define FAST1TON_SRC_INDEX          0

// Destination indices, grouped per output.
//   FAST1TON_DST_INDEXn               -> written to Dst_Index<n> when Aligned16[n] is set
//   FAST1TON_DST_Y_INDEXn / _UV_INDEXn -> written to Dst_Y_Index<n> / Dst_UV_Index<n> otherwise
// NOTE(review): the previous comments labelled these the other way round
// ("non-16 aligned" for DST_INDEXn); the description above follows how
// VpHal_Fast1toNLoadStaticData uses them - confirm against the kernel.

// output1
#define FAST1TON_DST_INDEX0         1
#define FAST1TON_DST_Y_INDEX0       1
#define FAST1TON_DST_UV_INDEX0      2

// output2
#define FAST1TON_DST_INDEX1         3
#define FAST1TON_DST_Y_INDEX1       3
#define FAST1TON_DST_UV_INDEX1      4

// output3
#define FAST1TON_DST_INDEX2         5
#define FAST1TON_DST_Y_INDEX2       5
#define FAST1TON_DST_UV_INDEX2      6

// Bit flags OR-ed into the CURBE Dst_16Aligned field, one bit per output.
#define ALIGN16_DST0                (1<<0)
#define ALIGN16_DST1                (1<<1)
#define ALIGN16_DST2                (1<<2)
56 //!
57 //! \brief fast 1toN Kernel params for Gen9 Media Walker
58 //!
59 static const RENDERHAL_KERNEL_PARAM g_fast1toN_MW_KernelParam[1] =
60 {
61 /* GRF_Count
62 | BT_Count
63 | | Sampler_Count
64 | | | Thread_Count
65 | | | | GRF_Start_Register
66 | | | | | CURBE_Length
67 | | | | | | block_width
68 | | | | | | | block_height
69 | | | | | | | | blocks_x
70 | | | | | | | | | blocks_y
71 | | | | | | | | | |*/
72 { 4, 34, 3, VPHAL_USE_MEDIA_THREADS_MAX, 0, 4, 16, 16, 1, 1 }, // R8
73 };
74
75 //!
76 //! \brief fast 1toN load the curbe data
77 //! \details Curbe data for fast 1toN
78 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
79 //! [in] Pointer to the fast 1toN State
80 //! \param PVPHAL_FAST1TON_RENDER_DATA pRenderData
81 //! [in] Pointer to fast 1toN render data
82 //! \param int32_t* piCurbeOffset
83 //! [out] Pointer to curbe data offset
84 //! \return MOS_STATUS
85 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
86 //!
VpHal_Fast1toNLoadStaticData(PVPHAL_FAST1TON_STATE pFast1toNState,PVPHAL_FAST1TON_RENDER_DATA pRenderData,int32_t * piCurbeOffset)87 MOS_STATUS VpHal_Fast1toNLoadStaticData(
88 PVPHAL_FAST1TON_STATE pFast1toNState,
89 PVPHAL_FAST1TON_RENDER_DATA pRenderData,
90 int32_t* piCurbeOffset)
91 {
92 PRENDERHAL_INTERFACE pRenderHal;
93 MEDIA_WALKER_FAST1TON_STATIC_DATA WalkerStatic;
94 MOS_STATUS eStatus;
95 int32_t iCurbeLength;
96
97 VPHAL_RENDER_CHK_NULL(pFast1toNState);
98 VPHAL_RENDER_CHK_NULL(pFast1toNState->pRenderHal);
99 eStatus = MOS_STATUS_SUCCESS;
100 pRenderHal = pFast1toNState->pRenderHal;
101
102 // Set relevant static data
103 MOS_ZeroMemory(&WalkerStatic, sizeof(MEDIA_WALKER_FAST1TON_STATIC_DATA));
104 if (pFast1toNState->pTarget[0])
105 {
106 WalkerStatic.DW0.Sampler_Index0 = AVS_SAMPLE_INDEX0;
107 if (pFast1toNState->Aligned16[0])
108 {
109 WalkerStatic.DW4.Dst_Index0 = FAST1TON_DST_INDEX0;
110 WalkerStatic.DW10.Dst_16Aligned |= ALIGN16_DST0;
111 #if defined(LINUX) && !defined(WDDM_LINUX)
112 WalkerStatic.DW10.Dst_pitch0 = pFast1toNState->pTarget[0]->OsResource.iPitch;
113 WalkerStatic.DW12.Dst_UVOffset0 = pFast1toNState->pTarget[0]->OsResource.iHeight;
114 #endif
115 }
116 else
117 {
118 WalkerStatic.DW4.Dst_Y_Index0 = FAST1TON_DST_Y_INDEX0;
119 WalkerStatic.DW5.Dst_UV_Index0 = FAST1TON_DST_UV_INDEX0;
120 WalkerStatic.DW10.Dst_pitch0 = pFast1toNState->pTarget[0]->dwPitch;
121 WalkerStatic.DW12.Dst_UVOffset0 = pFast1toNState->pTarget[0]->dwHeight;
122 }
123 WalkerStatic.DW14.ScalingStep_H0 = pRenderData->ScalingStep_H[0];
124 WalkerStatic.DW15.ScalingStep_V0 = pRenderData->ScalingStep_V[0];
125 }
126
127 if (pFast1toNState->pTarget[1])
128 {
129 WalkerStatic.DW1.Sampler_Index1 = AVS_SAMPLE_INDEX1;
130 if (pFast1toNState->Aligned16[1])
131 {
132 WalkerStatic.DW6.Dst_Index1 = FAST1TON_DST_INDEX1;
133 WalkerStatic.DW10.Dst_16Aligned |= ALIGN16_DST1;
134 #if defined(LINUX) && !defined(WDDM_LINUX)
135 WalkerStatic.DW11.Dst_pitch1 = pFast1toNState->pTarget[1]->OsResource.iPitch;
136 WalkerStatic.DW12.Dst_UVOffset1 = pFast1toNState->pTarget[1]->OsResource.iHeight;
137 #endif
138 }
139 else
140 {
141 WalkerStatic.DW6.Dst_Y_Index1 = FAST1TON_DST_Y_INDEX1;
142 WalkerStatic.DW7.Dst_UV_Index1 = FAST1TON_DST_UV_INDEX1;
143 WalkerStatic.DW11.Dst_pitch1 = pFast1toNState->pTarget[1]->dwPitch;
144 WalkerStatic.DW12.Dst_UVOffset1 = pFast1toNState->pTarget[1]->dwHeight;
145 }
146 WalkerStatic.DW16.ScalingStep_H1 = pRenderData->ScalingStep_H[1];
147 WalkerStatic.DW17.ScalingStep_V1 = pRenderData->ScalingStep_V[1];
148 }
149
150 if (pFast1toNState->pTarget[2])
151 {
152 WalkerStatic.DW2.Sampler_Index2 = AVS_SAMPLE_INDEX2;
153 if (pFast1toNState->Aligned16[2])
154 {
155 WalkerStatic.DW8.Dst_Index2 = FAST1TON_DST_INDEX2;
156 WalkerStatic.DW10.Dst_16Aligned |= ALIGN16_DST2;
157 #if defined(LINUX) && !defined(WDDM_LINUX)
158 WalkerStatic.DW11.Dst_pitch2 = pFast1toNState->pTarget[2]->OsResource.iPitch;
159 WalkerStatic.DW13.Dst_UVOffset2 = pFast1toNState->pTarget[2]->OsResource.iHeight;
160 #endif
161 }
162 else
163 {
164 WalkerStatic.DW8.Dst_Y_Index2 = FAST1TON_DST_Y_INDEX2;
165 WalkerStatic.DW9.Dst_UV_Index2 = FAST1TON_DST_UV_INDEX2;
166 WalkerStatic.DW11.Dst_pitch2 = pFast1toNState->pTarget[2]->dwPitch;
167 WalkerStatic.DW13.Dst_UVOffset2 = pFast1toNState->pTarget[2]->dwHeight;
168 }
169 WalkerStatic.DW18.ScalingStep_H2 = pRenderData->ScalingStep_H[2];
170 WalkerStatic.DW19.ScalingStep_V2 = pRenderData->ScalingStep_V[2];
171 }
172 WalkerStatic.DW3.Src_Index = FAST1TON_SRC_INDEX;
173
174 iCurbeLength = sizeof(MEDIA_WALKER_FAST1TON_STATIC_DATA);
175
176 *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
177 pRenderHal,
178 pRenderData->pMediaState,
179 &WalkerStatic,
180 iCurbeLength);
181
182 if (*piCurbeOffset < 0)
183 {
184 eStatus = MOS_STATUS_UNKNOWN;
185 goto finish;
186 }
187
188 finish:
189 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
190 return eStatus;
191 }
192
193 //!
194 //! \brief fast 1toN kernel setup
195 //! \details Kernel setup for bitcopy
196 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
197 //! [in] Pointer to the fast 1toN State
198 //! \param PVPHAL_FAST1TON_RENDER_DATA pRenderData
199 //! [in] Pointer to fast 1toN render data
200 //! \return MOS_STATUS
201 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
202 //!
VpHal_Fast1toNSetupKernel(PVPHAL_FAST1TON_STATE pFast1toNState,PVPHAL_FAST1TON_RENDER_DATA pRenderData)203 MOS_STATUS VpHal_Fast1toNSetupKernel(
204 PVPHAL_FAST1TON_STATE pFast1toNState,
205 PVPHAL_FAST1TON_RENDER_DATA pRenderData)
206 {
207 MOS_STATUS eStatus;
208 Kdll_CacheEntry *pCacheEntryTable;
209
210 VPHAL_RENDER_CHK_NULL(pFast1toNState);
211 eStatus = MOS_STATUS_SUCCESS;
212 pCacheEntryTable =
213 pFast1toNState->pKernelDllState->ComponentKernelCache.pCacheEntries;
214
215 // Set the Kernel Parameters
216 pRenderData->pKernelParam = pFast1toNState->pKernelParamTable;
217 pRenderData->PerfTag = VPHAL_NONE;
218
219 // Set curbe & inline data size
220 pRenderData->iCurbeLength = pRenderData->pKernelParam->CURBE_Length * GRF_SIZE;
221
222 // Set Kernel entry
223 pRenderData->KernelEntry.iKUID = IDR_VP_fast_avs_1_to_n;
224 pRenderData->KernelEntry.iKCID = -1;
225 pRenderData->KernelEntry.iSize = pCacheEntryTable[IDR_VP_fast_avs_1_to_n].iSize;
226 pRenderData->KernelEntry.pBinary = pCacheEntryTable[IDR_VP_fast_avs_1_to_n].pBinary;
227
228 finish:
229 return eStatus;
230 }
231
232 //!
233 //! \brief Recalculate Sampler Avs 8x8 Horizontal/Vertical scaling table
234 //! \details Recalculate Sampler Avs 8x8 Horizontal/Vertical scaling table
235 //! \param MOS_FORMAT SrcFormat
236 //! [in] Source Format
237 //! \param float fScale
238 //! [in] Horizontal or Vertical Scale Factor
239 //! \param bool bVertical
240 //! [in] true if Vertical Scaling, else Horizontal Scaling
241 //! \param uint32_t dwChromaSiting
242 //! [in] Chroma Siting
243 //! \param bool bBalancedFilter
244 //! [in] true if Gen9+, balanced filter
245 //! \param bool b8TapAdaptiveEnable
246 //! [in] true if 8Tap Adaptive Enable
247 //! \param PVPHAL_AVS_PARAMS pAvsParams
248 //! [in/out] Pointer to AVS Params
249 //! \return MOS_STATUS
250 //!
VpHal_Fast1toNSamplerAvsCalcScalingTable(MOS_FORMAT SrcFormat,float fScale,bool bVertical,uint32_t dwChromaSiting,bool bBalancedFilter,bool b8TapAdaptiveEnable,PMHW_AVS_PARAMS pAvsParams)251 static MOS_STATUS VpHal_Fast1toNSamplerAvsCalcScalingTable(
252 MOS_FORMAT SrcFormat,
253 float fScale,
254 bool bVertical,
255 uint32_t dwChromaSiting,
256 bool bBalancedFilter,
257 bool b8TapAdaptiveEnable,
258 PMHW_AVS_PARAMS pAvsParams)
259 {
260 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
261 MHW_PLANE Plane;
262 int32_t iUvPhaseOffset;
263 uint32_t dwHwPhrase;
264 uint32_t YCoefTableSize;
265 uint32_t UVCoefTableSize;
266 float fScaleParam;
267 int32_t* piYCoefsParam;
268 int32_t* piUVCoefsParam;
269 float fHPStrength;
270
271 VPHAL_RENDER_CHK_NULL(pAvsParams);
272 VPHAL_RENDER_CHK_NULL(pAvsParams->piYCoefsY);
273 VPHAL_RENDER_CHK_NULL(pAvsParams->piYCoefsX);
274 VPHAL_RENDER_CHK_NULL(pAvsParams->piUVCoefsY);
275 VPHAL_RENDER_CHK_NULL(pAvsParams->piUVCoefsX);
276
277 if (bBalancedFilter)
278 {
279 YCoefTableSize = POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G9;
280 UVCoefTableSize = POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G9;
281 dwHwPhrase = NUM_HW_POLYPHASE_TABLES_G9;
282 }
283 else
284 {
285 YCoefTableSize = POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G8;
286 UVCoefTableSize = POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G8;
287 dwHwPhrase = MHW_NUM_HW_POLYPHASE_TABLES;
288 }
289
290 fHPStrength = 0.0F;
291 piYCoefsParam = bVertical ? pAvsParams->piYCoefsY : pAvsParams->piYCoefsX;
292 piUVCoefsParam = bVertical ? pAvsParams->piUVCoefsY : pAvsParams->piUVCoefsX;
293 fScaleParam = bVertical ? pAvsParams->fScaleY : pAvsParams->fScaleX;
294
295 // Recalculate Horizontal or Vertical scaling table
296 if (SrcFormat != pAvsParams->Format || fScale != fScaleParam)
297 {
298 MOS_ZeroMemory(piYCoefsParam, YCoefTableSize);
299 MOS_ZeroMemory(piUVCoefsParam, UVCoefTableSize);
300
301 // 4-tap filtering for RGB format G-channel if 8tap adaptive filter is not enabled.
302 Plane = (IS_RGB32_FORMAT(SrcFormat) && !b8TapAdaptiveEnable) ? MHW_U_PLANE : MHW_Y_PLANE;
303 if (bVertical)
304 {
305 pAvsParams->fScaleY = fScale;
306 }
307 else
308 {
309 pAvsParams->fScaleX = fScale;
310 }
311
312 // For 1x scaling in horizontal direction, use special coefficients for filtering
313 // we don't do this when bForcePolyPhaseCoefs flag is set
314 if (fScale == 1.0F && !pAvsParams->bForcePolyPhaseCoefs)
315 {
316 VPHAL_RENDER_CHK_STATUS(Mhw_SetNearestModeTable(
317 piYCoefsParam,
318 Plane,
319 bBalancedFilter));
320 // If the 8-tap adaptive is enabled for all channel, then UV/RB use the same coefficient as Y/G
321 // So, coefficient for UV/RB channels caculation can be passed
322 if (!b8TapAdaptiveEnable)
323 {
324 VPHAL_RENDER_CHK_STATUS(Mhw_SetNearestModeTable(
325 piUVCoefsParam,
326 MHW_U_PLANE,
327 bBalancedFilter));
328 }
329 }
330 else
331 {
332 // Clamp the Scaling Factor if > 1.0x
333 fScale = MOS_MIN(1.0F, fScale);
334
335 VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesY(
336 piYCoefsParam,
337 fScale,
338 Plane,
339 SrcFormat,
340 fHPStrength,
341 true,
342 dwHwPhrase,
343 0));
344
345 // If the 8-tap adaptive is enabled for all channel, then UV/RB use the same coefficient as Y/G
346 // So, coefficient for UV/RB channels caculation can be passed
347 if (!b8TapAdaptiveEnable)
348 {
349 if (!bBalancedFilter)
350 {
351 VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesY(
352 piUVCoefsParam,
353 fScale,
354 MHW_U_PLANE,
355 SrcFormat,
356 fHPStrength,
357 true,
358 dwHwPhrase,
359 0));
360 }
361 else
362 {
363 // If Chroma Siting info is present
364 if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_TOP : MHW_CHROMA_SITING_HORZ_LEFT))
365 {
366 // No Chroma Siting
367 VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesUV(
368 piUVCoefsParam,
369 2.0F,
370 fScale));
371 }
372 else
373 {
374 // Chroma siting offset needs to be added
375 if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_CENTER : MHW_CHROMA_SITING_HORZ_CENTER))
376 {
377 iUvPhaseOffset = MOS_UF_ROUND(0.5F * 16.0F); // U0.4
378 }
379 else //if (ChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_BOTTOM : MHW_CHROMA_SITING_HORZ_RIGHT))
380 {
381 iUvPhaseOffset = MOS_UF_ROUND(1.0F * 16.0F); // U0.4
382 }
383
384 VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesUVOffset(
385 piUVCoefsParam,
386 3.0F,
387 fScale,
388 iUvPhaseOffset));
389 }
390 }
391 }
392 }
393 }
394
395 finish:
396 return eStatus;
397 }
398
399 //!
400 //! \brief Set Sampler Avs 8x8 Table for LGCA
401 //! \details Set Sampler Avs 8x8 Table for LGCA
402 //! \param PRENDERHAL_INTERFACE pRenderHal
403 //! [in] Pointer to RenderHal Interface Structure
404 //! \param PMHW_SAMPLER_STATE_PARAM pSamplerStateParams
405 //! [in] Pointer to Sampler State Params
406 //! \param PMHW_AVS_PARAMS pAvsParams
407 //! [in/out] Pointer to AVS Params
408 //! \param MOS_FORMAT SrcFormat
409 //! [in] Source Format
410 //! \return MOS_STATUS
411 //!
VpHal_Fast1toNSetSamplerAvsTableParam(PRENDERHAL_INTERFACE pRenderHal,PMHW_SAMPLER_STATE_PARAM pSamplerStateParams,PMHW_AVS_PARAMS pAvsParams,MOS_FORMAT SrcFormat,float fScaleX,float fScaleY,uint32_t dwChromaSiting)412 static MOS_STATUS VpHal_Fast1toNSetSamplerAvsTableParam(
413 PRENDERHAL_INTERFACE pRenderHal,
414 PMHW_SAMPLER_STATE_PARAM pSamplerStateParams,
415 PMHW_AVS_PARAMS pAvsParams,
416 MOS_FORMAT SrcFormat,
417 float fScaleX,
418 float fScaleY,
419 uint32_t dwChromaSiting)
420 {
421 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
422 bool bBalancedFilter;
423 PMHW_SAMPLER_AVS_TABLE_PARAM pMhwSamplerAvsTableParam;
424
425 VPHAL_RENDER_CHK_NULL(pRenderHal);
426 VPHAL_RENDER_CHK_NULL(pSamplerStateParams);
427 VPHAL_RENDER_CHK_NULL(pAvsParams);
428
429 pMhwSamplerAvsTableParam = pSamplerStateParams->Avs.pMhwSamplerAvsTableParam;
430
431 pMhwSamplerAvsTableParam->b8TapAdaptiveEnable = pSamplerStateParams->Avs.b8TapAdaptiveEnable;
432 pMhwSamplerAvsTableParam->byteTransitionArea8Pixels = MEDIASTATE_AVS_TRANSITION_AREA_8_PIXELS;
433 pMhwSamplerAvsTableParam->byteTransitionArea4Pixels = MEDIASTATE_AVS_TRANSITION_AREA_4_PIXELS;
434 pMhwSamplerAvsTableParam->byteMaxDerivative8Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_8_PIXELS;
435 pMhwSamplerAvsTableParam->byteMaxDerivative4Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_4_PIXELS;
436 pMhwSamplerAvsTableParam->byteDefaultSharpnessLevel = MEDIASTATE_AVS_SHARPNESS_LEVEL_SHARP;
437
438 // Enable Adaptive Filtering, if it is being upscaled
439 // in either direction. we must check for this before clamping the SF.
440 if ((IS_YUV_FORMAT(SrcFormat) && (fScaleX > 1.0F || fScaleY > 1.0F)) ||
441 pMhwSamplerAvsTableParam->b8TapAdaptiveEnable)
442 {
443 pMhwSamplerAvsTableParam->bBypassXAdaptiveFiltering = false;
444 pMhwSamplerAvsTableParam->bBypassYAdaptiveFiltering = false;
445 if (pMhwSamplerAvsTableParam->b8TapAdaptiveEnable)
446 {
447 pMhwSamplerAvsTableParam->bAdaptiveFilterAllChannels = true;
448
449 if (IS_RGB_FORMAT(SrcFormat))
450 {
451 pMhwSamplerAvsTableParam->bEnableRGBAdaptive = true;
452 }
453 }
454 }
455 else
456 {
457 pMhwSamplerAvsTableParam->bBypassXAdaptiveFiltering = true;
458 pMhwSamplerAvsTableParam->bBypassYAdaptiveFiltering = true;
459 }
460
461 // No changes to AVS parameters -> skip
462 if (SrcFormat == pAvsParams->Format &&
463 fScaleX == pAvsParams->fScaleX &&
464 fScaleY == pAvsParams->fScaleY)
465 {
466 goto finish;
467 }
468
469 // AVS Coefficients don't change for Scaling Factors > 1.0x
470 // Hence recalculation is avoided
471 if (fScaleX > 1.0F && pAvsParams->fScaleX > 1.0F)
472 {
473 pAvsParams->fScaleX = fScaleX;
474 }
475
476 // AVS Coefficients don't change for Scaling Factors > 1.0x
477 // Hence recalculation is avoided
478 if (fScaleY > 1.0F && pAvsParams->fScaleY > 1.0F)
479 {
480 pAvsParams->fScaleY = fScaleY;
481 }
482
483 bBalancedFilter = true;
484 // Recalculate Horizontal scaling table
485 VPHAL_HW_CHK_STATUS(VpHal_Fast1toNSamplerAvsCalcScalingTable(
486 SrcFormat,
487 fScaleX,
488 false,
489 dwChromaSiting,
490 bBalancedFilter,
491 pMhwSamplerAvsTableParam->b8TapAdaptiveEnable ? true : false,
492 pAvsParams));
493
494 // Recalculate Vertical scaling table
495 VPHAL_HW_CHK_STATUS(VpHal_Fast1toNSamplerAvsCalcScalingTable(
496 SrcFormat,
497 fScaleY,
498 true,
499 dwChromaSiting,
500 bBalancedFilter,
501 pMhwSamplerAvsTableParam->b8TapAdaptiveEnable ? true : false,
502 pAvsParams));
503
504 pMhwSamplerAvsTableParam->bIsCoeffExtraEnabled = true;
505 // Save format used to calculate AVS parameters
506 pAvsParams->Format = SrcFormat;
507 pMhwSamplerAvsTableParam->b4TapGY = (IS_RGB32_FORMAT(SrcFormat) && !pMhwSamplerAvsTableParam->b8TapAdaptiveEnable);
508 pMhwSamplerAvsTableParam->b4TapRBUV = (!pMhwSamplerAvsTableParam->b8TapAdaptiveEnable);
509
510 VpHal_RenderCommonSetAVSTableParam(pAvsParams, pMhwSamplerAvsTableParam);
511
512 finish:
513 return eStatus;
514 }
515
516 //!
517 //! \brief fast 1toN setup HW states
518 //! \details Setup HW states for fast 1toN
519 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
520 //! [in] Pointer to the fast 1toN State
521 //! \param PVPHAL_FAST1TON_RENDER_DATA pRenderData
522 //! [in/out] Pointer to fast 1toN render data
523 //! \return MOS_STATUS
524 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
525 //!
VpHal_Fast1toNSetSamplerStates(PVPHAL_FAST1TON_STATE pFast1toNState,PVPHAL_FAST1TON_RENDER_DATA pRenderData)526 MOS_STATUS VpHal_Fast1toNSetSamplerStates(
527 PVPHAL_FAST1TON_STATE pFast1toNState,
528 PVPHAL_FAST1TON_RENDER_DATA pRenderData)
529 {
530 MOS_STATUS eStatus;
531 PRENDERHAL_INTERFACE pRenderHal;
532 PMHW_SAMPLER_STATE_PARAM pSamplerStateParams;
533 uint32_t index;
534
535 VPHAL_PUBLIC_CHK_NULL(pFast1toNState);
536 VPHAL_PUBLIC_CHK_NULL(pRenderData);
537
538 pRenderHal = pFast1toNState->pRenderHal;
539 VPHAL_PUBLIC_CHK_NULL(pRenderHal);
540
541 for (index = 0; index < pFast1toNState->uDstCount; index++)
542 {
543 pSamplerStateParams = &pRenderData->SamplerStateParams[index];
544 pSamplerStateParams->bInUse = true;
545 pSamplerStateParams->SamplerType = MHW_SAMPLER_TYPE_AVS;
546 pSamplerStateParams->Avs.bHdcDwEnable = true;
547 pSamplerStateParams->Avs.b8TapAdaptiveEnable = false;
548 pSamplerStateParams->Avs.bEnableAVS = true;
549 pSamplerStateParams->Avs.WeakEdgeThr = DETAIL_WEAK_EDGE_THRESHOLD;
550 pSamplerStateParams->Avs.StrongEdgeThr = DETAIL_STRONG_EDGE_THRESHOLD;
551 pSamplerStateParams->Avs.StrongEdgeWght = DETAIL_STRONG_EDGE_WEIGHT;
552 pSamplerStateParams->Avs.RegularWght = DETAIL_REGULAR_EDGE_WEIGHT;
553 pSamplerStateParams->Avs.NonEdgeWght = DETAIL_NON_EDGE_WEIGHT;
554 pSamplerStateParams->Unorm.SamplerFilterMode = MHW_SAMPLER_FILTER_NEAREST;
555 pSamplerStateParams->Avs.pMhwSamplerAvsTableParam = &pFast1toNState->mhwSamplerAvsTableParam[index];
556
557 VPHAL_RENDER_CHK_STATUS(VpHal_Fast1toNSetSamplerAvsTableParam(
558 pRenderHal,
559 pSamplerStateParams,
560 pRenderData->pAVSParameters[index],
561 pFast1toNState->pSource->Format,
562 pRenderData->ScalingRatio_H[index],
563 pRenderData->ScalingRatio_V[index],
564 MHW_CHROMA_SITING_HORZ_LEFT | MHW_CHROMA_SITING_VERT_TOP));
565 }
566
567 eStatus = pRenderHal->pfnSetSamplerStates(
568 pRenderHal,
569 pRenderData->iMediaID,
570 &pRenderData->SamplerStateParams[0],
571 pFast1toNState->uDstCount);
572
573 finish:
574 return eStatus;
575 }
576
577 //!
578 //! \brief fast 1toN setup HW states
579 //! \details Setup HW states for fast 1toN
580 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
581 //! [in] Pointer to the fast 1toN State
582 //! \param PVPHAL_FAST1TON_RENDER_DATA pRenderData
583 //! [in/out] Pointer to fast 1toN render data
584 //! \return MOS_STATUS
585 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
586 //!
VpHal_Fast1toNSetupHwStates(PVPHAL_FAST1TON_STATE pFast1toNState,PVPHAL_FAST1TON_RENDER_DATA pRenderData)587 MOS_STATUS VpHal_Fast1toNSetupHwStates(
588 PVPHAL_FAST1TON_STATE pFast1toNState,
589 PVPHAL_FAST1TON_RENDER_DATA pRenderData)
590 {
591 PRENDERHAL_INTERFACE pRenderHal;
592 int32_t iKrnAllocation;
593 int32_t iCurbeOffset;
594 MOS_STATUS eStatus;
595 int32_t iThreadCount;
596 MHW_KERNEL_PARAM MhwKernelParam;
597
598 VPHAL_RENDER_CHK_NULL(pFast1toNState);
599 VPHAL_RENDER_CHK_NULL(pRenderData);
600
601 eStatus = MOS_STATUS_SUCCESS;
602 pRenderHal = pFast1toNState->pRenderHal;
603 VPHAL_RENDER_CHK_NULL(pRenderHal);
604
605 // Allocate and reset media state
606 pRenderData->pMediaState = pRenderHal->pfnAssignMediaState(pRenderHal, (RENDERHAL_COMPONENT)RENDERHAL_COMPONENT_FAST1TON);
607 VPHAL_RENDER_CHK_NULL(pRenderData->pMediaState);
608
609 // Allocate and reset SSH instance
610 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnAssignSshInstance(pRenderHal));
611
612 // Assign and Reset Binding Table
613 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnAssignBindingTable(
614 pRenderHal,
615 &pRenderData->iBindingTable));
616
617 // Setup surface states
618 VPHAL_RENDER_CHK_STATUS(pFast1toNState->pfnSetupSurfaceStates(
619 pFast1toNState,
620 pRenderData));
621
622 // load static data
623 VPHAL_RENDER_CHK_STATUS(pFast1toNState->pfnLoadStaticData(
624 pFast1toNState,
625 pRenderData,
626 &iCurbeOffset));
627
628 if (pFast1toNState->pPerfData->CompMaxThreads.bEnabled)
629 {
630 iThreadCount = pFast1toNState->pPerfData->CompMaxThreads.uiVal;
631 }
632 else
633 {
634 iThreadCount = pRenderData->pKernelParam->Thread_Count;
635 }
636
637 // Setup VFE State params.
638 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnSetVfeStateParams(
639 pRenderHal,
640 MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
641 iThreadCount,
642 pRenderData->iCurbeLength,
643 pRenderData->iInlineLength,
644 nullptr));
645
646 // Load kernel to GSH
647 INIT_MHW_KERNEL_PARAM(MhwKernelParam, &pRenderData->KernelEntry);
648 iKrnAllocation = pRenderHal->pfnLoadKernel(
649 pRenderHal,
650 pRenderData->pKernelParam,
651 &MhwKernelParam,
652 nullptr);
653
654 if (iKrnAllocation < 0)
655 {
656 eStatus = MOS_STATUS_UNKNOWN;
657 goto finish;
658 }
659
660 // Allocate Media ID, link to kernel
661 pRenderData->iMediaID = pRenderHal->pfnAllocateMediaID(
662 pRenderHal,
663 iKrnAllocation,
664 pRenderData->iBindingTable,
665 iCurbeOffset,
666 (pRenderData->pKernelParam->CURBE_Length << 5),
667 0,
668 nullptr);
669
670 if (pRenderData->iMediaID < 0)
671 {
672 eStatus = MOS_STATUS_UNKNOWN;
673 goto finish;
674 }
675
676 // Set Sampler states for this Media ID
677 VPHAL_RENDER_CHK_STATUS(pFast1toNState->pfnSetSamplerStates(
678 pFast1toNState,
679 pRenderData));
680
681 finish:
682 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
683 return eStatus;
684 }
685
686 //!
687 //! \brief fast 1toN media walker setup
688 //! \details Media walker setup for bitcopy
689 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
690 //! [in] Pointer to the fast 1toN State
691 //! \param PVPHAL_FAST1TON_RENDER_DATA pRenderData
692 //! [in] Pointer to fast 1toN render data
693 //! \param PMHW_WALKER_PARAMS pWalkerParams
694 //! [in/out] Pointer to Walker params
695 //! \return MOS_STATUS
696 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
697 //!
VpHal_Fast1toNRenderMediaWalker(PVPHAL_FAST1TON_STATE pFast1toNState,PVPHAL_FAST1TON_RENDER_DATA pRenderData,PMHW_WALKER_PARAMS pWalkerParams)698 MOS_STATUS VpHal_Fast1toNRenderMediaWalker(
699 PVPHAL_FAST1TON_STATE pFast1toNState,
700 PVPHAL_FAST1TON_RENDER_DATA pRenderData,
701 PMHW_WALKER_PARAMS pWalkerParams)
702 {
703 PRENDERHAL_INTERFACE pRenderHal;
704 int32_t dwWidth = 0;
705 int32_t dwHeight = 0;
706 MOS_STATUS eStatus;
707
708 eStatus = MOS_STATUS_SUCCESS;
709 pRenderHal = pFast1toNState->pRenderHal;
710
711 // Calculate how many media object commands are needed.
712 // Using the Max output size to calculate the iBlock.
713 for (uint32_t i = 0; i < pFast1toNState->uDstCount; i++)
714 {
715 dwWidth = MOS_MAX(MOS_ALIGN_CEIL((pFast1toNState->pTarget[i]->rcSrc.right -
716 pFast1toNState->pTarget[i]->rcSrc.left),
717 pRenderData->pKernelParam->block_width), dwWidth);
718 dwHeight = MOS_MAX(MOS_ALIGN_CEIL((pFast1toNState->pTarget[i]->rcSrc.bottom -
719 pFast1toNState->pTarget[i]->rcSrc.top),
720 pRenderData->pKernelParam->block_height), dwHeight);
721 }
722
723 pRenderData->iBlocksX = dwWidth / pRenderData->pKernelParam->block_width;
724 pRenderData->iBlocksY = dwHeight / pRenderData->pKernelParam->block_height;
725
726 // Set walker cmd params - Rasterscan
727 MOS_ZeroMemory(pWalkerParams, sizeof(*pWalkerParams));
728
729 pWalkerParams->InterfaceDescriptorOffset = pRenderData->iMediaID;
730
731 pWalkerParams->dwGlobalLoopExecCount = 1;
732 pWalkerParams->dwLocalLoopExecCount = pRenderData->iBlocksY - 1;
733
734 pWalkerParams->GlobalResolution.x = pRenderData->iBlocksX;
735 pWalkerParams->GlobalResolution.y = pRenderData->iBlocksY;
736
737 pWalkerParams->GlobalStart.x = 0;
738 pWalkerParams->GlobalStart.y = 0;
739
740 pWalkerParams->GlobalOutlerLoopStride.x = pRenderData->iBlocksX;
741 pWalkerParams->GlobalOutlerLoopStride.y = 0;
742
743 pWalkerParams->GlobalInnerLoopUnit.x = 0;
744 pWalkerParams->GlobalInnerLoopUnit.y = pRenderData->iBlocksY;
745
746 pWalkerParams->BlockResolution.x = pRenderData->iBlocksX;
747 pWalkerParams->BlockResolution.y = pRenderData->iBlocksY;
748
749 pWalkerParams->LocalStart.x = 0;
750 pWalkerParams->LocalStart.y = 0;
751
752 pWalkerParams->LocalEnd.x = pRenderData->iBlocksX - 1;
753 pWalkerParams->LocalEnd.y = 0;
754
755 pWalkerParams->LocalOutLoopStride.x = 0;
756 pWalkerParams->LocalOutLoopStride.y = 1;
757
758 pWalkerParams->LocalInnerLoopUnit.x = 1;
759 pWalkerParams->LocalInnerLoopUnit.y = 0;
760
761 return eStatus;
762 }
763
764 //!
765 //! \brief fast 1toN renderer
766 //! \details Renderer function for fast 1toN
767 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
768 //! [in] Pointer to the fast 1toN State
769 //! \param PVPHAL_RENDER_PARAMS pRenderParams
770 //! [in] Pointer to fast 1toN render params
771 //! \return MOS_STATUS
772 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
773 //!
VpHal_Fast1toNRender(PVPHAL_FAST1TON_STATE pFast1toNState,PVPHAL_RENDER_PARAMS pRenderParams)774 MOS_STATUS VpHal_Fast1toNRender(
775 PVPHAL_FAST1TON_STATE pFast1toNState,
776 PVPHAL_RENDER_PARAMS pRenderParams)
777 {
778 MOS_STATUS eStatus;
779 PRENDERHAL_INTERFACE pRenderHal;
780 PMOS_INTERFACE pOsInterface;
781 MHW_WALKER_PARAMS WalkerParams;
782 VPHAL_FAST1TON_RENDER_DATA RenderData;
783 PRENDERHAL_L3_CACHE_SETTINGS pCacheSettings = nullptr;
784 uint32_t dwInputRegionHeight;
785 uint32_t dwInputRegionWidth;
786 uint32_t dwOutputRegionHeight;
787 uint32_t dwOutputRegionWidth;
788 uint32_t index;
789
790 VPHAL_RENDER_CHK_NULL(pFast1toNState);
791 VPHAL_RENDER_CHK_NULL(pRenderParams);
792 VPHAL_RENDER_CHK_NULL(pFast1toNState->pOsInterface);
793 VPHAL_RENDER_CHK_NULL(pFast1toNState->pRenderHal);
794 VPHAL_RENDER_CHK_NULL(pFast1toNState->pPerfData);
795
796 eStatus = MOS_STATUS_SUCCESS;
797 pOsInterface = pFast1toNState->pOsInterface;
798 pRenderHal = pFast1toNState->pRenderHal;
799 MOS_ZeroMemory(&RenderData, sizeof(RenderData));
800
801 // Reset reporting
802 pFast1toNState->Reporting.InitReportValue();
803
804 // Reset states before rendering
805 pOsInterface->pfnResetOsStates(pOsInterface);
806 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnReset(pRenderHal));
807 pOsInterface->pfnResetPerfBufferID(pOsInterface); // reset once per frame
808 for (index = 0; index < MAX_1TON_SUPPORT; index++)
809 {
810 pFast1toNState->pTarget[index] = nullptr;
811 pFast1toNState->Aligned16[index] = 0;
812 }
813
814 VPHAL_DBG_STATE_DUMPPER_SET_CURRENT_STAGE(VPHAL_DBG_STAGE_COMP);
815
816 // Configure cache settings for this render operation
817 pCacheSettings = &pRenderHal->L3CacheSettings;
818 MOS_ZeroMemory(pCacheSettings, sizeof(*pCacheSettings));
819 pCacheSettings->bOverride = true;
820 pCacheSettings->bL3CachingEnabled = pFast1toNState->SurfMemObjCtl.bL3CachingEnabled;
821
822 if (pFast1toNState->pPerfData->L3SQCReg1Override.bEnabled)
823 {
824 pCacheSettings->bSqcReg1Override = true;
825 pCacheSettings->dwSqcReg1 = pFast1toNState->pPerfData->L3SQCReg1Override.uiVal;
826 }
827
828 if (pFast1toNState->pPerfData->L3CntlReg2Override.bEnabled)
829 {
830 pCacheSettings->bCntlReg2Override = true;
831 pCacheSettings->dwCntlReg2 = pFast1toNState->pPerfData->L3CntlReg2Override.uiVal;
832 }
833
834 if (pFast1toNState->pPerfData->L3CntlReg3Override.bEnabled)
835 {
836 pCacheSettings->bCntlReg3Override = true;
837 pCacheSettings->dwCntlReg3 = pFast1toNState->pPerfData->L3CntlReg3Override.uiVal;
838 }
839
840 if (pFast1toNState->pPerfData->L3LRA1RegOverride.bEnabled)
841 {
842 pCacheSettings->bLra1RegOverride = true;
843 pCacheSettings->dwLra1Reg = pFast1toNState->pPerfData->L3LRA1RegOverride.uiVal;
844 }
845
846 // Setup Source/Target surface and get the Source width/height
847 pFast1toNState->pSource = pRenderParams->pSrc[0];
848 dwInputRegionWidth = pFast1toNState->pSource->rcSrc.right - pFast1toNState->pSource->rcSrc.left;
849 dwInputRegionHeight = pFast1toNState->pSource->rcSrc.bottom - pFast1toNState->pSource->rcSrc.top;
850 pFast1toNState->uDstCount = pRenderParams->uDstCount;
851 for (index = 0; index < pFast1toNState->uDstCount; index++)
852 {
853 pFast1toNState->pTarget[index] = pRenderParams->pTarget[index];
854 pFast1toNState->Aligned16[index] = pRenderParams->pTarget[index]->b16UsrPtr;
855 dwOutputRegionWidth = pFast1toNState->pTarget[index]->rcSrc.right - pFast1toNState->pTarget[index]->rcSrc.left;
856 dwOutputRegionHeight = pFast1toNState->pTarget[index]->rcSrc.bottom - pFast1toNState->pTarget[index]->rcSrc.top;
857 RenderData.ScalingStep_H[index] = (float)1.0 / (float)dwOutputRegionWidth;
858 RenderData.ScalingStep_V[index] = (float)1.0 / (float)dwOutputRegionHeight;
859 RenderData.ScalingRatio_H[index] = (float)dwOutputRegionWidth / (float)dwInputRegionWidth;
860 RenderData.ScalingRatio_V[index] = (float)dwOutputRegionHeight / (float)dwInputRegionHeight;
861 RenderData.pAVSParameters[index] = &pFast1toNState->AVSParameters[index];
862 RenderData.SamplerStateParams[index].Avs.pMhwSamplerAvsTableParam = &RenderData.mhwSamplerAvsTableParam[index];
863 }
864
865 // Ensure input can be read
866 pOsInterface->pfnSyncOnResource(
867 pOsInterface,
868 &pFast1toNState->pSource->OsResource,
869 pOsInterface->CurrentGpuContextOrdinal,
870 false);
871
872 // Ensure the output can be written
873 for (index = 0; index < pFast1toNState->uDstCount; index++)
874 {
875 pOsInterface->pfnSyncOnResource(
876 pOsInterface,
877 &pFast1toNState->pTarget[index]->OsResource,
878 pOsInterface->CurrentGpuContextOrdinal,
879 true);
880 }
881
882 // Setup copy kernel
883 VPHAL_RENDER_CHK_STATUS(pFast1toNState->pfnSetupKernel(
884 pFast1toNState,
885 &RenderData));
886
887 // Submit HW States and Commands
888 VPHAL_RENDER_CHK_STATUS(VpHal_Fast1toNSetupHwStates(
889 pFast1toNState,
890 &RenderData));
891
892 // Set perftag information
893 pOsInterface->pfnResetPerfBufferID(pOsInterface);
894 pOsInterface->pfnSetPerfTag(pOsInterface, RenderData.PerfTag);
895
896 VPHAL_RENDER_CHK_STATUS(VpHal_Fast1toNRenderMediaWalker(
897 pFast1toNState,
898 &RenderData,
899 &WalkerParams));
900
901 VPHAL_DBG_STATE_DUMPPER_DUMP_GSH(pRenderHal);
902 VPHAL_DBG_STATE_DUMPPER_DUMP_SSH(pRenderHal);
903
904 VPHAL_RENDER_CHK_STATUS(VpHal_RndrSubmitCommands(
905 pRenderHal,
906 nullptr,
907 pFast1toNState->bNullHwRenderfast1toN,
908 &WalkerParams,
909 nullptr,
910 &pFast1toNState->StatusTableUpdateParams,
911 kernelFast1toN,
912 0,
913 nullptr,
914 true));
915
916 finish:
917 MOS_ZeroMemory(pCacheSettings, sizeof(*pCacheSettings));
918 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
919 return eStatus;
920 }
921
922 //!
923 //! \brief fast 1toN Destroy state
924 //! \details Function to destroy fast 1toN state
925 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
926 //! [in] Pointer to the fast 1toN State
927 //! \return MOS_STATUS
928 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
929 //!
VpHal_Fast1toNDestroy(PVPHAL_FAST1TON_STATE pFast1toNState)930 MOS_STATUS VpHal_Fast1toNDestroy(
931 PVPHAL_FAST1TON_STATE pFast1toNState)
932 {
933 MOS_STATUS eStatus;
934 uint32_t index;
935 eStatus = MOS_STATUS_SUCCESS;
936 VPHAL_RENDER_CHK_NULL(pFast1toNState);
937 for (index = 0; index < MAX_1TON_SUPPORT; index++)
938 {
939 VpHal_RenderDestroyAVSParams(&pFast1toNState->AVSParameters[index]);
940 }
941 MOS_UNUSED(pFast1toNState);
942
943 finish:
944 return eStatus;
945 }
946
947 //!
948 //! \brief fast 1toN kernel state Initializations
949 //! \details Kernel state Initializations for fast 1toN
950 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
951 //! [in] Pointer to the fast 1toN State
952 //! \param const VphalSettings* pSettings
953 //! [in] Pointer to VPHAL Setting
954 //! \param Kdll_State pKernelDllState
955 //! [in/out] Pointer to bitcopy kernel Dll state
956 //! \return MOS_STATUS
957 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
958 //!
VpHal_Fast1toNInitialize(PVPHAL_FAST1TON_STATE pFast1toNState,const VphalSettings * pSettings,Kdll_State * pKernelDllState)959 MOS_STATUS VpHal_Fast1toNInitialize(
960 PVPHAL_FAST1TON_STATE pFast1toNState,
961 const VphalSettings *pSettings,
962 Kdll_State *pKernelDllState)
963 {
964 MOS_NULL_RENDERING_FLAGS NullRenderingFlags;
965 uint32_t index;
966
967 VPHAL_RENDER_ASSERT(pFast1toNState);
968 VPHAL_RENDER_ASSERT(pFast1toNState->pOsInterface);
969
970 NullRenderingFlags =
971 pFast1toNState->pOsInterface->pfnGetNullHWRenderFlags(pFast1toNState->pOsInterface);
972 pFast1toNState->bNullHwRenderfast1toN =
973 NullRenderingFlags.VPLgca ||
974 NullRenderingFlags.VPGobal;
975
976 // Setup interface to KDLL
977 pFast1toNState->pKernelDllState = pKernelDllState;
978 for (index = 0; index < MAX_1TON_SUPPORT; index++)
979 {
980 VpHal_RenderInitAVSParams(&pFast1toNState->AVSParameters[index],
981 POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G9,
982 POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G9);
983 }
984
985 return MOS_STATUS_SUCCESS;
986 }
987
988 //!
989 //! \brief fast 1toN setup surface states
990 //! \details Setup surface states for fast 1toN
991 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
992 //! [in] Pointer to the fast 1toN State
993 //! \param PVPHAL_FAST1TON_RENDER_DATA pRenderData
994 //! [in] Pointer to fast 1toN render data
995 //! \return MOS_STATUS
996 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
997 //!
VpHal_Fast1toNSetupSurfaceStates(PVPHAL_FAST1TON_STATE pFast1toNState,PVPHAL_FAST1TON_RENDER_DATA pRenderData)998 MOS_STATUS VpHal_Fast1toNSetupSurfaceStates(
999 PVPHAL_FAST1TON_STATE pFast1toNState,
1000 PVPHAL_FAST1TON_RENDER_DATA pRenderData)
1001 {
1002 PRENDERHAL_INTERFACE pRenderHal;
1003 RENDERHAL_SURFACE_STATE_PARAMS SurfaceParams;
1004 MOS_STATUS eStatus;
1005 uint32_t index;
1006 uint32_t width = 0;
1007 MOS_FORMAT format = Format_NV12;
1008 int32_t iBTEntry;
1009 eStatus = MOS_STATUS_SUCCESS;
1010 pRenderHal = pFast1toNState->pRenderHal;
1011
1012 // Source surface
1013 MOS_ZeroMemory(&SurfaceParams, sizeof(SurfaceParams));
1014
1015 SurfaceParams.bAVS = true;
1016 SurfaceParams.Boundary = RENDERHAL_SS_BOUNDARY_SRCRECT;
1017 SurfaceParams.isOutput = false;
1018 SurfaceParams.MemObjCtl =
1019 pFast1toNState->SurfMemObjCtl.SourceSurfMemObjCtl;
1020 SurfaceParams.Type = RENDERHAL_SURFACE_TYPE_ADV_G9;
1021 SurfaceParams.bWidthInDword_Y = false;
1022 SurfaceParams.bWidthInDword_UV = false;
1023 SurfaceParams.bWidth16Align = false;
1024
1025 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetSurfaceForHwAccess(
1026 pRenderHal,
1027 pFast1toNState->pSource,
1028 &pFast1toNState->RenderHalSource,
1029 &SurfaceParams,
1030 pRenderData->iBindingTable,
1031 FAST1TON_SRC_INDEX,
1032 false));
1033
1034 // Target surface
1035 SurfaceParams.MemObjCtl =
1036 pFast1toNState->SurfMemObjCtl.TargetSurfMemObjCtl;
1037 SurfaceParams.Type = pRenderHal->SurfaceTypeDefault;
1038 SurfaceParams.isOutput = true;
1039 SurfaceParams.bAVS = false;
1040 SurfaceParams.Boundary = RENDERHAL_SS_BOUNDARY_DSTRECT;
1041
1042 for (index = 0; index < pFast1toNState->uDstCount; index++)
1043 {
1044 if (pFast1toNState->Aligned16[index])
1045 {
1046 // store and recalculate the target format and width
1047 format = pFast1toNState->pTarget[index]->Format;
1048 width = pFast1toNState->pTarget[index]->dwWidth;
1049 pFast1toNState->pTarget[index]->Format = Format_RAW;
1050 #if defined(LINUX) && !defined(WDDM_LINUX)
1051 pFast1toNState->pTarget[index]->dwWidth = (pFast1toNState->pTarget[index]->dwHeight * pFast1toNState->pTarget[index]->OsResource.iPitch) * 3/2;
1052 #endif
1053 pFast1toNState->pTarget[index]->dwWidth = MOS_ALIGN_CEIL(pFast1toNState->pTarget[index]->dwWidth, 128);
1054 iBTEntry = (index == 0)?FAST1TON_DST_INDEX0:((index == 1)?FAST1TON_DST_INDEX1:FAST1TON_DST_INDEX2);
1055
1056 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
1057 pRenderHal,
1058 pFast1toNState->pTarget[index],
1059 &pFast1toNState->RenderHalTarget[index],
1060 &SurfaceParams,
1061 pRenderData->iBindingTable,
1062 iBTEntry,
1063 true));
1064
1065 // restore the target format and width
1066 pFast1toNState->pTarget[index]->Format = format;
1067 pFast1toNState->pTarget[index]->dwWidth = width;
1068 }
1069 else
1070 {
1071 iBTEntry = (index == 0)?FAST1TON_DST_Y_INDEX0:((index == 1)?FAST1TON_DST_Y_INDEX1:FAST1TON_DST_Y_INDEX2);
1072 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetSurfaceForHwAccess(
1073 pRenderHal,
1074 pFast1toNState->pTarget[index],
1075 &pFast1toNState->RenderHalTarget[index],
1076 &SurfaceParams,
1077 pRenderData->iBindingTable,
1078 iBTEntry,
1079 true));
1080 }
1081 }
1082
1083 finish:
1084 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
1085 return eStatus;
1086 }
1087
1088 //!
1089 //! \brief fast 1toN interface Initializations
1090 //! \details Interface Initializations for fast 1toN
1091 //! \param PVPHAL_FAST1TON_STATE pFast1toNState
1092 //! [in] Pointer to the fast 1toN State
1093 //! \param PRENDERHAL_INTERFACE pRenderHal
1094 //! [in/out] Pointer to RenderHal Interface Structure
1095 //! \return MOS_STATUS
1096 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
1097 //!
VpHal_Fast1toNInitInterface(PVPHAL_FAST1TON_STATE pFast1toNState,PRENDERHAL_INTERFACE pRenderHal)1098 MOS_STATUS VpHal_Fast1toNInitInterface(
1099 PVPHAL_FAST1TON_STATE pFast1toNState,
1100 PRENDERHAL_INTERFACE pRenderHal)
1101 {
1102 PMOS_INTERFACE pOsInterface;
1103 MOS_STATUS eStatus;
1104
1105 eStatus = MOS_STATUS_SUCCESS;
1106 pOsInterface = pRenderHal->pOsInterface;
1107
1108 // Connect renderer to other VPHAL components (HW/OS interfaces)
1109 pFast1toNState->pRenderHal = pRenderHal;
1110 pFast1toNState->pOsInterface = pOsInterface;
1111 pFast1toNState->pSkuTable = pRenderHal->pSkuTable;
1112
1113 // Setup functions
1114 pFast1toNState->pfnInitialize = VpHal_Fast1toNInitialize;
1115 pFast1toNState->pfnDestroy = VpHal_Fast1toNDestroy;
1116 pFast1toNState->pfnRender = VpHal_Fast1toNRender;
1117 pFast1toNState->pfnSetupSurfaceStates = VpHal_Fast1toNSetupSurfaceStates;
1118
1119 // States
1120 pFast1toNState->pKernelParamTable = (PRENDERHAL_KERNEL_PARAM)g_fast1toN_MW_KernelParam;
1121 pFast1toNState->bFtrMediaWalker =
1122 pFast1toNState->pRenderHal->pfnGetMediaWalkerStatus(pFast1toNState->pRenderHal) ? true : false;
1123
1124 pFast1toNState->pfnLoadStaticData = VpHal_Fast1toNLoadStaticData;
1125 pFast1toNState->pfnSetupKernel = VpHal_Fast1toNSetupKernel;
1126 pFast1toNState->pfnSetSamplerStates = VpHal_Fast1toNSetSamplerStates;
1127
1128 return eStatus;
1129 }
1130
1131 //!
1132 //! \brief check if intput/output is a fast 1toN case
1133 //! \param [in] pRenderParams
1134 //! Pointer to VPHAL render parameter
1135 //! \param [in] pSrcSurface
1136 //! Pointer to input surface.
1137 //! \return ture if this case match fas 1toN condition, otherwise return fasle.
1138 //!
VpHal_RndrIsFast1toNSupport(PVPHAL_FAST1TON_STATE pFast1toNState,PVPHAL_RENDER_PARAMS pRenderParams,PVPHAL_SURFACE pSrcSurface)1139 bool VpHal_RndrIsFast1toNSupport(
1140 PVPHAL_FAST1TON_STATE pFast1toNState,
1141 PVPHAL_RENDER_PARAMS pRenderParams,
1142 PVPHAL_SURFACE pSrcSurface)
1143 {
1144 bool ret = true;
1145 uint32_t uiIndex_out;
1146 float ScalingRatio_H;
1147 float ScalingRatio_V;
1148
1149 if (!GFX_IS_RENDERCORE(pFast1toNState->pRenderHal->Platform, IGFX_GEN9_CORE))
1150 {
1151 ret = false;
1152 goto finish;
1153 }
1154
1155 // fast 1toN only support scaling now.
1156 if (pRenderParams->pConstriction != nullptr ||
1157 pSrcSurface->pLumaKeyParams != nullptr ||
1158 pSrcSurface->pProcampParams != nullptr ||
1159 pSrcSurface->pIEFParams != nullptr ||
1160 pSrcSurface->bInterlacedScaling ||
1161 pSrcSurface->bFieldWeaving ||
1162 pSrcSurface->pDenoiseParams != nullptr ||
1163 pSrcSurface->pColorPipeParams != nullptr ||
1164 pSrcSurface->pDeinterlaceParams != nullptr ||
1165 !(pSrcSurface->pBlendingParams == nullptr ||
1166 (pSrcSurface->pBlendingParams != nullptr &&
1167 pSrcSurface->pBlendingParams->BlendType == BLEND_NONE)))
1168 {
1169 ret = false;
1170 goto finish;
1171 }
1172 if ((pSrcSurface->Format != Format_NV12) || (pRenderParams->uDstCount > MAX_1TON_SUPPORT)
1173 || (pRenderParams->uDstCount < 2))
1174 {
1175 ret = false;
1176 goto finish;
1177 }
1178 for (uiIndex_out = 0; uiIndex_out < pRenderParams->uDstCount; uiIndex_out++)
1179 {
1180 if (pRenderParams->pTarget[uiIndex_out]->Format != Format_NV12)
1181 {
1182 ret = false;
1183 goto finish;
1184 }
1185 // check scaling ratio
1186 ScalingRatio_H = (float)pRenderParams->pTarget[uiIndex_out]->dwHeight/(float)(pSrcSurface->rcSrc.bottom - pSrcSurface->rcSrc.top);
1187 ScalingRatio_V = (float)pRenderParams->pTarget[uiIndex_out]->dwWidth/(float)(pSrcSurface->rcSrc.right - pSrcSurface->rcSrc.left);
1188 if (ScalingRatio_H < 0.0625f || ScalingRatio_V < 0.0625f)
1189 {
1190 ret = false;
1191 goto finish;
1192 }
1193 }
1194
1195 finish:
1196 return ret;
1197 }
1198
1199
1200