1 /*
2 * Copyright (c) 2017-2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 /*
24 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
25 *
26 * Use of this source code is governed by a BSD-style license
27 * that can be found in the LICENSE file in the root of the source
28 * tree. An additional intellectual property rights grant can be found
29 * in the file PATENTS. All contributing project authors may
30 * be found in the AUTHORS file in the root of the source tree.
31 */
32
33 //!
34 //! \file codechal_vdenc_vp9_g11.cpp
35 //! \brief VP9 VDENC encoder for GEN11.
36 //!
37 #include "codechal_vdenc_vp9_g11.h"
38 #include "codechal_kernel_header_g11.h"
39 #include "codeckrnheader.h"
40 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
41 #include "igcodeckrn_g11.h"
42 #endif
43 #include "mhw_vdbox_hcp_g11_X.h"
44 #include "mhw_vdbox_vdenc_g11_X.h"
45 #include "mhw_vdbox_g11_X.h"
46 #include "mhw_vdbox_vdenc_hwcmd_g11_X.h"
47 #include "codechal_huc_cmd_initializer_g11.h"
48
49 const uint32_t CodechalVdencVp9StateG11::meCurbeInit[48] =
50 {
51 0x00000000, 0x00200010, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
52 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
53 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
54 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
55 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
56 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
57 };
58
UserFeatureKeyReport()59 MOS_STATUS CodechalVdencVp9StateG11::UserFeatureKeyReport()
60 {
61 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
62
63 CODECHAL_ENCODE_FUNCTION_ENTER;
64
65 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::UserFeatureKeyReport());
66
67 #if (_DEBUG || _RELEASE_INTERNAL)
68 CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
69 CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH, m_enableTileStitchByHW, m_osInterface->pOsContext);
70 CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
71 #endif
72
73 return eStatus;
74 }
75
CodechalVdencVp9StateG11(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)76 CodechalVdencVp9StateG11::CodechalVdencVp9StateG11(
77 CodechalHwInterface* hwInterface,
78 CodechalDebugInterface* debugInterface,
79 PCODECHAL_STANDARD_INFO standardInfo)
80 :CodechalVdencVp9State(hwInterface, debugInterface, standardInfo)
81 {
82 m_useCommonKernel = true;
83 m_isTilingSupported = true;
84
85 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
86 m_kernelBase = (uint8_t *)IGCODECKRN_G11;
87 #endif
88
89 // KUID for HME + DS + SW SCOREBOARD Kernel
90 m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
91
92 // We need the DYS kernel inside AllVP9Enc_CNLA0, for SHME we need kernels inside
93 // HME_DS_SCOREBOARD_KERNEL, so we need to allocate enough size in ISH for both.
94 pfnGetKernelHeaderAndSize = GetCommonKernelHeaderAndSizeG11;
95
96 uint8_t* binary = nullptr;
97 uint32_t combinedKernelSize = 0;
98 m_scalabilityState = nullptr;
99
100 m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_VP9_NUM_SYNC_TAGS;
101 m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_ENCODE_VP9_INIT_DSH_SIZE;
102
103 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
104 if (m_useCommonKernel)
105 {
106 m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
107 eStatus = CodecHalGetKernelBinaryAndSize(
108 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
109 (uint8_t*)IGCODECKRN_G11,
110 #else
111 nullptr,
112 #endif
113 m_kuidCommon,
114 &binary,
115 &combinedKernelSize);
116 CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
117
118 m_hwInterface->GetStateHeapSettings()->dwIshSize +=
119 MOS_ALIGN_CEIL(combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
120 }
121
122 // Initialize to 0
123 MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
124 MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
125 MOS_ZeroMemory(&m_hcpScalabilitySyncBuffer, sizeof(m_hcpScalabilitySyncBuffer));
126
127 for (auto i = 0; i < m_numUncompressedSurface; i++)
128 {
129 MOS_ZeroMemory(&m_tileRecordBuffer[i].sResource, sizeof(m_tileRecordBuffer[i].sResource));
130 }
131 for (auto i = 0; i < m_numUncompressedSurface; i++)
132 {
133 MOS_ZeroMemory(&m_tileStatsPakIntegrationBuffer[i].sResource, sizeof(m_tileStatsPakIntegrationBuffer[i].sResource));
134 }
135 MOS_ZeroMemory(&m_frameStatsPakIntegrationBuffer.sResource, sizeof(m_frameStatsPakIntegrationBuffer.sResource));
136 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
137 {
138 for (auto j = 0; j < m_brcMaxNumPasses; j++)
139 {
140 MOS_ZeroMemory(&m_hucPakIntDmemBuffer[i][j], sizeof(m_hucPakIntDmemBuffer[i][j]));
141 }
142 }
143 MOS_ZeroMemory(&m_hucPakIntDummyBuffer, sizeof(m_hucPakIntDummyBuffer));
144 MOS_ZeroMemory(&m_hucPakIntBrcDataBuffer, sizeof(m_hucPakIntBrcDataBuffer));
145 MOS_ZeroMemory(&m_resPipeStartSync, sizeof(m_resPipeStartSync));
146 MOS_ZeroMemory(&m_resDelayMinus, sizeof(m_resDelayMinus));
147 for (auto i = 0; i < m_maxNumPipes; i++)
148 {
149 MOS_ZeroMemory(&m_stitchWaitSemaphoreMem[i], sizeof(m_stitchWaitSemaphoreMem[i]));
150 }
151
152 for (auto i = 0; i < 3; i++)
153 {
154 MOS_ZeroMemory(&m_refPicList0[i], sizeof(m_refPicList0[i]));
155 }
156 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
157 m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
158 Mos_SetVirtualEngineSupported(m_osInterface, true);
159 }
160
GetSystemPipeNumberCommon()161 MOS_STATUS CodechalVdencVp9StateG11::GetSystemPipeNumberCommon()
162 {
163 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
164 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
165
166 CODECHAL_ENCODE_FUNCTION_ENTER;
167
168 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
169 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
170 statusKey = MOS_UserFeature_ReadValue_ID(
171 NULL,
172 __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
173 &userFeatureData,
174 m_osInterface->pOsContext);
175
176 //Disable scalability temporarily
177 bool disableScalability = true; // m_hwInterface->IsDisableScalability()
178 if (statusKey == MOS_STATUS_SUCCESS)
179 {
180 disableScalability = userFeatureData.i32Data ? true : false;
181 }
182
183 MEDIA_SYSTEM_INFO *gtSystemInfo = m_gtSystemInfo;
184
185 if (gtSystemInfo && disableScalability == false)
186 {
187 // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
188 m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
189 }
190 else
191 {
192 m_numVdbox = 1;
193 }
194
195 return eStatus;
196 }
197
ExecuteDysSliceLevel()198 MOS_STATUS CodechalVdencVp9StateG11::ExecuteDysSliceLevel()
199 {
200 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
201
202 CODECHAL_ENCODE_FUNCTION_ENTER;
203
204 CODECHAL_ENCODE_CHK_NULL_RETURN(m_nalUnitParams);
205
206 MOS_COMMAND_BUFFER cmdBuffer;
207 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
208
209 if (!m_singleTaskPhaseSupported)
210 {
211 PerfTagSetting perfTag;
212 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
213 }
214
215 MHW_BATCH_BUFFER secondLevelBatchBuffer;
216 MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
217 secondLevelBatchBuffer.dwOffset = 0;
218 secondLevelBatchBuffer.bSecondLevel = true;
219 if (!m_hucEnabled)
220 {
221 secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
222 }
223 else
224 {
225 secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
226 }
227 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
228 &cmdBuffer,
229 &secondLevelBatchBuffer));
230
231 // Setup Tile level PAK commands
232 CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
233
234 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9StateG11::SetTileData());
235 CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG11 *>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[0]));
236
237 //Disbale Frame Tracking Header for this submission as this is not the last submission
238 bool isFrameTrackingHeaderSet = cmdBuffer.Attributes.bEnableMediaFrameTracking;
239 cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
240
241 MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(MHW_BATCH_BUFFER));
242 secondLevelBatchBuffer.OsResource = m_resMbCodeSurface;
243 secondLevelBatchBuffer.dwOffset = 0;
244 secondLevelBatchBuffer.bSecondLevel = true;
245 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &secondLevelBatchBuffer));
246
247 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
248 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
249 // MFXPipeDone should not be set for tail insertion
250 vdPipelineFlushParams.Flags.bWaitDoneMFX =
251 (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
252 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
253 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
254 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
255
256 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
257
258 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
259 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
260 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
261
262 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
263
264 if (!m_scalableMode) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
265 {
266 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
267 }
268
269 if (m_currPass >= (m_numPasses - 1)) // Last pass and the one before last
270 {
271 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
272 }
273
274 std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
275 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
276 &cmdBuffer,
277 CODECHAL_NUM_MEDIA_STATES,
278 currPassName.data())));
279
280 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
281
282 if (m_waitForEnc &&
283 !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
284 {
285 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
286 syncParams.GpuContext = m_videoContext;
287 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
288
289 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
290 m_waitForEnc = false;
291 }
292
293 if (m_currPass >= (m_numPasses - 1)) // Last pass and the one before last
294 {
295 bool renderFlags;
296
297 renderFlags = m_videoContextUsesNullHw;
298
299 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
300 }
301
302 //Restore the frame tracking header for the further passes and submissions
303 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
304 cmdBuffer.Attributes.bEnableMediaFrameTracking = isFrameTrackingHeaderSet;
305 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
306
307 CODECHAL_DEBUG_TOOL(
308 if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
309 //CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
310 //m_debugInterface->DumpBuffer(
311 // (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
312 // CodechalDbgAttr::attrOutput,
313 // "SegMap_Out",
314 // CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
315 // 0,
316 // CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
317 } if (m_mmcState) {
318 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
319 });
320
321 return eStatus;
322 }
323
InitKernelStateMe()324 MOS_STATUS CodechalVdencVp9StateG11::InitKernelStateMe()
325 {
326 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
327
328 CODECHAL_ENCODE_FUNCTION_ENTER;
329
330 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
331 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderEngineInterface->GetHwCaps());
332
333 uint32_t combinedKernelSize = 0;
334 uint8_t *binary = nullptr;
335 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
336 m_kernelBase,
337 m_kuidCommon,
338 &binary,
339 &combinedKernelSize));
340
341 for (uint32_t krnStateIdx = 0; krnStateIdx < CodechalEncoderState::CODECHAL_ENCODE_ME_IDX_NUM; krnStateIdx++)
342 {
343 CODECHAL_KERNEL_HEADER currKrnHeader;
344 PMHW_KERNEL_STATE kernelStatePtr = &m_meKernelStates[krnStateIdx];
345 uint32_t kernelSize = combinedKernelSize;
346 // For dual pipe HME-P kernel state is loaded for both ids
347 // Non legacy streamin is a new hevc vp9 streamin kernel
348 EncOperation encOperation = (krnStateIdx > 0 && m_vdencEnabled) ? (m_useNonLegacyStreamin ? VDENC_STREAMIN_HEVC : VDENC_ME) : ENC_ME;
349 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
350 binary,
351 encOperation,
352 (encOperation == ENC_ME) ? krnStateIdx : 0,
353 &currKrnHeader,
354 &kernelSize));
355
356 kernelStatePtr->KernelParams.iBTCount = CODECHAL_ENCODE_ME_NUM_SURFACES_G11;
357 kernelStatePtr->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
358 kernelStatePtr->KernelParams.iCurbeLength = sizeof(MeCurbe);
359 kernelStatePtr->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
360 kernelStatePtr->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
361 kernelStatePtr->KernelParams.iIdCount = 1;
362
363 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
364 kernelStatePtr->KernelParams.pBinary = binary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
365 kernelStatePtr->KernelParams.iSize = kernelSize;
366 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
367 m_stateHeapInterface,
368 kernelStatePtr->KernelParams.iBTCount,
369 &kernelStatePtr->dwSshSize,
370 &kernelStatePtr->dwBindingTableSize));
371
372 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
373
374 if (m_noMeKernelForPFrame)
375 {
376 m_meKernelStates[1] = m_meKernelStates[0];
377 break;
378 }
379 }
380
381 // Until a better way can be found, maintain old binding table structures
382 MeKernelBindingTable *bindingTable = &m_meBindingTable;
383 bindingTable->dwMEMVDataSurface = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G11;
384 bindingTable->dw16xMEMVDataSurface = CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G11;
385 bindingTable->dw32xMEMVDataSurface = CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G11;
386 bindingTable->dwMEDist = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G11;
387 bindingTable->dwMEBRCDist = CODECHAL_ENCODE_ME_BRC_DISTORTION_G11;
388 bindingTable->dwMECurrForFwdRef = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G11;
389 bindingTable->dwMEFwdRefPicIdx[0] = CODECHAL_ENCODE_ME_FWD_REF_IDX0_G11;
390 bindingTable->dwMEFwdRefPicIdx[1] = CODECHAL_ENCODE_ME_FWD_REF_IDX1_G11;
391 bindingTable->dwMEFwdRefPicIdx[2] = CODECHAL_ENCODE_ME_FWD_REF_IDX2_G11;
392 bindingTable->dwMEFwdRefPicIdx[3] = CODECHAL_ENCODE_ME_FWD_REF_IDX3_G11;
393 bindingTable->dwMEFwdRefPicIdx[4] = CODECHAL_ENCODE_ME_FWD_REF_IDX4_G11;
394 bindingTable->dwMEFwdRefPicIdx[5] = CODECHAL_ENCODE_ME_FWD_REF_IDX5_G11;
395 bindingTable->dwMEFwdRefPicIdx[6] = CODECHAL_ENCODE_ME_FWD_REF_IDX6_G11;
396 bindingTable->dwMEFwdRefPicIdx[7] = CODECHAL_ENCODE_ME_FWD_REF_IDX7_G11;
397 bindingTable->dwMECurrForBwdRef = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G11;
398 bindingTable->dwMEBwdRefPicIdx[0] = CODECHAL_ENCODE_ME_BWD_REF_IDX0_G11;
399 bindingTable->dwMEBwdRefPicIdx[1] = CODECHAL_ENCODE_ME_BWD_REF_IDX1_G11;
400 bindingTable->dwVdencStreamInSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G11;
401 bindingTable->dwVdencStreamInInputSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G11;
402 #endif
403
404 return eStatus;
405 }
406
InitKernelStates()407 MOS_STATUS CodechalVdencVp9StateG11::InitKernelStates()
408 {
409 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
410
411 CODECHAL_ENCODE_FUNCTION_ENTER;
412
413 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
414 // DYS
415 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateDys());
416
417 // G11 VDEnc SHME (16x) and 4x/streamin
418 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
419 #endif
420
421 return eStatus;
422 }
423
GetMaxBtCount()424 uint32_t CodechalVdencVp9StateG11::GetMaxBtCount()
425 {
426 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
427
428 CODECHAL_ENCODE_FUNCTION_ENTER;
429 uint32_t maxBtCount = 0;
430
431 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
432 if (m_hmeSupported)
433 {
434 uint32_t scalingBtCount = 0;
435 uint32_t numKernelsToLoad = m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
436 uint16_t btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
437 for(uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
438 {
439 scalingBtCount += MOS_ALIGN_CEIL(
440 m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
441 btIdxAlignment);
442 }
443 uint32_t meBtCount = 0;
444 // 4xME + Streamin kernel btcount
445 meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_VDENC].KernelParams.iBTCount, btIdxAlignment);
446
447 //16xME streamin kernel count added to ME count and scaling kernel 16x added to scaling count
448 if (m_16xMeSupported)
449 {
450 meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_P].KernelParams.iBTCount, btIdxAlignment);
451 for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
452 {
453 scalingBtCount += MOS_ALIGN_CEIL(
454 m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
455 btIdxAlignment);
456 }
457 }
458 maxBtCount = scalingBtCount + meBtCount;
459 }
460 #endif
461
462 return maxBtCount;
463 }
464
465 // DYS kernel state init
InitKernelStateDys()466 MOS_STATUS CodechalVdencVp9StateG11::InitKernelStateDys()
467 {
468 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
469
470 CODECHAL_ENCODE_FUNCTION_ENTER;
471
472 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
473 uint32_t combinedKernelSize = 0;
474 uint8_t* binary = nullptr;
475 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
476 (uint8_t*)IGCODECKRN_G11,
477 m_kuidCommon,
478 &binary,
479 &combinedKernelSize));
480
481 uint32_t kernelSize = combinedKernelSize;
482 CODECHAL_KERNEL_HEADER currKrnHeader;
483 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
484 binary,
485 ENC_DYS,
486 0,
487 &currKrnHeader,
488 &kernelSize));
489
490 PMHW_KERNEL_STATE kernelState = &m_dysKernelState;
491 kernelState->KernelParams.iBTCount = MOS_ALIGN_CEIL(m_dysNumSurfaces, m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment());
492 kernelState->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
493 kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(m_dysStaticDataSize, m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
494 kernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
495 kernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
496 kernelState->KernelParams.iIdCount = 1;
497 kernelState->KernelParams.iSamplerCount = 1;
498 kernelState->KernelParams.iSamplerLength = m_stateHeapInterface->pStateHeapInterface->GetSizeofSamplerStateAvs();
499
500 kernelState->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
501 kernelState->dwSamplerOffset = MOS_ALIGN_CEIL(kernelState->dwCurbeOffset + kernelState->KernelParams.iCurbeLength, MHW_SAMPLER_STATE_AVS_ALIGN_G9);
502 kernelState->KernelParams.pBinary =
503 binary +
504 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
505 kernelState->KernelParams.iSize = kernelSize;
506 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
507 m_stateHeapInterface,
508 kernelState->KernelParams.iBTCount,
509 &kernelState->dwSshSize,
510 &kernelState->dwBindingTableSize));
511
512 m_dysDshSize = kernelState->dwSamplerOffset +
513 MOS_ALIGN_CEIL(kernelState->KernelParams.iSamplerLength * kernelState->KernelParams.iSamplerCount, MHW_SAMPLER_STATE_AVS_ALIGN);
514
515 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelState));
516 #endif
517
518 return eStatus;
519 }
520
SetupSegmentationStreamIn()521 MOS_STATUS CodechalVdencVp9StateG11::SetupSegmentationStreamIn()
522 {
523 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
524
525 CODECHAL_ENCODE_FUNCTION_ENTER;
526
527 if (!m_segmentMapProvided && !m_hmeEnabled) // If we're not going to use the streamin surface leave now
528 {
529 return eStatus;
530 }
531
532 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
533 MOS_LOCK_PARAMS lockFlagsWriteOnly;
534 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
535 lockFlagsWriteOnly.WriteOnly = 1;
536
537 MOS_LOCK_PARAMS lockFlagsReadOnly;
538 MOS_ZeroMemory(&lockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
539 lockFlagsReadOnly.ReadOnly = 1;
540
541 mhw_vdbox_vdenc_g11_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *
542 streamIn = (mhw_vdbox_vdenc_g11_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *)m_osInterface->pfnLockResource(
543 m_osInterface,
544 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
545 &lockFlagsWriteOnly);
546 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
547
548 // align to cache line size is OK since streamin state is padded to cacheline size - HW uses cacheline size to read, not command size
549 uint32_t blockWidth = MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
550 uint32_t blockHeight = MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
551 uint32_t streamInSize = blockHeight * blockWidth * CODECHAL_CACHELINE_SIZE;
552 MOS_ZeroMemory(streamIn, streamInSize);
553
554 // If segment map isn't provided then we unlock surface and exit function here.
555 // Reason why check isn't done before function call is to take advantage of the fact that
556 // we need the surface locked here if seg map is provided and we want it 0'd either way.
557 // This saves us from doing 2 locks on this buffer per frame.
558 if (!m_segmentMapProvided)
559 {
560 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
561 m_osInterface,
562 &m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
563 return eStatus;
564 }
565
566 char *data = (char *)m_osInterface->pfnLockResource(
567 m_osInterface,
568 &m_mbSegmentMapSurface.OsResource,
569 &lockFlagsReadOnly);
570 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
571
572 // Rasterization is done within a tile and then for each tile within the frame in raster order.
573 uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
574 uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
575 uint32_t numTiles = numTileColumns * numTileRows;
576 uint32_t currTileStartX64Aligned = 0, dwCurrTileStartY64Aligned = 0; //Set tile Y coordinate 0
577 m_32BlocksRasterized = 0; //Count of rasterized blocks for this frame
578 uint32_t tileX = 0;
579 uint32_t tileY = 0;
580 for (uint32_t tileIdx = 0; tileIdx < numTiles; tileIdx++)
581 {
582 tileX = tileIdx % numTileColumns; //Current tile column position
583 tileY = tileIdx / numTileColumns; //Current tile row position
584
585 currTileStartX64Aligned = ((tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns) * CODEC_VP9_SUPER_BLOCK_WIDTH;
586 dwCurrTileStartY64Aligned = ((tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows) * CODEC_VP9_SUPER_BLOCK_HEIGHT;
587
588 uint32_t tileWidth64Aligned = (((tileX == (numTileColumns - 1)) ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) *
589 CODEC_VP9_SUPER_BLOCK_WIDTH) -
590 currTileStartX64Aligned;
591
592 uint32_t tileHeight64Aligned = (((tileY == (numTileRows - 1)) ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) *
593 CODEC_VP9_SUPER_BLOCK_HEIGHT) -
594 dwCurrTileStartY64Aligned;
595
596 // last tile col raw width and raw height not necessarily 64 aligned, use this length to duplicate values from segmap for empty padding blocks in last tiles.
597 uint32_t lastTileColWidth = (tileX == (numTileColumns - 1)) ? (m_frameWidth - currTileStartX64Aligned) : tileWidth64Aligned;
598 uint32_t lastTileRowHeight = (tileY == (numTileRows - 1)) ? (m_frameHeight - dwCurrTileStartY64Aligned) : tileHeight64Aligned;
599
600 uint32_t tileWidth = (tileX == (numTileColumns - 1)) ? lastTileColWidth : tileWidth64Aligned;
601 uint32_t tileHeight = (tileY == (numTileRows - 1)) ? lastTileRowHeight : tileHeight64Aligned;
602
603 // Recreate the mapbuffer and remap it if, for this frame, tile height and width have changed from previous tile
604 // which was processed from this frame or previous,
605 // or if map buffer is created for previous frame and tile map has changed from previous frame (numtilerows and cols)
606 if (!m_mapBuffer ||
607 tileHeight != m_segStreamInHeight ||
608 tileWidth != m_segStreamInWidth ||
609 numTileColumns != m_tileParams[tileIdx].NumOfTileColumnsInFrame ||
610 m_tileParams[tileIdx].NumOfTilesInFrame != numTiles)
611 {
612 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitZigZagToRasterLUTPerTile(tileHeight,
613 tileWidth,
614 dwCurrTileStartY64Aligned,
615 currTileStartX64Aligned));
616 }
617 m_tileParams[tileIdx].NumOfTileColumnsInFrame = numTileColumns;
618 m_tileParams[tileIdx].NumOfTilesInFrame = numTiles;
619 }
620
621
622 uint32_t dwPitch = m_mbSegmentMapSurface.dwPitch;
623 if (m_osInterface->pfnGetResType(&m_mbSegmentMapSurface.OsResource) == MOS_GFXRES_BUFFER)
624 {
625 //application can send 1D or 2D buffer, based on that change the pitch to correctly access the map buffer
626 //driver reads the seg ids from the buffer for each 16x16 block. Reads 4 values for each 32x32 block
627 dwPitch = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) / CODECHAL_MACROBLOCK_WIDTH;
628 }
629 // set seg ID's of streamin states
630 for (uint32_t i = 0 ; i < blockHeight * blockWidth ; ++i)
631 {
632 uint32_t addrOffset = CalculateBufferOffset(
633 m_mapBuffer[i],
634 m_frameWidth,
635 m_vp9PicParams->PicFlags.fields.seg_id_block_size,
636 dwPitch);
637 uint32_t segId = *(data + addrOffset);
638 streamIn[i].DW7.SegidEnable = 1;
639 streamIn[i].DW7.Segid32X32016X1603Vp9Only = segId | (segId << 4) | (segId << 8) | (segId << 12);
640
641 // TU functions copied from there.
642 streamIn[i].DW0.Maxtusize = 3;
643
644 streamIn[i].DW0.Maxcusize = 3;
645 // For InterFrames we change the CUsize to 32x32 if we have sub 32 blocks with different segids in superblock
646 if ((i % 4) == 3 && m_pictureCodingType == P_TYPE)
647 {
648 if (!(streamIn[i - 3].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only &&
649 streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only &&
650 streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only == streamIn[i].DW7.Segid32X32016X1603Vp9Only))
651 {
652 streamIn[i - 3].DW0.Maxcusize = streamIn[i - 2].DW0.Maxcusize = streamIn[i - 1].DW0.Maxcusize = streamIn[i].DW0.Maxcusize = 2;
653 }
654 }
655
656 streamIn[i].DW0.Numimepredictors = CODECHAL_VDENC_NUMIMEPREDICTORS;
657
658 switch (m_vp9SeqParams->TargetUsage)
659 {
660 case 1: // Quality mode
661 case 4: // Normal mode
662 streamIn[i].DW6.Nummergecandidatecu8X8 = 1;
663 streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
664 streamIn[i].DW6.Nummergecandidatecu32X32 = 3;
665 streamIn[i].DW6.Nummergecandidatecu64X64 = 4;
666 break;
667 case 7: // Speed mode
668 streamIn[i].DW0.Numimepredictors = 4;
669 streamIn[i].DW6.Nummergecandidatecu8X8 = 0;
670 streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
671 streamIn[i].DW6.Nummergecandidatecu32X32 = 2;
672 streamIn[i].DW6.Nummergecandidatecu64X64 = 2;
673 break;
674 default:
675 MHW_ASSERTMESSAGE("Invalid TU provided!");
676 return MOS_STATUS_INVALID_PARAMETER;
677 }
678 }
679
680 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
681 m_osInterface,
682 &m_mbSegmentMapSurface.OsResource));
683
684 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
685 m_osInterface,
686 &m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
687
688 return eStatus;
689 }
690
SetMeSurfaceParams(MeSurfaceParams * meSurfaceParams)691 MOS_STATUS CodechalVdencVp9StateG11::SetMeSurfaceParams(MeSurfaceParams *meSurfaceParams)
692 {
693 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
694
695 CODECHAL_ENCODE_FUNCTION_ENTER;
696
697 CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
698
699 meSurfaceParams->bMbaff = false;
700 meSurfaceParams->b4xMeDistortionBufferSupported = true;
701 meSurfaceParams->dwNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
702 meSurfaceParams->dwNumRefIdxL1ActiveMinus1 = 0;
703
704 MOS_ZeroMemory(&m_refPicList0, sizeof(m_refPicList0));
705
706 if (m_lastRefPic)
707 {
708 m_refPicList0[0].FrameIdx = m_vp9PicParams->RefFlags.fields.LastRefIdx;
709 m_refPicList0[0].PicFlags = PICTURE_FRAME;
710 }
711 if (m_goldenRefPic)
712 {
713 m_refPicList0[1].FrameIdx = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
714 m_refPicList0[1].PicFlags = PICTURE_FRAME;
715 }
716 if (m_altRefPic)
717 {
718 m_refPicList0[2].FrameIdx = m_vp9PicParams->RefFlags.fields.AltRefIdx;
719 m_refPicList0[2].PicFlags = PICTURE_FRAME;
720 }
721
722 meSurfaceParams->pL0RefFrameList = &(m_refPicList0[0]);
723 meSurfaceParams->ppRefList = &m_refList[0];
724 meSurfaceParams->pPicIdx = &m_picIdx[0];
725 meSurfaceParams->pCurrOriginalPic = &m_currOriginalPic;
726 meSurfaceParams->ps4xMeMvDataBuffer = &m_4xMeMvDataBuffer;
727 meSurfaceParams->ps16xMeMvDataBuffer = &m_16xMeMvDataBuffer;
728 meSurfaceParams->psMeDistortionBuffer = &m_4xMeDistortionBuffer;
729 meSurfaceParams->dwVerticalLineStride = m_verticalLineStride;
730 meSurfaceParams->dwVerticalLineStrideOffset = m_verticalLineStrideOffset;
731 meSurfaceParams->b32xMeEnabled = m_32xMeSupported;
732 meSurfaceParams->b16xMeEnabled = m_16xMeEnabled;
733 meSurfaceParams->pMeBindingTable = &m_meBindingTable;
734 meSurfaceParams->bVdencStreamInEnabled = true;
735 meSurfaceParams->psMeVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
736 meSurfaceParams->dwVDEncStreamInSurfaceSize = MOS_BYTES_TO_DWORDS((MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
737 (MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
738 CODECHAL_CACHELINE_SIZE);
739 return eStatus;
740 }
741
SetMeCurbeParams(MeCurbeParams * meParams)742 MOS_STATUS CodechalVdencVp9StateG11::SetMeCurbeParams(MeCurbeParams *meParams)
743 {
744 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
745
746 CODECHAL_ENCODE_FUNCTION_ENTER;
747
748 CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
749
750 meParams->b16xMeEnabled = m_16xMeEnabled;
751 meParams->b32xMeEnabled = m_32xMeSupported;
752 meParams->TargetUsage = TU_QUALITY;
753 meParams->MaxMvLen = m_hmeMaxMvLength;
754 meParams->CurrOriginalPic.FrameIdx = m_vp9PicParams->CurrOriginalPic.FrameIdx;
755 meParams->CurrOriginalPic.PicEntry = m_vp9PicParams->CurrOriginalPic.PicEntry;
756 meParams->CurrOriginalPic.PicFlags = m_vp9PicParams->CurrOriginalPic.PicFlags;
757 meParams->pic_init_qp_minus26 = m_vp9PicParams->LumaACQIndex - 26;
758 meParams->num_ref_idx_l0_active_minus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
759 meParams->num_ref_idx_l1_active_minus1 = 0;
760
761 return eStatus;
762 }
763
SendMeSurfaces(PMOS_COMMAND_BUFFER cmdBuffer,MeSurfaceParams * params)764 MOS_STATUS CodechalVdencVp9StateG11::SendMeSurfaces(
765 PMOS_COMMAND_BUFFER cmdBuffer,
766 MeSurfaceParams * params)
767 {
768 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
769
770 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
771 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
772 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
773 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pCurrOriginalPic);
774 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps4xMeMvDataBuffer);
775 CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeDistortionBuffer);
776
777 if (!params->bVdencStreamInEnabled)
778 {
779 CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeBrcDistortionBuffer);
780 }
781 else
782 {
783 CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeVdencStreamInBuffer);
784 }
785
786 CODECHAL_MEDIA_STATE_TYPE encMediaStateType = (params->b32xMeInUse) ? CODECHAL_MEDIA_STATE_32X_ME : params->b16xMeInUse ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
787
788 if (params->bVdencStreamInEnabled && encMediaStateType == CODECHAL_MEDIA_STATE_4X_ME)
789 {
790 encMediaStateType = CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
791 }
792
793 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pMeBindingTable);
794 MeKernelBindingTable *meBindingTable = params->pMeBindingTable;
795
796 bool isFieldPicture = CodecHal_PictureIsField(*(params->pCurrOriginalPic)) ? 1 : 0;
797 bool isBottomField = CodecHal_PictureIsBottomField(*(params->pCurrOriginalPic)) ? 1 : 0;
798 uint8_t currVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME : ((isBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
799
800 PMOS_SURFACE currScaledSurface = nullptr, meMvDataBuffer = nullptr;
801 uint32_t meMvBottomFieldOffset = 0, currScaledBottomFieldOffset = 0;
802 if (params->b32xMeInUse)
803 {
804 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps32xMeMvDataBuffer);
805 currScaledSurface = m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
806 meMvDataBuffer = params->ps32xMeMvDataBuffer;
807 meMvBottomFieldOffset = params->dw32xMeMvBottomFieldOffset;
808 currScaledBottomFieldOffset = params->dw32xScaledBottomFieldOffset;
809 }
810 else if (params->b16xMeInUse)
811 {
812 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps16xMeMvDataBuffer);
813 currScaledSurface = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
814 meMvDataBuffer = params->ps16xMeMvDataBuffer;
815 meMvBottomFieldOffset = params->dw16xMeMvBottomFieldOffset;
816 currScaledBottomFieldOffset = params->dw16xScaledBottomFieldOffset;
817 }
818 else
819 {
820 currScaledSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
821 meMvDataBuffer = params->ps4xMeMvDataBuffer;
822 meMvBottomFieldOffset = params->dw4xMeMvBottomFieldOffset;
823 currScaledBottomFieldOffset = params->dw4xScaledBottomFieldOffset;
824 }
825
826 // Reference height and width information should be taken from the current scaled surface rather
827 // than from the reference scaled surface in the case of PAFF.
828
829
830 uint32_t width = MOS_ALIGN_CEIL(params->dwDownscaledWidthInMb * 32, 64);
831 uint32_t height = params->dwDownscaledHeightInMb * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER;
832
833 // Force the values
834 meMvDataBuffer->dwWidth = width;
835 meMvDataBuffer->dwHeight = height;
836 meMvDataBuffer->dwPitch = width;
837
838 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
839 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
840 surfaceParams.bIs2DSurface = true;
841 surfaceParams.bMediaBlockRW = true;
842 surfaceParams.psSurface = meMvDataBuffer;
843 surfaceParams.dwOffset = meMvBottomFieldOffset;
844 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
845 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEMVDataSurface;
846 surfaceParams.bIsWritable = true;
847 surfaceParams.bRenderTarget = true;
848 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
849 m_hwInterface,
850 cmdBuffer,
851 &surfaceParams,
852 params->pKernelState));
853
854 if (params->b16xMeInUse && params->b32xMeEnabled)
855 {
856 // Pass 32x MV to 16x ME operation
857 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
858 surfaceParams.bIs2DSurface = true;
859 surfaceParams.bMediaBlockRW = true;
860 surfaceParams.psSurface = params->ps32xMeMvDataBuffer;
861 surfaceParams.dwOffset =
862 isBottomField ? params->dw32xMeMvBottomFieldOffset : 0;
863 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
864 surfaceParams.dwBindingTableOffset = meBindingTable->dw32xMEMVDataSurface;
865 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
866 m_hwInterface,
867 cmdBuffer,
868 &surfaceParams,
869 params->pKernelState));
870 }
871 else if (!params->b32xMeInUse && params->b16xMeEnabled)
872 {
873 // Pass 16x MV to 4x ME operation
874 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
875 surfaceParams.bIs2DSurface = true;
876 surfaceParams.bMediaBlockRW = true;
877 surfaceParams.psSurface = params->ps16xMeMvDataBuffer;
878 surfaceParams.dwOffset =
879 isBottomField ? params->dw16xMeMvBottomFieldOffset : 0;
880 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
881 surfaceParams.dwBindingTableOffset = meBindingTable->dw16xMEMVDataSurface;
882 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
883 m_hwInterface,
884 cmdBuffer,
885 &surfaceParams,
886 params->pKernelState));
887 }
888
889 // Insert Distortion buffers only for 4xMe case
890 if (!params->b32xMeInUse && !params->b16xMeInUse)
891 {
892 if (!params->bVdencStreamInEnabled)
893 {
894 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
895 surfaceParams.bIs2DSurface = true;
896 surfaceParams.bMediaBlockRW = true;
897 surfaceParams.psSurface = params->psMeBrcDistortionBuffer;
898 surfaceParams.dwOffset = params->dwMeBrcDistortionBottomFieldOffset;
899 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBRCDist;
900 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
901 surfaceParams.bIsWritable = true;
902 surfaceParams.bRenderTarget = true;
903 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
904 m_hwInterface,
905 cmdBuffer,
906 &surfaceParams,
907 params->pKernelState));
908 }
909
910 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
911 surfaceParams.bIs2DSurface = true;
912 surfaceParams.bMediaBlockRW = true;
913 surfaceParams.psSurface = params->psMeDistortionBuffer;
914 surfaceParams.dwOffset = params->dwMeDistortionBottomFieldOffset;
915 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEDist;
916 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
917 surfaceParams.bIsWritable = true;
918 surfaceParams.bRenderTarget = true;
919 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
920 m_hwInterface,
921 cmdBuffer,
922 &surfaceParams,
923 params->pKernelState));
924 }
925
926 // Setup references 1...n
927 // LIST 0 references (not optional)
928 CODEC_PICTURE refPic;
929 bool isRefFieldPicture = false, isRefBottomField = false;
930 uint8_t refPicIdx = 0;
931 if (params->pL0RefFrameList)
932 {
933 for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL0ActiveMinus1; refIdx++)
934 {
935 refPic = params->pL0RefFrameList[refIdx];
936
937 if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
938 {
939 if (refIdx == 0)
940 {
941 // Current Picture Y - VME
942 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
943 surfaceParams.bUseAdvState = true;
944 surfaceParams.psSurface = currScaledSurface;
945 surfaceParams.dwOffset = isBottomField ? currScaledBottomFieldOffset : 0;
946 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
947 surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForFwdRef;
948 surfaceParams.ucVDirection = currVDirection;
949 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
950 m_hwInterface,
951 cmdBuffer,
952 &surfaceParams,
953 params->pKernelState));
954 }
955
956 isRefFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
957 isRefBottomField = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
958 refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
959 uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
960 uint32_t refScaledBottomFieldOffset = 0;
961 MOS_SURFACE *refScaledSurface;
962 if (params->b32xMeInUse)
963 {
964 refScaledSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
965 }
966 else if (params->b16xMeInUse)
967 {
968 refScaledSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
969 }
970 else
971 {
972 refScaledSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
973 }
974 refScaledBottomFieldOffset = isRefBottomField ? currScaledBottomFieldOffset : 0;
975
976
977 // L0 Reference Picture Y - VME
978 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
979 surfaceParams.bUseAdvState = true;
980 surfaceParams.psSurface = refScaledSurface;
981 surfaceParams.dwOffset = isRefBottomField ? refScaledBottomFieldOffset : 0;
982 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
983 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx];
984 surfaceParams.ucVDirection = !isFieldPicture ? CODECHAL_VDIRECTION_FRAME : ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
985 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
986 m_hwInterface,
987 cmdBuffer,
988 &surfaceParams,
989 params->pKernelState));
990 }
991 }
992 }
993 else
994 {
995 return MOS_STATUS_NULL_POINTER;
996 }
997
998 // Setup references 1...n
999 // LIST 1 references (optional)
1000 if (params->pL1RefFrameList)
1001 {
1002 for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL1ActiveMinus1; refIdx++)
1003 {
1004 refPic = params->pL1RefFrameList[refIdx];
1005
1006 if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
1007 {
1008 if (refIdx == 0)
1009 {
1010 // Current Picture Y - VME
1011 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1012 surfaceParams.bUseAdvState = true;
1013 surfaceParams.psSurface = currScaledSurface;
1014 surfaceParams.dwOffset = isBottomField ? currScaledBottomFieldOffset : 0;
1015 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1016 surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForBwdRef;
1017 surfaceParams.ucVDirection = currVDirection;
1018 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1019 m_hwInterface,
1020 cmdBuffer,
1021 &surfaceParams,
1022 params->pKernelState));
1023 }
1024
1025 isRefFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
1026 isRefBottomField = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
1027 refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
1028 uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
1029 uint32_t refScaledBottomFieldOffset = 0;
1030 MOS_SURFACE *refScaledSurface;
1031 if (params->b32xMeInUse)
1032 {
1033 refScaledSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
1034 }
1035 else if (params->b16xMeInUse)
1036 {
1037 refScaledSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
1038 }
1039 else
1040 {
1041 refScaledSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
1042 }
1043 refScaledBottomFieldOffset = isRefBottomField ? currScaledBottomFieldOffset : 0;
1044
1045
1046 // L1 Reference Picture Y - VME
1047 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1048 surfaceParams.bUseAdvState = true;
1049 surfaceParams.psSurface = refScaledSurface;
1050 surfaceParams.dwOffset = isRefBottomField ? refScaledBottomFieldOffset : 0;
1051 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1052 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBwdRefPicIdx[refIdx];
1053 surfaceParams.ucVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME : ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1054 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1055 m_hwInterface,
1056 cmdBuffer,
1057 &surfaceParams,
1058 params->pKernelState));
1059 }
1060 }
1061 }
1062 if (encMediaStateType == CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN)
1063 {
1064 // Output buffer
1065 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1066 surfaceParams.dwSize = params->dwVDEncStreamInSurfaceSize;
1067 surfaceParams.bIs2DSurface = false;
1068 surfaceParams.presBuffer = params->psMeVdencStreamInBuffer;
1069 surfaceParams.dwBindingTableOffset = meBindingTable->dwVdencStreamInSurface;
1070 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1071 surfaceParams.bIsWritable = true;
1072 surfaceParams.bRenderTarget = true;
1073 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1074 m_hwInterface,
1075 cmdBuffer,
1076 &surfaceParams,
1077 params->pKernelState));
1078
1079 // Input buffer (for AVC case we only read the surface and update data)
1080 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1081 surfaceParams.dwSize = params->dwVDEncStreamInSurfaceSize;
1082 surfaceParams.bIs2DSurface = false;
1083 surfaceParams.presBuffer = params->psMeVdencStreamInBuffer;
1084 surfaceParams.dwBindingTableOffset = meBindingTable->dwVdencStreamInInputSurface;
1085 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1086 surfaceParams.bIsWritable = true;
1087 surfaceParams.bRenderTarget = true;
1088 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1089 m_hwInterface,
1090 cmdBuffer,
1091 &surfaceParams,
1092 params->pKernelState));
1093 }
1094
1095 return eStatus;
1096 }
1097
1098
1099 //------------------------------------------------------------------------------
1100 //| Purpose: Setup curbe for common ME kernels
1101 //| Return: N/A
1102 //------------------------------------------------------------------------------
SetCurbeMe(MeCurbeParams * params)1103 MOS_STATUS CodechalVdencVp9StateG11::SetCurbeMe(
1104 MeCurbeParams *params)
1105 {
1106 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1107
1108 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1109 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
1110
1111 CODECHAL_ENCODE_ASSERT(params->TargetUsage <= NUM_TARGET_USAGE_MODES);
1112
1113 uint32_t scaleFactor = 0;
1114 bool useMvFromPrevStep = false, writeDistortions = false;
1115 uint8_t mvShiftFactor = 0, prevMvReadPosFactor = 0;
1116 switch (params->hmeLvl)
1117 {
1118 case HME_LEVEL_32x:
1119 useMvFromPrevStep = CODECHAL_ENCODE_HME_FIRST_STEP_G11;
1120 writeDistortions = false;
1121 scaleFactor = SCALE_FACTOR_32x;
1122 mvShiftFactor = CODECHAL_ENCODE_MV_SHIFT_FACTOR_32x_G11;
1123 break;
1124 case HME_LEVEL_16x:
1125 useMvFromPrevStep = (params->b32xMeEnabled) ? CODECHAL_ENCODE_HME_FOLLOWING_STEP_G11 : CODECHAL_ENCODE_HME_FIRST_STEP_G11;
1126 writeDistortions = false;
1127 scaleFactor = SCALE_FACTOR_16x;
1128 mvShiftFactor = CODECHAL_ENCODE_MV_SHIFT_FACTOR_16x_G11;
1129 prevMvReadPosFactor = CODECHAL_ENCODE_PREV_MV_READ_POSITION_16x_G11;
1130 break;
1131 case HME_LEVEL_4x:
1132 useMvFromPrevStep = (params->b16xMeEnabled) ? CODECHAL_ENCODE_HME_FOLLOWING_STEP_G11 : CODECHAL_ENCODE_HME_FIRST_STEP_G11;
1133 writeDistortions = true;
1134 scaleFactor = SCALE_FACTOR_4x;
1135 mvShiftFactor = CODECHAL_ENCODE_MV_SHIFT_FACTOR_4x_G11;
1136 prevMvReadPosFactor = CODECHAL_ENCODE_PREV_MV_READ_POSITION_4x_G11;
1137 break;
1138 default:
1139 return MOS_STATUS_INVALID_PARAMETER;
1140 }
1141
1142 MeCurbe cmd;
1143 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1144 &cmd,
1145 sizeof(MeCurbe),
1146 meCurbeInit,
1147 sizeof(MeCurbe)));
1148
1149 cmd.DW3.SubPelMode = 3;
1150 if (m_fieldScalingOutputInterleaved)
1151 {
1152 cmd.DW3.SrcAccess =
1153 cmd.DW3.RefAccess = CodecHal_PictureIsField(params->CurrOriginalPic) ? 1 : 0;
1154 cmd.DW7.SrcFieldPolarity = CodecHal_PictureIsBottomField(params->CurrOriginalPic) ? 1 : 0;
1155 }
1156
1157 bool framePicture = CodecHal_PictureIsFrame(params->CurrOriginalPic);
1158 char qpPrimeY = (params->pic_init_qp_minus26 + 26) + params->slice_qp_delta;
1159
1160 cmd.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
1161 cmd.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
1162 cmd.DW5.QpPrimeY = qpPrimeY;
1163 cmd.DW6.WriteDistortions = writeDistortions;
1164 cmd.DW6.UseMvFromPrevStep = useMvFromPrevStep;
1165
1166 cmd.DW6.SuperCombineDist = m_superCombineDistGeneric[params->TargetUsage];
1167 cmd.DW6.MaxVmvR = (framePicture) ? params->MaxMvLen * 4 : (params->MaxMvLen >> 1) * 4;
1168
1169 if (m_pictureCodingType == B_TYPE)
1170 {
1171 // This field is irrelevant since we are not using the bi-direct search.
1172 // set it to 32 to match
1173 cmd.DW1.BiWeight = 32;
1174 cmd.DW13.NumRefIdxL1MinusOne = params->num_ref_idx_l1_active_minus1;
1175 }
1176
1177 if (m_pictureCodingType == P_TYPE ||
1178 m_pictureCodingType == B_TYPE)
1179 {
1180 if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
1181 {
1182 cmd.DW30.ActualMBHeight = m_frameHeight;
1183 cmd.DW30.ActualMBWidth = m_frameWidth;
1184 }
1185 else if (m_vdencEnabled && m_16xMeSupported)
1186 {
1187 cmd.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
1188 cmd.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
1189 }
1190 cmd.DW13.NumRefIdxL0MinusOne =
1191 params->num_ref_idx_l0_active_minus1;
1192 }
1193
1194 cmd.DW13.RefStreaminCost = 5;
1195 // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
1196 cmd.DW13.ROIEnable = 0;
1197
1198 if (!framePicture)
1199 {
1200 if (m_pictureCodingType != I_TYPE)
1201 {
1202 cmd.DW14.List0RefID0FieldParity = params->List0RefID0FieldParity;
1203 cmd.DW14.List0RefID1FieldParity = params->List0RefID1FieldParity;
1204 cmd.DW14.List0RefID2FieldParity = params->List0RefID2FieldParity;
1205 cmd.DW14.List0RefID3FieldParity = params->List0RefID3FieldParity;
1206 cmd.DW14.List0RefID4FieldParity = params->List0RefID4FieldParity;
1207 cmd.DW14.List0RefID5FieldParity = params->List0RefID5FieldParity;
1208 cmd.DW14.List0RefID6FieldParity = params->List0RefID6FieldParity;
1209 cmd.DW14.List0RefID7FieldParity = params->List0RefID7FieldParity;
1210 }
1211 if (m_pictureCodingType == B_TYPE)
1212 {
1213 cmd.DW14.List1RefID0FieldParity = params->List1RefID0FieldParity;
1214 cmd.DW14.List1RefID1FieldParity = params->List1RefID1FieldParity;
1215 }
1216 }
1217
1218 cmd.DW15.MvShiftFactor = mvShiftFactor;
1219 cmd.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
1220
1221 // r3 & r4
1222 uint8_t targetUsage = params->TargetUsage;
1223 uint8_t meMethod = 0;
1224 if (m_pictureCodingType == B_TYPE)
1225 {
1226 meMethod = params->pBMEMethodTable ? // use the ME table dependent on codec standard
1227 params->pBMEMethodTable[targetUsage]
1228 : m_bMeMethodGeneric[targetUsage];
1229 }
1230 else
1231 {
1232 meMethod = params->pMEMethodTable ? // use the ME table dependent on codec standard
1233 params->pMEMethodTable[targetUsage]
1234 : m_meMethodGeneric[targetUsage];
1235 }
1236
1237 uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
1238 eStatus = MOS_SecureMemcpy(&(cmd.SPDelta), 14 * sizeof(uint32_t), m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t));
1239 if (eStatus != MOS_STATUS_SUCCESS)
1240 {
1241 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
1242 return eStatus;
1243 }
1244
1245 // Non legacy stream in is for hevc vp9 streamin kernel
1246 if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
1247 {
1248 //StreamIn CURBE
1249 cmd.DW6.LCUSize = 1; //Only LCU64 supported by the VDEnc HW
1250 cmd.DW6.InputStreamInSurfaceEnable = params->segmapProvided;
1251 cmd.DW31.MaxCuSize = 3;
1252 cmd.DW31.MaxTuSize = 3;
1253 switch (params->TargetUsage)
1254 {
1255 case 1:
1256 case 4:
1257 cmd.DW36.NumMergeCandidateCu64x64 = 4;
1258 cmd.DW36.NumMergeCandidateCu32x32 = 3;
1259 cmd.DW36.NumMergeCandidateCu16x16 = 2;
1260 cmd.DW36.NumMergeCandidateCu8x8 = 1;
1261 cmd.DW31.NumImePredictors = 8;
1262 break;
1263 case 7:
1264 cmd.DW36.NumMergeCandidateCu64x64 = 2;
1265 cmd.DW36.NumMergeCandidateCu32x32 = 2;
1266 cmd.DW36.NumMergeCandidateCu16x16 = 2;
1267 cmd.DW36.NumMergeCandidateCu8x8 = 0;
1268 cmd.DW31.NumImePredictors = 4;
1269 break;
1270 }
1271 }
1272
1273 // r5
1274 cmd.DW40._4xMeMvOutputDataSurfIndex = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G11;
1275 cmd.DW41._16xOr32xMeMvInputDataSurfIndex = (params->hmeLvl == HME_LEVEL_32x) ? CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G11 : CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G11;
1276 cmd.DW42._4xMeOutputDistSurfIndex = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G11;
1277 cmd.DW43._4xMeOutputBrcDistSurfIndex = CODECHAL_ENCODE_ME_BRC_DISTORTION_G11;
1278 cmd.DW44.VMEFwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G11;
1279 cmd.DW45.VMEBwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G11;
1280 cmd.DW46.VDEncStreamInOutputSurfIndex = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G11;
1281 cmd.DW47.VDEncStreamInInputSurfIndex = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G11;
1282
1283 CODECHAL_ENCODE_CHK_STATUS_RETURN(params->pKernelState->m_dshRegion.AddData(
1284 &cmd,
1285 params->pKernelState->dwCurbeOffset,
1286 sizeof(cmd)));
1287
1288 return eStatus;
1289 }
1290
ExecuteMeKernel(MeCurbeParams * meParams,MeSurfaceParams * meSurfaceParams,HmeLevel hmeLevel)1291 MOS_STATUS CodechalVdencVp9StateG11::ExecuteMeKernel(
1292 MeCurbeParams * meParams,
1293 MeSurfaceParams *meSurfaceParams,
1294 HmeLevel hmeLevel)
1295 {
1296 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1297
1298 CODECHAL_ENCODE_FUNCTION_ENTER;
1299
1300 CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
1301 CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
1302
1303 PerfTagSetting perfTag;
1304 perfTag.Value = 0;
1305 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1306 perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL : CODECHAL_ENCODE_PERFTAG_CALL_ME_KERNEL;
1307 perfTag.PictureCodingType = m_pictureCodingType;
1308 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1309 // Each ME kernel buffer counts as a separate perf task
1310 m_osInterface->pfnResetPerfBufferID(m_osInterface);
1311
1312 CODECHAL_MEDIA_STATE_TYPE encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME : (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
1313
1314 bool vdencMeInUse = false;
1315 if (m_vdencEnabled && (encFunctionType == CODECHAL_MEDIA_STATE_4X_ME))
1316 {
1317 vdencMeInUse = true;
1318 // Non legacy stream in is for hevc vp9 streamin kernel
1319 encFunctionType = m_useNonLegacyStreamin ? CODECHAL_MEDIA_STATE_4X_ME : CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
1320 }
1321
1322 uint32_t krnStateIdx = vdencMeInUse ? CODECHAL_ENCODE_ME_IDX_VDENC : ((m_pictureCodingType == P_TYPE) ? CODECHAL_ENCODE_ME_IDX_P : CODECHAL_ENCODE_ME_IDX_B);
1323
1324 PMHW_KERNEL_STATE kernelState = &m_meKernelStates[krnStateIdx];
1325
1326 // If Single Task Phase is not enabled, use BT count for the kernel state.
1327 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
1328 {
1329 uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
1330 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
1331 m_stateHeapInterface,
1332 maxBtCount));
1333 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
1334 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
1335 }
1336
1337 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
1338 m_stateHeapInterface,
1339 kernelState,
1340 false,
1341 0,
1342 false,
1343 m_storeData));
1344 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
1345 MOS_ZeroMemory(&idParams, sizeof(idParams));
1346 idParams.pKernelState = kernelState;
1347 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
1348 m_stateHeapInterface,
1349 1,
1350 &idParams));
1351
1352 // Setup Additional MeParams (Most of them set up in codec specific function, so don't zero out here)
1353 meParams->hmeLvl = hmeLevel;
1354 meParams->pKernelState = kernelState;
1355
1356 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMe(meParams));
1357
1358 CODECHAL_DEBUG_TOOL(
1359 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1360 encFunctionType,
1361 MHW_DSH_TYPE,
1362 kernelState));
1363 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1364 encFunctionType,
1365 kernelState));
1366 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1367 encFunctionType,
1368 MHW_ISH_TYPE,
1369 kernelState));)
1370 MOS_COMMAND_BUFFER cmdBuffer;
1371 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
1372 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1373 {
1374 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
1375 }
1376 SendKernelCmdsParams sendKernelCmdsParams;
1377 sendKernelCmdsParams = SendKernelCmdsParams();
1378 sendKernelCmdsParams.EncFunctionType = encFunctionType;
1379 sendKernelCmdsParams.pKernelState = kernelState;
1380
1381 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
1382
1383 // Add binding table
1384 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
1385 m_stateHeapInterface,
1386 kernelState));
1387
1388 // Setup Additional ME surface params (Most of them set up in codec specific function, so don't zero out here)
1389 meSurfaceParams->dwDownscaledWidthInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledWidthInMb32x : (hmeLevel == HME_LEVEL_16x) ? m_downscaledWidthInMb16x : m_downscaledWidthInMb4x;
1390 meSurfaceParams->dwDownscaledHeightInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledFrameFieldHeightInMb32x : (hmeLevel == HME_LEVEL_16x) ? m_downscaledFrameFieldHeightInMb16x : m_downscaledFrameFieldHeightInMb4x;
1391 meSurfaceParams->b32xMeInUse = (hmeLevel == HME_LEVEL_32x) ? true : false;
1392 meSurfaceParams->b16xMeInUse = (hmeLevel == HME_LEVEL_16x) ? true : false;
1393 meSurfaceParams->pKernelState = kernelState;
1394
1395 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(&cmdBuffer, meSurfaceParams));
1396
1397 // Dump SSH for ME kernel
1398 CODECHAL_DEBUG_TOOL(
1399 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1400 encFunctionType,
1401 MHW_SSH_TYPE,
1402 kernelState)));
1403
1404 /* zero out the mv data memory and me distortion buffer for the driver ULT
1405 kernel only writes out this data used for current frame, in some cases the the data used for
1406 previous frames would be left in the buffer (for example, the L1 mv for B frame would still show
1407 in the P frame mv data buffer */
1408
1409 // Zeroing out the buffers has perf impact, so zero it out only when dumps are actually enabled
1410 CODECHAL_DEBUG_TOOL(
1411 CODECHAL_ENCODE_CHK_NULL_RETURN(m_debugInterface);
1412 uint8_t *data = NULL;
1413 uint32_t size = 0;
1414 bool driverMeDumpEnabled = m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrOutput, encFunctionType);
1415
1416 if (driverMeDumpEnabled) {
1417 MOS_LOCK_PARAMS lockFlags;
1418 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1419 lockFlags.WriteOnly = 1;
1420
1421 switch (hmeLevel)
1422 {
1423 case HME_LEVEL_32x:
1424 data = (uint8_t *)m_osInterface->pfnLockResource(
1425 m_osInterface,
1426 &meSurfaceParams->ps32xMeMvDataBuffer->OsResource,
1427 &lockFlags);
1428 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1429 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) *
1430 (m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1431 MOS_ZeroMemory(data, size);
1432 m_osInterface->pfnUnlockResource(
1433 m_osInterface,
1434 &meSurfaceParams->ps32xMeMvDataBuffer->OsResource);
1435 break;
1436 case HME_LEVEL_16x:
1437 data = (uint8_t *)m_osInterface->pfnLockResource(
1438 m_osInterface,
1439 &meSurfaceParams->ps16xMeMvDataBuffer->OsResource,
1440 &lockFlags);
1441 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1442 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) *
1443 (m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1444 MOS_ZeroMemory(data, size);
1445 m_osInterface->pfnUnlockResource(
1446 m_osInterface,
1447 &meSurfaceParams->ps16xMeMvDataBuffer->OsResource);
1448 break;
1449 case HME_LEVEL_4x:
1450 if (!m_vdencEnabled)
1451 {
1452 data = (uint8_t *)m_osInterface->pfnLockResource(
1453 m_osInterface,
1454 &meSurfaceParams->ps4xMeMvDataBuffer->OsResource,
1455 &lockFlags);
1456 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1457 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) *
1458 (m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1459 MOS_ZeroMemory(data, size);
1460 m_osInterface->pfnUnlockResource(
1461 m_osInterface,
1462 &meSurfaceParams->ps4xMeMvDataBuffer->OsResource);
1463 }
1464 break;
1465 default:
1466 return MOS_STATUS_INVALID_PARAMETER;
1467 }
1468
1469 // zeroing out ME dist buffer
1470 if (meSurfaceParams->b4xMeDistortionBufferSupported)
1471 {
1472 data = (uint8_t *)m_osInterface->pfnLockResource(
1473 m_osInterface, &meSurfaceParams->psMeDistortionBuffer->OsResource, &lockFlags);
1474 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1475 size = meSurfaceParams->psMeDistortionBuffer->dwHeight * meSurfaceParams->psMeDistortionBuffer->dwPitch;
1476 MOS_ZeroMemory(data, size);
1477 m_osInterface->pfnUnlockResource(
1478 m_osInterface,
1479 &meSurfaceParams->psMeDistortionBuffer->OsResource);
1480 }
1481 });
1482
1483 uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x : (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
1484
1485 uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
1486 uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
1487
1488 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
1489 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
1490 walkerCodecParams.WalkerMode = m_walkerMode;
1491 walkerCodecParams.dwResolutionX = resolutionX;
1492 walkerCodecParams.dwResolutionY = resolutionY;
1493 walkerCodecParams.bNoDependency = true;
1494 walkerCodecParams.bMbaff = meSurfaceParams->bMbaff;
1495 walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
1496 walkerCodecParams.ucGroupId = m_groupId;
1497
1498 MHW_WALKER_PARAMS walkerParams;
1499 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
1500 m_hwInterface,
1501 &walkerParams,
1502 &walkerCodecParams));
1503
1504 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
1505 &cmdBuffer,
1506 &walkerParams));
1507
1508 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
1509
1510 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
1511 m_stateHeapInterface,
1512 kernelState));
1513 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1514 {
1515 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
1516 m_stateHeapInterface));
1517 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
1518 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1519 }
1520
1521 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
1522 &cmdBuffer,
1523 encFunctionType,
1524 nullptr)));
1525
1526 m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
1527
1528 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
1529
1530 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1531 {
1532 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
1533 m_lastTaskInPhase = false;
1534 }
1535
1536 return eStatus;
1537 }
1538
ExecuteKernelFunctions()1539 MOS_STATUS CodechalVdencVp9StateG11::ExecuteKernelFunctions()
1540 {
1541 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1542
1543 CODECHAL_ENCODE_FUNCTION_ENTER;
1544
1545 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
1546 uint32_t dumpFormat = 0;
1547 CODECHAL_DEBUG_TOOL(
1548 // CodecHal_DbgMapSurfaceFormatToDumpFormat(m_rawSurfaceToEnc->Format, &dumpFormat);
1549 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1550 m_rawSurfaceToEnc,
1551 CodechalDbgAttr::attrEncodeRawInputSurface,
1552 "SrcSurf"));
1553 if (m_lastRefPic)
1554 {
1555 // CodecHal_DbgMapSurfaceFormatToDumpFormat(m_lastRefPic->Format, &dumpFormat);
1556 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1557 m_lastRefPic,
1558 CodechalDbgAttr::attrReferenceSurfaces,
1559 "LastRefSurface"));
1560 }
1561
1562 if (m_goldenRefPic)
1563 {
1564 // CodecHal_DbgMapSurfaceFormatToDumpFormat(m_goldenRefPic->Format, &dumpFormat);
1565 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1566 m_goldenRefPic,
1567 CodechalDbgAttr::attrReferenceSurfaces,
1568 "GoldenRefSurface"));
1569 }
1570
1571 if (m_altRefPic)
1572 {
1573 // CodecHal_DbgMapSurfaceFormatToDumpFormat(m_altRefPic->Format, &dumpFormat);
1574 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1575 m_altRefPic,
1576 CodechalDbgAttr::attrReferenceSurfaces,
1577 "_AltRefSurface"));
1578 }
1579 );
1580
1581
1582 m_setRequestedEUSlices = ((m_frameHeight * m_frameWidth) >= m_ssdResolutionThreshold &&
1583 m_targetUsage <= m_ssdTargetUsageThreshold) ? true : false;
1584
1585 m_hwInterface->m_numRequestedEuSlices = (m_setRequestedEUSlices) ?
1586 m_sliceShutdownRequestState : m_sliceShutdownDefaultState;
1587
1588 // While this streamin isn't a kernel function, we 0 the surface here which is needed before HME kernel
1589 SetupSegmentationStreamIn();
1590
1591 // Super HME
1592 if (m_16xMeSupported)
1593 {
1594 //4x Downscaling
1595 CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
1596 MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
1597 cscScalingKernelParams.bLastTaskInPhaseCSC =
1598 cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled);
1599 cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled);
1600 cscScalingKernelParams.bLastTaskInPhase32xDS = !m_hmeEnabled;
1601
1602 m_firstTaskInPhase = true;
1603 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
1604 }
1605
1606 if (m_16xMeEnabled)
1607 {
1608 //Initialize the ME struct for HME kernel calls
1609 MeCurbeParams meParams;
1610 MOS_ZeroMemory(&meParams, sizeof(MeCurbeParams));
1611 SetMeCurbeParams(&meParams);
1612
1613 MeSurfaceParams meSurfaceParams;
1614 MOS_ZeroMemory(&meSurfaceParams, sizeof(MeSurfaceParams));
1615 SetMeSurfaceParams(&meSurfaceParams);
1616
1617 // P_HME kernel (16x HME)
1618 m_lastTaskInPhase = false;
1619 CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_16x));
1620
1621 //StreamIn kernel, 4xME
1622 m_lastTaskInPhase = true;
1623 meParams.segmapProvided = m_segmentMapProvided;
1624 CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_4x));
1625 }
1626 //CODECHAL_DEBUG_TOOL
1627 // (
1628 /*CodecHal_DbgDumpVp9VdEncHMEData(
1629 m_encoder,
1630 dumpFormat,
1631 MeSurfaceParams.ps4xMeMvDataBuffer,
1632 MeSurfaceParams.ps16xMeMvDataBuffer,
1633 MeSurfaceParams.psMeDistortionBuffer);
1634 */
1635
1636 //if (m_scalingEnabled)
1637 //{
1638 // // Dump 4x scaling and HME buffers
1639 // m_debugInterface->DumpYUVSurface(
1640 // m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER),
1641 // CodechalDbgAttr::attrReferenceSurfaces,
1642 // "4xScaledSurf");
1643 // m_debugInterface->DumpBuffer(
1644 // &meSurfaceParams.ps4xMeMvDataBuffer->OsResource,
1645 // CodechalDbgAttr::attrOutput,
1646 // "MvData",
1647 // meSurfaceParams.ps4xMeMvDataBuffer->dwHeight * meSurfaceParams.ps4xMeMvDataBuffer->dwPitch,
1648 // CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledFrameFieldHeightInMb4x * 4) : 0,
1649 // CODECHAL_MEDIA_STATE_4X_ME);
1650 // m_debugInterface->DumpBuffer(
1651 // &meSurfaceParams.psMeDistortionBuffer->OsResource,
1652 // CodechalDbgAttr::attrOutput,
1653 // "MeDist",
1654 // meSurfaceParams.psMeDistortionBuffer->dwHeight *meSurfaceParams.psMeDistortionBuffer->dwPitch,
1655 // CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4 * 10), 8) : 0,
1656 // CODECHAL_MEDIA_STATE_4X_ME);
1657
1658 // if (m_16xMeSupported)
1659 // {
1660 // // Dump 16x scaling and HME buffers
1661 // m_debugInterface->DumpYUVSurface(
1662 // m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER),
1663 // CodechalDbgAttr::attrReferenceSurfaces,
1664 // "16xScaledSurf");
1665 // if (m_16xMeEnabled)
1666 // {
1667 // m_debugInterface->DumpBuffer(
1668 // &meSurfaceParams.ps16xMeMvDataBuffer->OsResource,
1669 // CodechalDbgAttr::attrOutput,
1670 // "MvData",
1671 // meSurfaceParams.ps16xMeMvDataBuffer->dwHeight *meSurfaceParams.ps16xMeMvDataBuffer->dwPitch,
1672 // CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledFrameFieldHeightInMb16x * 4) : 0,
1673 // CODECHAL_MEDIA_STATE_16X_ME);
1674 // }
1675 // }
1676 //}
1677 // dump VDEncStreamin
1678 /* m_debugInterface->DumpBuffer(
1679 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
1680 CodechalDbgAttr::attrOutput,
1681 "Output",
1682 (MOS_ALIGN_CEIL(m_frameHeight, 32) * (MOS_ALIGN_CEIL(m_frameFieldHeight, 32)) / 16),
1683 0,
1684 CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN);*/
1685
1686 if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
1687 {
1688 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1689 syncParams.GpuContext = m_renderContext;
1690 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
1691
1692 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
1693 m_waitForEnc = true;
1694 }
1695 #endif
1696
1697 return eStatus;
1698 }
1699
StatusReportCleanup(EncodeStatusReport * encodeStatusReport,HCPPakHWTileSizeRecord_G11 * tileStatusReport,CODECHAL_ENCODE_BUFFER * tileSizeStreamoutBuffer,PMOS_INTERFACE osInterface,uint8_t * tempBsBuffer,uint8_t * bitstream)1700 static void StatusReportCleanup(
1701 EncodeStatusReport* encodeStatusReport,
1702 HCPPakHWTileSizeRecord_G11* tileStatusReport,
1703 CODECHAL_ENCODE_BUFFER* tileSizeStreamoutBuffer,
1704 PMOS_INTERFACE osInterface,
1705 uint8_t* tempBsBuffer,
1706 uint8_t* bitstream)
1707 {
1708
1709 if (tempBsBuffer)
1710 {
1711 MOS_FreeMemory(tempBsBuffer);
1712 }
1713
1714 if (bitstream)
1715 {
1716 osInterface->pfnUnlockResource(osInterface, &encodeStatusReport->pCurrRefList->resBitstreamBuffer);
1717 }
1718
1719 if (tileStatusReport)
1720 {
1721 // clean-up the tile status report buffer
1722 if (encodeStatusReport->CodecStatus == CODECHAL_STATUS_SUCCESSFUL)
1723 {
1724 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1725 {
1726 MOS_ZeroMemory(&tileStatusReport[i], sizeof(tileStatusReport[i]));
1727 }
1728 }
1729
1730 osInterface->pfnUnlockResource(osInterface, &tileSizeStreamoutBuffer->sResource);
1731 }
1732 }
1733
~CodechalVdencVp9StateG11()1734 CodechalVdencVp9StateG11::~CodechalVdencVp9StateG11()
1735 {
1736 CODECHAL_ENCODE_FUNCTION_ENTER;
1737
1738 if (m_scalabilityState)
1739 {
1740 MOS_FreeMemAndSetNull(m_scalabilityState);
1741 }
1742 //Note: virtual engine interface destroy is done in MOS layer
1743
1744 return;
1745 }
1746
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1747 MOS_STATUS CodechalVdencVp9StateG11::GetStatusReport(
1748 EncodeStatus* encodeStatus,
1749 EncodeStatusReport* encodeStatusReport)
1750 {
1751 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1752
1753 CODECHAL_ENCODE_FUNCTION_ENTER;
1754
1755 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1756 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1757
1758 if (encodeStatusReport->UsedVdBoxNumber == 1)
1759 {
1760 encodeStatusReport->bitstreamSize = encodeStatus->dwMFCBitstreamByteCountPerFrame + encodeStatus->dwHeaderBytesInserted;
1761 encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses;
1762
1763 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1764 return eStatus;
1765 }
1766
1767 // Tile record always in m_tileRecordBuffer even in scala mode
1768 PCODECHAL_ENCODE_BUFFER presTileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1769
1770 MOS_LOCK_PARAMS lockFlags;
1771 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1772 HCPPakHWTileSizeRecord_G11* tileStatusReport = (HCPPakHWTileSizeRecord_G11*)m_osInterface->pfnLockResource(
1773 m_osInterface,
1774 &presTileSizeStatusReport->sResource,
1775 &lockFlags);
1776 CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1777
1778 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1779 encodeStatusReport->PanicMode = false;
1780 encodeStatusReport->AverageQp = 0;
1781 encodeStatusReport->QpY = 0;
1782 encodeStatusReport->SuggestedQpYDelta = 0;
1783 encodeStatusReport->NumberPasses = 1;
1784 encodeStatusReport->bitstreamSize = 0;
1785 encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1786
1787 double sum_qp = 0.0;
1788 uint32_t totalCU = 0;
1789 CODECHAL_ENCODE_CHK_COND_RETURN((encodeStatusReport->NumberTilesInFrame == 0), "ERROR - invalid number of tiles in frame");
1790 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1791 {
1792 if (tileStatusReport[i].Length == 0)
1793 {
1794 encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1795 StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
1796 return eStatus;
1797 }
1798
1799 encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1800 totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
1801 sum_qp += tileStatusReport[i].Hcp_Qp_Status_Count;
1802 }
1803
1804 encodeStatusReport->QpY = encodeStatusReport->AverageQp =
1805 (uint8_t)((sum_qp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
1806
1807 if (m_enableTileStitchByHW)
1808 {
1809 StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
1810 return eStatus;
1811 }
1812
1813 uint8_t* bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
1814 uint8_t* tempBsBuffer = bufPtr;
1815 CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
1816
1817 CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
1818 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1819 lockFlags.ReadOnly = 1;
1820 uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
1821 m_osInterface,
1822 &currRefList.resBitstreamBuffer,
1823 &lockFlags);
1824 if (bitstream == nullptr)
1825 {
1826 MOS_SafeFreeMemory(tempBsBuffer);
1827 CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
1828 }
1829
1830 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1831 {
1832 uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
1833 uint32_t len = tileStatusReport[i].Length;
1834
1835 if (offset + len >= m_bitstreamUpperBound)
1836 {
1837 eStatus = MOS_STATUS_INVALID_FILE_SIZE;
1838 CODECHAL_ENCODE_ASSERTMESSAGE("Error: Tile offset and length add up to more than bitstream upper bound");
1839 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
1840 encodeStatusReport->bitstreamSize = 0;
1841 StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
1842 return eStatus;
1843 }
1844
1845 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
1846 bufPtr += len;
1847 }
1848
1849 MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
1850 MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
1851 m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
1852
1853 StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
1854
1855 return eStatus;
1856 }
1857
DecideEncodingPipeNumber()1858 MOS_STATUS CodechalVdencVp9StateG11::DecideEncodingPipeNumber()
1859 {
1860 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1861
1862 CODECHAL_ENCODE_FUNCTION_ENTER;
1863
1864 m_numPipe = m_numVdbox;
1865
1866 uint8_t num_tile_columns = (1 << m_vp9PicParams->log2_tile_columns);
1867
1868 if (num_tile_columns > m_numPipe)
1869 {
1870 m_numPipe = 1;
1871 }
1872
1873 if (num_tile_columns < m_numPipe)
1874 {
1875 if (num_tile_columns >= 1 && num_tile_columns <= 4)
1876 {
1877 m_numPipe = num_tile_columns;
1878 }
1879 else
1880 {
1881 m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
1882 }
1883 }
1884
1885 if (m_numPipe == 0 || m_numPipe > CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE)
1886 {
1887 m_numPipe = 1;
1888 }
1889
1890 if (m_numPipe > 1)
1891 {
1892 m_scalableMode = true; // KMD VE is now enabled by default. Mediasolo can also use the VE interface.
1893 }
1894 else
1895 {
1896 m_scalableMode = false;
1897 }
1898
1899 if (m_scalabilityState)
1900 {
1901 // Create/ re-use a GPU context with 2 pipes
1902 m_scalabilityState->ucScalablePipeNum = m_numPipe;
1903 }
1904
1905 return eStatus;
1906 }
1907
PlatformCapabilityCheck()1908 MOS_STATUS CodechalVdencVp9StateG11::PlatformCapabilityCheck()
1909 {
1910 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1911
1912 CODECHAL_ENCODE_FUNCTION_ENTER;
1913
1914 CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
1915
1916 if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
1917 {
1918 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
1919 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
1920 }
1921
1922 if (m_numPipe > 1)
1923 {
1924 m_singleTaskPhaseSupported = m_singleTaskPhaseSupportedInPak = false;
1925 }
1926
1927 //so far only validate Tiling for VDEnc VP9
1928 uint8_t col = (1 << (m_vp9PicParams->log2_tile_columns));
1929 uint8_t row = (1 << (m_vp9PicParams->log2_tile_rows));
1930
1931 // Single pipe cannot handle N X M or M X N tile row/column cases, either one has to be 1, return error
1932 if ((col > 1) && (row > 1) && (m_numPipe == 1))
1933 {
1934 CODECHAL_ENCODE_ASSERTMESSAGE("Single pipe cannot handle N X M or M X N tile row/column cases, either one has to be 1");
1935 return MOS_STATUS_INVALID_PARAMETER;
1936 }
1937
1938 // Handling invalid tiling and scalability cases. When NumTilingColumn does not match NumPipe fall back to single pipe mode
1939 if (m_numPipe > 1 && (col != m_numPipe))
1940 {
1941 if ((col == 1) || (row == 1))
1942 {
1943 m_numPipe = 1; // number of tile columns cannot be greater than number of pipes (VDBOX), run in single pipe mode
1944 m_scalableMode = false;
1945 }
1946 else
1947 {
1948 CODECHAL_ENCODE_ASSERTMESSAGE("Number of tile columns cannot be greater than number of pipes (VDBOX) when number of rows > 1");
1949 return MOS_STATUS_INVALID_PARAMETER;
1950 }
1951 }
1952
1953 //num columns must be either 2 or 4 for scalability mode, H/W limitation
1954 if ((m_numPipe > 1) && (m_numPipe != 2) && (m_numPipe != 4))
1955 {
1956 CODECHAL_ENCODE_ASSERTMESSAGE("Num pipes must be either 2 or 4 for scalability mode, H/W limitation");
1957 return MOS_STATUS_INVALID_PARAMETER;
1958 }
1959
1960 // Tile width needs to be minimum size 256, error out if less
1961 if ((col != 1) && ((m_vp9PicParams->SrcFrameWidthMinus1 + 1) < col * CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH))
1962 {
1963 CODECHAL_ENCODE_ASSERTMESSAGE("Incorrect number of columns input parameter, Tile width is < 256");
1964 return MOS_STATUS_INVALID_PARAMETER;
1965 }
1966
1967 if (row > 4)
1968 {
1969 CODECHAL_ENCODE_ASSERTMESSAGE("Max number of rows cannot exceeds 4 by VP9 Spec.");
1970 return MOS_STATUS_INVALID_PARAMETER;
1971 }
1972
1973 if (m_vdencEnabled && VP9_ENCODED_CHROMA_FORMAT_YUV444 == m_vp9SeqParams->SeqFlags.fields.EncodedFormat && m_vp9SeqParams->TargetUsage == 7)
1974 {
1975 CODECHAL_ENCODE_ASSERTMESSAGE("Speed mode is not supported in VDENC 444, resetting TargetUsage to Normal mode\n");
1976 m_vp9SeqParams->TargetUsage = 4;
1977 }
1978
1979 // number of tiles for this frame
1980 m_numberTilesInFrame = col * row;
1981 m_numUsedVdbox = m_numPipe;
1982
1983 if (!m_newSeq)
1984 {
1985 // If there is no new SEQ header, then the number of passes is decided here.
1986 // Otherwise, it is done in SetSequenceStructs. For example, BRC setting may be changed.
1987 m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
1988 }
1989 // Last place where scalable mode is decided
1990 if (m_frameNum == 0)
1991 {
1992 m_lastFrameScalableMode = m_scalableMode;
1993 }
1994 return eStatus;
1995 }
1996
SetGpuCtxCreatOption()1997 MOS_STATUS CodechalVdencVp9StateG11::SetGpuCtxCreatOption()
1998 {
1999 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2000
2001 CODECHAL_ENCODE_FUNCTION_ENTER;
2002
2003 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2004 {
2005 CodechalEncoderState::SetGpuCtxCreatOption();
2006 }
2007 else
2008 {
2009 m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
2010 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
2011
2012 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
2013 m_scalabilityState,
2014 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2015 }
2016
2017 return eStatus;
2018 }
2019
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)2020 MOS_STATUS CodechalVdencVp9StateG11::SetAndPopulateVEHintParams(
2021 PMOS_COMMAND_BUFFER cmdBuffer)
2022 {
2023 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2024
2025 CODECHAL_ENCODE_FUNCTION_ENTER;
2026
2027 if (!MOS_VE_SUPPORTED(m_osInterface))
2028 {
2029 return eStatus;
2030 }
2031
2032 CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
2033 MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
2034
2035 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2036 {
2037 scalSetParms.bNeedSyncWithPrevious = true;
2038 }
2039
2040 int32_t currentPass = GetCurrentPass();
2041 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2042 // Scalable mode only
2043 if (m_scalableMode)
2044 {
2045 for (auto i = 0; i < m_numPipe; i++)
2046 {
2047 scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex].OsResource;
2048 }
2049 }
2050
2051 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
2052 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2053 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
2054
2055 return eStatus;
2056 }
2057
SetTileData()2058 MOS_STATUS CodechalVdencVp9StateG11::SetTileData()
2059 {
2060 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2061
2062 CODECHAL_ENCODE_FUNCTION_ENTER;
2063
2064 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 *tileCodingParams = m_tileParams;
2065
2066 tileCodingParams->Mode = CODECHAL_ENCODE_MODE_VP9;
2067
2068 uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
2069 uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
2070 uint32_t numTiles = numTileRows * numTileColumns;
2071
2072 uint32_t bitstreamSizePerTile = m_bitstreamUpperBound / (numTiles * CODECHAL_CACHELINE_SIZE);
2073 uint32_t numLcusInTiles = 0, numCuRecord = 64;
2074 uint32_t cuLevelStreamoutOffset = 0, sliceSizeStreamoutOffset = 0, bitstreamByteOffset = 0, sseRowstoreOffset = 0;
2075
2076 for (uint32_t tileCntr = 0; tileCntr < numTiles; tileCntr++)
2077 {
2078 uint32_t tileX, tileY, tileStartSbX, tileStartSbY, tileWidthInSb, tileHeightInSb, lastTileColWidth, lastTileRowHeight, numLcuInTile;
2079 bool isLastTileCol, isLastTileRow;
2080
2081 tileX = tileCntr % numTileColumns;
2082 tileY = tileCntr / numTileColumns;
2083
2084 isLastTileCol = ((numTileColumns - 1) == tileX);
2085 isLastTileRow = ((numTileRows - 1) == tileY);
2086
2087 tileStartSbX = (tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns;
2088 tileStartSbY = (tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows;
2089
2090 tileWidthInSb = (isLastTileCol ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) - tileStartSbX;
2091 tileHeightInSb = (isLastTileRow ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) - tileStartSbY;
2092
2093 lastTileColWidth = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameWidthMinus1 + 1 - tileStartSbX * CODEC_VP9_SUPER_BLOCK_WIDTH), CODEC_VP9_MIN_BLOCK_WIDTH) / CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
2094 lastTileRowHeight = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameHeightMinus1 + 1 - tileStartSbY * CODEC_VP9_SUPER_BLOCK_HEIGHT), CODEC_VP9_MIN_BLOCK_HEIGHT) / CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
2095
2096 numLcuInTile = tileWidthInSb * tileHeightInSb;
2097 tileCodingParams[tileCntr].NumberOfActiveBePipes = m_numPipe;
2098 tileCodingParams[tileCntr].NumOfTilesInFrame = numTiles;
2099 tileCodingParams[tileCntr].NumOfTileColumnsInFrame = numTileColumns;
2100 tileCodingParams[tileCntr].TileStartLCUX = tileStartSbX;
2101 tileCodingParams[tileCntr].TileStartLCUY = tileStartSbY;
2102 tileCodingParams[tileCntr].IsLastTileofColumn = isLastTileRow;
2103 tileCodingParams[tileCntr].IsLastTileofRow = isLastTileCol;
2104
2105 tileCodingParams[tileCntr].TileWidthInMinCbMinus1 = isLastTileCol ? lastTileColWidth : (tileWidthInSb * CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
2106 tileCodingParams[tileCntr].TileHeightInMinCbMinus1 = isLastTileRow ? lastTileRowHeight : (tileHeightInSb * CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
2107
2108 if (m_scalableMode)
2109 {
2110 sseRowstoreOffset = (tileStartSbX + (3 * tileX)) << 5;
2111
2112 tileCodingParams[tileCntr].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * 64),
2113 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
2114 tileCodingParams[tileCntr].presHcpSyncBuffer = &m_hcpScalabilitySyncBuffer.sResource;
2115 tileCodingParams[tileCntr].SliceSizeStreamoutOffset = sliceSizeStreamoutOffset;
2116 tileCodingParams[tileCntr].SseRowstoreOffset = sseRowstoreOffset;
2117 tileCodingParams[tileCntr].BitstreamByteOffset = bitstreamByteOffset;
2118 tileCodingParams[tileCntr].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
2119
2120 cuLevelStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
2121 sliceSizeStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
2122 sseRowstoreOffset += (numLcuInTile * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
2123 bitstreamByteOffset += bitstreamSizePerTile;
2124 numLcusInTiles += numLcuInTile;
2125
2126 tileCodingParams[tileCntr].TileSizeStreamoutOffset = (tileCntr*m_hcpInterface->GetPakHWTileSizeRecordSize() + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
2127
2128 //DW5
2129 const uint32_t frameStatsStreamoutSize = m_brcPakStatsBufSize;
2130 tileCodingParams[tileCntr].PakTileStatisticsOffset = (tileCntr*frameStatsStreamoutSize + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
2131
2132 //DW12
2133 tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = ((tileCntr * m_probabilityCounterBufferSize) + (CODECHAL_CACHELINE_SIZE - 1)) / CODECHAL_CACHELINE_SIZE;
2134 }
2135 else
2136 {
2137 tileCodingParams[tileCntr].CuRecordOffset = 0;
2138 tileCodingParams[tileCntr].presHcpSyncBuffer = nullptr;
2139 tileCodingParams[tileCntr].SliceSizeStreamoutOffset = 0;
2140 tileCodingParams[tileCntr].SseRowstoreOffset = 0;
2141 tileCodingParams[tileCntr].BitstreamByteOffset = 0;
2142 tileCodingParams[tileCntr].CuLevelStreamoutOffset = 0;
2143 tileCodingParams[tileCntr].TileSizeStreamoutOffset = 0;
2144
2145 //DW5
2146 tileCodingParams[tileCntr].PakTileStatisticsOffset = 0;
2147
2148 //DW12
2149 tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = 0;
2150 }
2151 }
2152
2153 return eStatus;
2154 }
2155
SetTileCommands(PMOS_COMMAND_BUFFER cmdBuffer)2156 MOS_STATUS CodechalVdencVp9StateG11::SetTileCommands(
2157 PMOS_COMMAND_BUFFER cmdBuffer)
2158 {
2159 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2160
2161 CODECHAL_ENCODE_FUNCTION_ENTER;
2162
2163 MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G11 vdencWalkerStateParams;
2164 vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_VP9;
2165 vdencWalkerStateParams.pVp9EncPicParams = m_vp9PicParams;
2166 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
2167
2168 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2169 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2170 // MFXPipeDone should not be set for tail insertion
2171 vdPipelineFlushParams.Flags.bWaitDoneMFX =
2172 (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
2173 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2174 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
2175 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2176
2177 if (IsFirstPipe() && IsFirstPass())
2178 {
2179 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData());
2180 }
2181
2182 MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
2183 uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
2184 uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
2185 int currentPipe = GetCurrentPipe();
2186 for (uint32_t tileRow = 0, tileIdx = 0; tileRow < numTileRows; tileRow++)
2187 {
2188 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++, tileIdx++)
2189 {
2190 if (m_numPipe > 1)
2191 {
2192 if (tileCol != currentPipe)
2193 {
2194 continue;
2195 }
2196 }
2197
2198 // HCP_TILE_CODING commmand
2199 CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG11 *>(m_hcpInterface)->AddHcpTileCodingCmd(cmdBuffer, &m_tileParams[tileIdx]));
2200
2201 MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
2202 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(cmdBuffer, nullptr, &vdencWeightOffsetParams));
2203
2204 vdencWalkerStateParams.pTileCodingParams = &m_tileParams[tileIdx];
2205 vdencWalkerStateParams.dwTileId = tileIdx;
2206 switch (m_numPipe)
2207 {
2208 case 0:
2209 case 1:
2210 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
2211 break;
2212 case 2:
2213 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
2214 break;
2215 case 4:
2216 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
2217 break;
2218 default:
2219 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
2220 CODECHAL_ENCODE_ASSERTMESSAGE("Num Pipes invalid");
2221 return eStatus;
2222 break;
2223 }
2224 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
2225
2226 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipelineFlushParams));
2227 }
2228 }
2229
2230 return eStatus;
2231 }
2232
ExecuteTileLevel()2233 MOS_STATUS CodechalVdencVp9StateG11::ExecuteTileLevel()
2234 {
2235 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2236
2237 CODECHAL_ENCODE_FUNCTION_ENTER;
2238
2239 int currentPipe = GetCurrentPipe();
2240 int currentPass = GetCurrentPass();
2241
2242 if(currentPipe < 0 || currentPass < 0)
2243 {
2244 return MOS_STATUS_INVALID_PARAMETER;
2245 }
2246
2247 MOS_COMMAND_BUFFER cmdBuffer;
2248 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2249
2250 if (IsFirstPipe())
2251 {
2252 MHW_BATCH_BUFFER secondLevelBatchBuffer;
2253 MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
2254 secondLevelBatchBuffer.dwOffset = 0;
2255 secondLevelBatchBuffer.bSecondLevel = true;
2256
2257 if (!m_hucEnabled)
2258 {
2259 secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
2260 }
2261 else
2262 {
2263 secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
2264 }
2265 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
2266 &cmdBuffer,
2267 &secondLevelBatchBuffer));
2268 }
2269
2270 // Setup Tile level PAK commands
2271 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileCommands(&cmdBuffer));
2272
2273 // Send MI_FLUSH command
2274 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2275 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2276 flushDwParams.bVideoPipelineCacheInvalidate = true;
2277 if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[currentPipe].sResource))
2278 {
2279 flushDwParams.pOsResource = &m_stitchWaitSemaphoreMem[currentPipe].sResource;
2280 flushDwParams.dwDataDW1 = currentPass + 1;
2281 }
2282 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2283
2284 if (IsFirstPipe())
2285 {
2286 if (m_numPipe > 1 && m_enableTileStitchByHW)
2287 {
2288 for (auto i = 1; i < m_numPipe; i++)
2289 {
2290 if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource) && m_hucEnabled)
2291 {
2292 // This semaphore waits for all pipes except pipe 1 vdenc+pak to finish processing before stitching bitstream
2293 SendHWWaitCommand(&m_stitchWaitSemaphoreMem[i].sResource, &cmdBuffer, (currentPass + 1));
2294 }
2295 }
2296
2297 uint32_t index = m_virtualEngineBBIndex;
2298 HucCopyParams copyParams;
2299
2300 copyParams.size = m_tileRecordBuffer[index].sResource.iSize;
2301 copyParams.presSrc = &m_tileRecordBuffer[index].sResource;
2302
2303 copyParams.presDst = &m_resBitstreamBuffer;
2304 copyParams.lengthOfTable = (uint8_t)(m_numberTilesInFrame);
2305
2306 auto hucCmdInitializer = static_cast<CodechalCmdInitializerG11*>(m_hucCmdInitializer);
2307 CODECHAL_ENCODE_CHK_STATUS_RETURN(hucCmdInitializer->AddCopyCmds(&cmdBuffer, ©Params));
2308 }
2309 // PAK integration kernel to integrate stats for next HUC pass
2310 if (m_scalableMode && m_hucEnabled && m_isTilingSupported && IsFirstPipe())
2311 {
2312 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9PakInt(&cmdBuffer));
2313 }
2314
2315 if (m_scalableMode && IsLastPass())
2316 {
2317 // In scalablemode, singletaskPhaseSupported is off. On last pass we have to make sure to signal
2318 // m_lastTaskInPhase flag to true so that end status report can send a sync tag
2319 m_lastTaskInPhase = true;
2320 }
2321
2322 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2323
2324 if (!m_scalableMode) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2325 {
2326 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2327 }
2328 }
2329
2330 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2331 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2332 if (m_singleTaskPhaseSupported && m_hucEnabled && IsLastPass())
2333 {
2334 m_lastTaskInPhase = true; //HPU singletask phase mode only
2335 }
2336
2337 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2338 {
2339 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2340 }
2341
2342 std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
2343 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
2344 &cmdBuffer,
2345 CODECHAL_NUM_MEDIA_STATES,
2346 currPassName.data())));
2347
2348 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2349
2350 if (IsFirstPipe() &&
2351 m_waitForEnc &&
2352 IsFirstPass() &&
2353 !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2354 {
2355 MOS_SYNC_PARAMS syncParams;
2356 syncParams = g_cInitSyncParams;
2357 syncParams.GpuContext = m_videoContext;
2358 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
2359
2360 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
2361 m_waitForEnc = false;
2362 }
2363
2364 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2365 {
2366 bool renderFlags = m_videoContextUsesNullHw;
2367
2368 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
2369 m_lastTaskInPhase = false;
2370
2371 CODECHAL_DEBUG_TOOL(
2372 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2373 m_resVdencPakObjCmdStreamOutBuffer,
2374 CodechalDbgAttr::attrPakObjStreamout,
2375 currPassName.data(),
2376 m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE,
2377 0,
2378 CODECHAL_NUM_MEDIA_STATES));
2379
2380 if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
2381 //CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
2382 //m_debugInterface->DumpBuffer(
2383 // (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
2384 // CodechalDbgAttr::attrOutput,
2385 // "SegMap_Out",
2386 // CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
2387 // 0,
2388 // CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
2389 }
2390
2391 if (m_mmcState) {
2392 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2393 });
2394 }
2395
2396 if (IsFirstPipe() && IsLastPass())
2397 {
2398 if (m_vp9PicParams->PicFlags.fields.super_frame && m_tsEnabled)
2399 {
2400 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructSuperFrame());
2401 }
2402 }
2403
2404 // Increment the second level batch buffer index at the end of every pass
2405 if (IsLastPipe())
2406 {
2407 if (m_hucEnabled)
2408 {
2409 // We save the index of the 2nd level batch buffer in case there is a pass that needs the last SLBB
2410 m_lastVdencPictureState2ndLevelBBIndex = m_vdencPictureState2ndLevelBBIndex;
2411 }
2412 m_vdencPictureState2ndLevelBBIndex = (m_vdencPictureState2ndLevelBBIndex + 1) % CODECHAL_VP9_ENCODE_RECYCLED_BUFFER_NUM;
2413 }
2414
2415 // Reset parameters for next PAK execution
2416 if (IsLastPipe() && IsLastPass())
2417 {
2418
2419 if ((currentPipe == 0) &&
2420 m_signalEnc &&
2421 !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
2422 {
2423 // signal semaphore
2424 MOS_SYNC_PARAMS syncParams;
2425 syncParams = g_cInitSyncParams;
2426 syncParams.GpuContext = m_videoContext;
2427 syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;
2428
2429 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2430 m_semaphoreObjCount++;
2431 }
2432
2433 m_prevFrameInfo.KeyFrame = !m_vp9PicParams->PicFlags.fields.frame_type;
2434 m_prevFrameInfo.IntraOnly = (m_vp9PicParams->PicFlags.fields.frame_type == CODEC_VP9_KEY_FRAME) || m_vp9PicParams->PicFlags.fields.intra_only;
2435 m_prevFrameInfo.ShowFrame = m_vp9PicParams->PicFlags.fields.show_frame;
2436 m_prevFrameInfo.FrameWidth = m_oriFrameWidth;
2437 m_prevFrameInfo.FrameHeight = m_oriFrameHeight;
2438 m_currMvTemporalBufferIndex ^= 0x01;
2439 m_contextFrameTypes[m_vp9PicParams->PicFlags.fields.frame_context_idx] = m_vp9PicParams->PicFlags.fields.frame_type;
2440 m_prevFrameSegEnabled = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
2441
2442 // Reset parameters for next PAK execution
2443 if (!m_singleTaskPhaseSupported)
2444 {
2445 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2446 }
2447
2448 m_newPpsHeader = 0;
2449 m_newSeqHeader = 0;
2450 m_frameNum++;
2451 // Save the last frame's scalable mode flag to prevent switching buffers when doing next pass
2452 m_lastFrameScalableMode = m_scalableMode;
2453
2454 }
2455
2456 return eStatus;
2457 }
2458
ExecuteSliceLevel()2459 MOS_STATUS CodechalVdencVp9StateG11::ExecuteSliceLevel()
2460 {
2461 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2462
2463 CODECHAL_ENCODE_FUNCTION_ENTER;
2464
2465 return ExecuteTileLevel();
2466 }
2467
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & pipeModeSelectParams)2468 void CodechalVdencVp9StateG11::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& pipeModeSelectParams)
2469 {
2470 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2471
2472 CODECHAL_ENCODE_FUNCTION_ENTER;
2473
2474 CodechalVdencVp9State::SetHcpPipeModeSelectParams(pipeModeSelectParams);
2475
2476 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11& pipeModeSelectParamsG11 = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11&>(pipeModeSelectParams);
2477
2478 pipeModeSelectParamsG11.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
2479 pipeModeSelectParamsG11.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
2480 if (m_scalableMode)
2481 {
2482 // Running in the multiple VDBOX mode
2483 if (IsFirstPipe())
2484 {
2485 pipeModeSelectParamsG11.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
2486 }
2487 else
2488 {
2489 if (IsLastPipe())
2490 {
2491 pipeModeSelectParamsG11.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
2492 }
2493 else
2494 {
2495 pipeModeSelectParamsG11.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
2496 }
2497 }
2498
2499 pipeModeSelectParamsG11.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
2500 }
2501
2502 return;
2503 }
2504
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)2505 void CodechalVdencVp9StateG11::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
2506 {
2507 CodechalVdencVp9State::SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2508
2509 PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBBIndex];
2510 bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
2511
2512 if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
2513 {
2514 // overwrite presProbabilityCounterBuffer and it's params for scalable mode
2515 indObjBaseAddrParams.presProbabilityCounterBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
2516 indObjBaseAddrParams.dwProbabilityCounterOffset = m_tileStatsOffset.counterBuffer;
2517 indObjBaseAddrParams.dwProbabilityCounterSize = m_statsSize.counterBuffer;
2518 }
2519
2520 indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer? &tileRecordBuffer->sResource : nullptr;
2521 indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer? ((m_statsSize.tileSizeRecord) * GetNumTilesInFrame()) : 0;
2522 indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer? m_tileStatsOffset.tileSizeRecord: 0;
2523
2524 }
2525
VerifyCommandBufferSize()2526 MOS_STATUS CodechalVdencVp9StateG11::VerifyCommandBufferSize()
2527 {
2528 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2529
2530 CODECHAL_ENCODE_FUNCTION_ENTER;
2531
2532 if (UseLegacyCommandBuffer()) // legacy mode & resize CommandBuffer Size for every BRC pass
2533 {
2534 if (!m_singleTaskPhaseSupported)
2535 {
2536 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
2537 }
2538 }
2539 else // virtual engine
2540 {
2541 uint32_t requestedSize =
2542 m_pictureStatesSize +
2543 m_picturePatchListSize +
2544 m_extraPictureStatesSize +
2545 (m_sliceStatesSize * m_numSlices);
2546 requestedSize += requestedSize*m_numPassesInOnePipe;
2547 if (m_hucEnabled && m_brcEnabled)
2548 {
2549 requestedSize += m_brcMaxNumPasses*(m_defaultHucCmdsSize+m_defaultHucPatchListSize);
2550 }
2551 // Running in the multiple VDBOX mode
2552 int currentPipe = GetCurrentPipe();
2553 int currentPass = GetCurrentPass();
2554 if (currentPipe < 0 || currentPipe >= m_numPipe)
2555 {
2556 return MOS_STATUS_INVALID_PARAMETER;
2557 }
2558 if (currentPass < 0 || currentPass >= m_brcMaxNumPasses)
2559 {
2560 return MOS_STATUS_INVALID_PARAMETER;
2561 }
2562
2563 if (IsFirstPipe() && m_osInterface->bUsesPatchList)
2564 {
2565 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
2566 }
2567 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2568 PMOS_COMMAND_BUFFER cmdBuffer;
2569 if (m_osInterface->phasedSubmission)
2570 {
2571 cmdBuffer = &m_realCmdBuffer;
2572 }
2573 else
2574 {
2575 cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][(uint32_t)currentPipe][passIndex];
2576 }
2577
2578 if (Mos_ResourceIsNull(&cmdBuffer->OsResource) ||
2579 m_sizeOfVEBatchBuffer < requestedSize)
2580 {
2581 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
2582
2583 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
2584 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
2585 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
2586 allocParamsForBufferLinear.Format = Format_Buffer;
2587 allocParamsForBufferLinear.dwBytes = requestedSize;
2588 allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
2589
2590 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
2591 {
2592 if (cmdBuffer->pCmdBase)
2593 {
2594 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
2595 }
2596 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
2597 }
2598
2599 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
2600 m_osInterface,
2601 &allocParamsForBufferLinear,
2602 &cmdBuffer->OsResource);
2603 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
2604
2605 m_sizeOfVEBatchBuffer = requestedSize;
2606 }
2607
2608 if (cmdBuffer->pCmdBase == 0)
2609 {
2610 MOS_LOCK_PARAMS lockParams;
2611 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
2612 lockParams.WriteOnly = true;
2613 cmdBuffer->pCmdPtr = cmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &cmdBuffer->OsResource, &lockParams);
2614 cmdBuffer->iRemaining = m_sizeOfVEBatchBuffer;
2615 cmdBuffer->iOffset = 0;
2616
2617 if (cmdBuffer->pCmdBase == nullptr)
2618 {
2619 return MOS_STATUS_NULL_POINTER;
2620 }
2621 }
2622 }
2623
2624 return eStatus;
2625 }
2626
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)2627 MOS_STATUS CodechalVdencVp9StateG11::GetCommandBuffer(
2628 PMOS_COMMAND_BUFFER cmdBuffer)
2629 {
2630 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2631
2632 CODECHAL_ENCODE_FUNCTION_ENTER;
2633
2634 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2635
2636 if (UseLegacyCommandBuffer()) // legacy mode
2637 {
2638 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
2639 }
2640 else // virtual engine
2641 {
2642 if (m_osInterface->phasedSubmission)
2643 {
2644 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
2645 *cmdBuffer = m_realCmdBuffer;
2646 }
2647 else
2648 {
2649 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
2650
2651 int currentPipe = GetCurrentPipe();
2652 int currentPass = GetCurrentPass();
2653 if (currentPipe < 0 || currentPipe >= m_numPipe)
2654 {
2655 return MOS_STATUS_INVALID_PARAMETER;
2656 }
2657 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2658 *cmdBuffer = m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex];
2659 }
2660 }
2661
2662 return eStatus;
2663 }
2664
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)2665 MOS_STATUS CodechalVdencVp9StateG11::ReturnCommandBuffer(
2666 PMOS_COMMAND_BUFFER cmdBuffer)
2667 {
2668 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2669
2670 CODECHAL_ENCODE_FUNCTION_ENTER;
2671
2672 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2673
2674 if (UseLegacyCommandBuffer()) // legacy mode
2675 {
2676 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
2677 }
2678 else // virtual engine
2679 {
2680 if (m_osInterface->phasedSubmission)
2681 {
2682 m_realCmdBuffer = *cmdBuffer;
2683 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
2684 }
2685 else
2686 {
2687 int currentPipe = GetCurrentPipe();
2688 int currentPass = GetCurrentPass();
2689 if (currentPipe < 0 || currentPipe >= m_numPipe)
2690 {
2691 return MOS_STATUS_INVALID_PARAMETER;
2692 }
2693
2694 if (eStatus == MOS_STATUS_SUCCESS)
2695 {
2696 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2697 m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex] = *cmdBuffer;
2698 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
2699 }
2700 }
2701 }
2702
2703 return eStatus;
2704 }
2705
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)2706 MOS_STATUS CodechalVdencVp9StateG11::SubmitCommandBuffer(
2707 PMOS_COMMAND_BUFFER cmdBuffer,
2708 bool bNullRendering)
2709 {
2710 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2711
2712 CODECHAL_ENCODE_FUNCTION_ENTER;
2713
2714 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2715
2716 if (UseLegacyCommandBuffer()) // legacy mode
2717 {
2718 if (!IsRenderContext()) // Set VE Hints for video contexts only
2719 {
2720 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
2721 }
2722 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
2723 }
2724 else // virtual engine
2725 {
2726 if (m_osInterface->phasedSubmission)
2727 {
2728 CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(),&m_realCmdBuffer);
2729 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
2730 }
2731 else
2732 {
2733 if (!IsLastPipe())
2734 {
2735 return eStatus;
2736 }
2737 int currentPass = GetCurrentPass();
2738 for (auto i = 0; i < m_numPipe; i++)
2739 {
2740 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2741 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex];
2742
2743 if (cmdBuffer->pCmdBase)
2744 {
2745 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
2746 }
2747
2748 cmdBuffer->pCmdBase = 0;
2749 cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
2750 }
2751
2752 if (eStatus == MOS_STATUS_SUCCESS)
2753 {
2754 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
2755 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
2756 }
2757 }
2758 }
2759
2760 return eStatus;
2761 }
2762
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)2763 MOS_STATUS CodechalVdencVp9StateG11::SendPrologWithFrameTracking(
2764 PMOS_COMMAND_BUFFER cmdBuffer,
2765 bool frameTrackingRequested,
2766 MHW_MI_MMIOREGISTERS *mmioRegister)
2767 {
2768 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2769
2770 CODECHAL_ENCODE_FUNCTION_ENTER;
2771
2772 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2773
2774 if (IsRenderContext()) //Render context only
2775 {
2776 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested));
2777 return eStatus;
2778 }
2779 else // Legacy mode or virtual engine mode
2780 {
2781 if (!IsLastPipe())
2782 {
2783 return eStatus;
2784 }
2785 PMOS_COMMAND_BUFFER commandBufferInUse;
2786 if (m_realCmdBuffer.pCmdBase && m_scalableMode)
2787 {
2788 commandBufferInUse = &m_realCmdBuffer; //virtual engine mode
2789 }
2790 else
2791 {
2792 if (cmdBuffer && cmdBuffer->pCmdBase)
2793 {
2794 commandBufferInUse = cmdBuffer; //legacy mode
2795 }
2796 else
2797 {
2798 eStatus = MOS_STATUS_INVALID_PARAMETER;
2799 return eStatus;
2800 }
2801 }
2802 commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
2803 commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
2804 commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
2805 commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
2806 commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
2807 if (frameTrackingRequested && m_frameTrackingEnabled)
2808 {
2809 commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
2810 commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
2811 &m_encodeStatusBuf.resStatusBuffer;
2812 commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
2813 // Set media frame tracking address offset(the offset from the encoder status buffer page)
2814 commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
2815 }
2816
2817 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
2818 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
2819 genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
2820 genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
2821 genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
2822 genericPrologParams.dwStoreDataValue = m_storeData - 1;
2823 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
2824 }
2825
2826 return eStatus;
2827 }
2828
SetSemaphoreMem(PMOS_RESOURCE semaphoreMem,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t value)2829 MOS_STATUS CodechalVdencVp9StateG11::SetSemaphoreMem(
2830 PMOS_RESOURCE semaphoreMem,
2831 PMOS_COMMAND_BUFFER cmdBuffer,
2832 uint32_t value)
2833 {
2834 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2835
2836 CODECHAL_ENCODE_FUNCTION_ENTER;
2837
2838 CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
2839 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2840 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2841 storeDataParams.pOsResource = semaphoreMem;
2842 storeDataParams.dwResourceOffset = 0;
2843 storeDataParams.dwValue = value;
2844
2845 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2846 cmdBuffer,
2847 &storeDataParams));
2848
2849 return eStatus;
2850 }
2851
SendHWWaitCommand(PMOS_RESOURCE semaphoreMem,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t value)2852 MOS_STATUS CodechalVdencVp9StateG11::SendHWWaitCommand(
2853 PMOS_RESOURCE semaphoreMem,
2854 PMOS_COMMAND_BUFFER cmdBuffer,
2855 uint32_t value)
2856 {
2857 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2858
2859 CODECHAL_ENCODE_FUNCTION_ENTER;
2860 CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
2861
2862 MHW_MI_SEMAPHORE_WAIT_PARAMS semaphoreWaitParams;
2863 MOS_ZeroMemory(&semaphoreWaitParams, sizeof(semaphoreWaitParams));
2864 semaphoreWaitParams.presSemaphoreMem = semaphoreMem;
2865 semaphoreWaitParams.bPollingWaitMode = true;
2866 semaphoreWaitParams.dwSemaphoreData = value;
2867 semaphoreWaitParams.CompareOperation = MHW_MI_SAD_EQUAL_SDD;
2868 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiSemaphoreWaitCmd(cmdBuffer, &semaphoreWaitParams));
2869
2870 return eStatus;
2871 }
2872
SetDmemHuCPakInt()2873 MOS_STATUS CodechalVdencVp9StateG11::SetDmemHuCPakInt()
2874 {
2875 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2876
2877 CODECHAL_ENCODE_FUNCTION_ENTER;
2878
2879 uint8_t currPass = (uint8_t)GetCurrentPass();
2880
2881 MOS_LOCK_PARAMS lockFlags;
2882 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2883 lockFlags.WriteOnly = 1;
2884 // All bytes in below dmem for fields not used by VP9 to be set to 0xFF.
2885 HucPakIntDmem* dmem = (HucPakIntDmem*)m_osInterface->pfnLockResource(
2886 m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass], &lockFlags);
2887 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
2888
2889 MOS_ZeroMemory(dmem, sizeof(HucPakIntDmem));
2890 // CODECHAL_VDENC_VP9_PAK_INT_DMEM_OFFSETS_SIZE size of offsets in the CODECHAL_VDENC_VP9_HUC_PAK_INT_DMEM struct.
2891 // Reset offsets to 0xFFFFFFFF as unavailable
2892 memset(dmem, 0xFF, m_pakIntDmemOffsetsSize);
2893
2894 dmem->totalSizeInCommandBuffer = 0;
2895 dmem->offsetInCommandBuffer = 0xFFFF; // Not used for VP9, all bytes in dmem for fields not used are 0xFF
2896 dmem->picWidthInPixel = (uint16_t)m_frameWidth;
2897 dmem->picHeightInPixel = (uint16_t)m_frameHeight;
2898 dmem->totalNumberOfPaks = m_numPipe;
2899 dmem->codec = m_pakIntVp9CodecId;
2900 dmem->maxPass = m_brcMaxNumPasses; // Only VDEnc CQP and BRC
2901 dmem->currentPass = currPass + 1;
2902 dmem->lastTileBSStartInBytes = 0xFFFF;
2903 dmem->picStateStartInBytes = 0xFFFF;
2904
2905 // Offset 0 is for region 1 - output of integrated frame stats from PAK integration kernel
2906
2907 dmem->tileSizeRecordOffset[0] = m_frameStatsOffset.tileSizeRecord;
2908 dmem->vdencStatOffset[0] = m_frameStatsOffset.vdencStats;
2909 dmem->vp9PakStatOffset[0] = m_frameStatsOffset.pakStats;
2910 dmem->vp9CounterBufferOffset[0] = m_frameStatsOffset.counterBuffer;
2911
2912 //Offset 1 - 4 is for region 0 - Input to PAK integration kernel for all tile statistics per pipe
2913 for (auto i = 1; i <= m_numPipe; i++)
2914 {
2915 dmem->numTiles[i - 1] = (GetNumTilesInFrame()) / m_numPipe;
2916 dmem->tileSizeRecordOffset[i] = m_tileStatsOffset.tileSizeRecord + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.tileSizeRecord);
2917 dmem->vdencStatOffset[i] = m_tileStatsOffset.vdencStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.vdencStats);
2918 dmem->vp9PakStatOffset[i] = m_tileStatsOffset.pakStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.pakStats);
2919 dmem->vp9CounterBufferOffset[i] = m_tileStatsOffset.counterBuffer + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.counterBuffer);
2920 }
2921 m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass]);
2922
2923 return eStatus;
2924 }
2925
HuCVp9PakInt(PMOS_COMMAND_BUFFER cmdBuffer)2926 MOS_STATUS CodechalVdencVp9StateG11::HuCVp9PakInt(
2927 PMOS_COMMAND_BUFFER cmdBuffer)
2928 {
2929 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2930
2931 CODECHAL_ENCODE_FUNCTION_ENTER;
2932
2933 if (!IsFirstPipe())
2934 {
2935 return eStatus;
2936 }
2937
2938 CODECHAL_DEBUG_TOOL(
2939 uint32_t hucRegionSize[16] = { 0 };
2940 const char* hucRegionName[16] = { "\0" };
2941
2942 hucRegionName[0] = "_MultiPakStreamout_input";
2943 hucRegionSize[0] = m_tileStatsPakIntegrationBufferSize;
2944 hucRegionName[1] = "_IntegratedStreamout_output";
2945 hucRegionSize[1] = m_frameStatsPakIntegrationBufferSize;
2946 hucRegionName[4] = "_BitStream_input";
2947 hucRegionSize[4] = MOS_ALIGN_CEIL(m_bitstreamUpperBound, CODECHAL_PAGE_SIZE);
2948 hucRegionName[5] = "_BitStream_output";
2949 hucRegionSize[5] = MOS_ALIGN_CEIL(m_bitstreamUpperBound, CODECHAL_PAGE_SIZE);
2950 hucRegionName[6] = "_HistoryBufferOutput";
2951 hucRegionSize[6] = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
2952 hucRegionName[7] = "_HCPPICSTATEInputDummy";
2953 hucRegionSize[7] = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
2954 hucRegionName[8] = "_HCPPICSTATEOutputDummy";
2955 hucRegionSize[8] = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
2956 hucRegionName[9] = "_BrcDataOutputBuffer"; // This is the pak MMIO region 7 , not 4, of BRC update
2957 hucRegionSize[9] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
2958 hucRegionName[15] = "_TileRecordBuffer"; // This is the pak MMIO region 7 , not 4, of BRC update
2959 hucRegionSize[15] = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
2960 )
2961
2962 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
2963 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
2964 imemParams.dwKernelDescriptor = m_vdboxHucPakIntegrationKernelDescriptor;
2965 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(cmdBuffer, &imemParams));
2966
2967 // pipe mode select
2968 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
2969 pipeModeSelectParams.Mode = m_mode;
2970 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
2971
2972 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakInt());
2973
2974 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
2975 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
2976 dmemParams.presHucDataSource = &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()];
2977 dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
2978 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
2979 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
2980
2981 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
2982 MOS_ZeroMemory(&virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
2983 virtualAddrParams.regionParams[0].presRegion = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
2984 virtualAddrParams.regionParams[0].dwOffset = 0;
2985 virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource; // Region 1 - HuC Frame statistics output
2986 virtualAddrParams.regionParams[1].isWritable = true;
2987 virtualAddrParams.regionParams[4].presRegion = &m_hucPakIntDummyBuffer; // Region 4 - Not used for VP9
2988 virtualAddrParams.regionParams[5].presRegion = &m_hucPakIntDummyBuffer; // Region 5 - Not used for VP9
2989 virtualAddrParams.regionParams[5].isWritable = true;
2990 virtualAddrParams.regionParams[6].presRegion = &m_hucPakIntDummyBuffer; // Region 6 - Not used for VP9
2991 virtualAddrParams.regionParams[6].isWritable = true;
2992 virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntDummyBuffer; // Region 7 - Not used for VP9
2993 virtualAddrParams.regionParams[8].presRegion = &m_hucPakIntDummyBuffer; // Region 8 - Not used for VP9
2994 virtualAddrParams.regionParams[8].isWritable = true;
2995 virtualAddrParams.regionParams[9].presRegion = &m_hucPakIntBrcDataBuffer; // Region 9 - HuC outputs BRC data
2996 virtualAddrParams.regionParams[9].isWritable = true;
2997 virtualAddrParams.regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
2998 virtualAddrParams.regionParams[15].dwOffset = 0;
2999
3000 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
3001
3002 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
3003
3004 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(cmdBuffer, true));
3005
3006 // wait Huc completion (use HEVC bit for now)
3007 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3008 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3009 vdPipeFlushParams.Flags.bFlushHEVC = 1;
3010 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3011 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
3012
3013 // Flush the engine to ensure memory written out
3014 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3015 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3016 flushDwParams.bVideoPipelineCacheInvalidate = true;
3017 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
3018
3019 return eStatus;
3020 }
3021
ConstructPicStateBatchBuf(PMOS_RESOURCE picStateBuffer)3022 MOS_STATUS CodechalVdencVp9StateG11::ConstructPicStateBatchBuf(
3023 PMOS_RESOURCE picStateBuffer)
3024 {
3025 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3026
3027 CODECHAL_ENCODE_FUNCTION_ENTER;
3028
3029 CODECHAL_ENCODE_CHK_NULL_RETURN(picStateBuffer);
3030
3031 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hucCmdInitializer);
3032
3033 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CommandInitializerSetVp9Params(this));
3034
3035 MOS_COMMAND_BUFFER cmdBuffer;
3036 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3037
3038 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
3039 {
3040 // Send command buffer header at the beginning (OS dependent)
3041 bool requestFrameTracking = false;
3042 //For Superframes, there is an extra submission at the end, so submit with frame tracking there
3043 if (!m_vp9PicParams->PicFlags.fields.super_frame) {
3044 requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
3045 }
3046 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3047 m_firstTaskInPhase = false;
3048 }
3049
3050 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerVp9Execute(&cmdBuffer, picStateBuffer));
3051
3052 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
3053 {
3054 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3055 }
3056
3057 ReturnCommandBuffer(&cmdBuffer);
3058
3059 if (!m_singleTaskPhaseSupported)
3060 {
3061 bool renderFlags = m_videoContextUsesNullHw;
3062 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
3063 }
3064
3065 MOS_LOCK_PARAMS lockFlagsWriteOnly;
3066 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3067 lockFlagsWriteOnly.WriteOnly = 1;
3068 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, picStateBuffer, &lockFlagsWriteOnly);
3069 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3070
3071 MOS_COMMAND_BUFFER constructedCmdBuf;
3072 MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
3073 constructedCmdBuf.pCmdBase = (uint32_t *)data;
3074 constructedCmdBuf.pCmdPtr = (uint32_t *)data;
3075 constructedCmdBuf.iOffset = 0;
3076 constructedCmdBuf.iRemaining = m_vdencPicStateSecondLevelBatchBufferSize;
3077
3078 // HCP_VP9_PIC_STATE
3079 MHW_VDBOX_VP9_ENCODE_PIC_STATE picState;
3080 MOS_ZeroMemory(&picState, sizeof(picState));
3081 picState.pVp9PicParams = m_vp9PicParams;
3082 picState.pVp9SeqParams = m_vp9SeqParams;
3083 picState.ppVp9RefList = &(m_refList[0]);
3084 picState.PrevFrameParams.fields.KeyFrame = m_prevFrameInfo.KeyFrame;
3085 picState.PrevFrameParams.fields.IntraOnly = m_prevFrameInfo.IntraOnly;
3086 picState.PrevFrameParams.fields.Display = m_prevFrameInfo.ShowFrame;
3087 picState.dwPrevFrmWidth = m_prevFrameInfo.FrameWidth;
3088 picState.dwPrevFrmHeight = m_prevFrameInfo.FrameHeight;
3089 picState.ucTxMode = m_txMode;
3090 picState.bSSEEnable = m_vdencBrcEnabled;
3091 picState.bUseDysRefSurface = (m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled;
3092 picState.bVdencPakOnlyPassFlag = m_vdencPakonlyMultipassEnabled;
3093 picState.uiMaxBitRate = m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
3094 picState.uiMinBitRate = m_vp9SeqParams->MinBitRate * CODECHAL_ENCODE_BRC_KBPS;
3095 constructedCmdBuf.iOffset += m_cmd1Size;
3096 m_hucPicStateOffset = (uint16_t)constructedCmdBuf.iOffset;
3097 constructedCmdBuf.pCmdPtr += constructedCmdBuf.iOffset/sizeof(uint32_t);
3098 eStatus = m_hcpInterface->AddHcpVp9PicStateEncCmd(&constructedCmdBuf, nullptr, &picState);
3099 if (eStatus != MOS_STATUS_SUCCESS)
3100 {
3101 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
3102 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add HCP_VP9_PIC_STATE command.");
3103 return eStatus;
3104 }
3105
3106 // HCP_VP9_SEGMENT_STATE
3107 MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
3108 MOS_ZeroMemory(&segmentState, sizeof(segmentState));
3109 segmentState.Mode = m_mode;
3110 segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
3111 uint8_t segmentCount = (m_vp9PicParams->PicFlags.fields.segmentation_enabled) ? CODEC_VP9_MAX_SEGMENTS : 1;
3112
3113 for (uint8_t i = 0; i < segmentCount; i++)
3114 {
3115 segmentState.ucCurrentSegmentId = i;
3116 eStatus = m_hcpInterface->AddHcpVp9SegmentStateCmd(&constructedCmdBuf, nullptr, &segmentState);
3117 if (eStatus != MOS_STATUS_SUCCESS)
3118 {
3119 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
3120 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MHW_VDBOX_VP9_SEGMENT_STATE command.");
3121 return eStatus;
3122 }
3123 }
3124
3125 // Adjust cmd buffer offset to have 8 segment state blocks
3126 if (segmentCount < CODEC_VP9_MAX_SEGMENTS)
3127 {
3128 // Max 7 segments, 32 bytes each
3129 uint8_t zeroBlock[m_segmentStateBlockSize * (CODEC_VP9_MAX_SEGMENTS - 1)];
3130 MOS_ZeroMemory(zeroBlock, sizeof(zeroBlock));
3131 Mhw_AddCommandCmdOrBB(m_osInterface, &constructedCmdBuf, nullptr, zeroBlock, (CODEC_VP9_MAX_SEGMENTS - segmentCount) * m_segmentStateBlockSize);
3132 }
3133 m_slbbImgStateOffset = (uint16_t)constructedCmdBuf.iOffset;
3134 constructedCmdBuf.iOffset += m_cmd2Size;
3135 constructedCmdBuf.pCmdPtr += m_cmd2Size/ sizeof(uint32_t);
3136
3137 // BB_END
3138 eStatus = m_miInterface->AddMiBatchBufferEnd(&constructedCmdBuf, nullptr);
3139 if (eStatus != MOS_STATUS_SUCCESS)
3140 {
3141 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
3142 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MI Batch Buffer End command.");
3143 return eStatus;
3144 }
3145 m_hucSlbbSize = (uint16_t)constructedCmdBuf.iOffset;
3146
3147 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
3148
3149 return eStatus;
3150 }
3151
HuCVp9Prob()3152 MOS_STATUS CodechalVdencVp9StateG11::HuCVp9Prob()
3153 {
3154 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3155
3156 CODECHAL_ENCODE_FUNCTION_ENTER;
3157 if (!IsFirstPipe())
3158 {
3159 return eStatus;
3160 }
3161
3162 CODECHAL_DEBUG_TOOL(
3163 uint32_t hucRegionSize[16] = { 0 };
3164 const char* hucRegionName[16] = { "\0" };
3165
3166 hucRegionName[0] = "_UpdatedProbBuffer"; // hucRegionName[0] is used to dump region 0 after HuC is run, which has updated probabilities. Input Region 0 is dumped separetely before HuC.
3167 hucRegionSize[0] = 32 * CODECHAL_CACHELINE_SIZE;
3168 hucRegionName[1] = "_CountersBuffer";
3169 hucRegionSize[1] = 193 * CODECHAL_CACHELINE_SIZE;
3170 hucRegionName[2] = "_ProbBuffer";
3171 hucRegionSize[2] = 32 * CODECHAL_CACHELINE_SIZE;
3172 hucRegionName[3] = "_ProbDeltaBuffer";
3173 hucRegionSize[3] = 29 * CODECHAL_CACHELINE_SIZE;
3174 hucRegionName[4] = "_UncompressedHdr";
3175 hucRegionSize[4] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
3176 hucRegionName[5] = "_CompressedHdr";
3177 hucRegionSize[5] = 32 * CODECHAL_CACHELINE_SIZE;
3178 hucRegionName[6] = "_SecondLevelBatchBuffer";
3179 hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
3180 hucRegionName[7] = "_SecondLevelBatchBuffer";
3181 hucRegionSize[7] = m_vdencPicStateSecondLevelBatchBufferSize;
3182 hucRegionName[8] = "_UncompressedHdr";
3183 hucRegionSize[8] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
3184 hucRegionName[9] = "_DefaultProbs";
3185 hucRegionSize[9] = sizeof(Keyframe_Default_Probs) + sizeof(Inter_Default_Probs);
3186 hucRegionName[10] = "_SuperFrameBuffer";
3187 hucRegionSize[10] = CODECHAL_ENCODE_VP9_BRC_SUPER_FRAME_BUFFER_SIZE;
3188 hucRegionName[11] = "_DataExtension";
3189 hucRegionSize[11] = CODECHAL_ENCODE_VP9_VDENC_DATA_EXTENSION_SIZE;
3190 )
3191
3192 MOS_COMMAND_BUFFER cmdBuffer;
3193 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3194
3195 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
3196 {
3197 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3198 // Send command buffer header at the beginning (OS dependent)
3199 // frame tracking tag is only added in the last command buffer header
3200 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
3201 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3202 m_firstTaskInPhase = false;
3203 }
3204 int currPass = GetCurrentPass();
3205
3206 // load kernel from WOPCM into L2 storage RAM
3207 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
3208 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
3209 imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencProbKernelDescriptor;
3210 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
3211
3212 // pipe mode select
3213 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
3214 pipeModeSelectParams.Mode = m_mode;
3215 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
3216
3217 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCVp9Prob());
3218
3219 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
3220 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
3221 dmemParams.presHucDataSource = &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx];
3222 dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucProbDmem), CODECHAL_CACHELINE_SIZE);
3223 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
3224 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
3225
3226 // Add Virtual addr
3227 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
3228 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3229 // Input regions
3230 virtualAddrParams.regionParams[0].presRegion = &m_resProbBuffer[m_vp9PicParams->PicFlags.fields.frame_context_idx];
3231 virtualAddrParams.regionParams[0].isWritable = true; // Region 0 is both read and write for HuC. Has input probabilities before running HuC and updated probabilities after running HuC, which will then be input to next pass
3232 if (m_scalableMode)
3233 {
3234 virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3235 virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.counterBuffer;
3236 }
3237 else
3238 {
3239 virtualAddrParams.regionParams[1].presRegion = &m_resProbabilityCounterBuffer;
3240 virtualAddrParams.regionParams[1].dwOffset = 0;
3241 }
3242 // If BRC enabled, BRC Pass 2 output SLBB -> input SLBB for HPU on pass 3 (HPU pass 1 and 3. BRC Update pass 1 and 2)
3243 // BRC Pass 1 output SLBB -> input SLBB for HPU on pass 1
3244 // If BRC not on , Driver prepared SLBB -> input to HPU on both passes
3245
3246 if (m_vdencBrcEnabled)
3247 {
3248 virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
3249 }
3250 else
3251 {
3252 virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
3253 }
3254
3255 virtualAddrParams.regionParams[8].presRegion = &m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
3256 virtualAddrParams.regionParams[9].presRegion = &m_resHucDefaultProbBuffer;
3257
3258 // Output regions
3259 virtualAddrParams.regionParams[2].presRegion = &m_resHucProbOutputBuffer; // Final probability output from HuC after each pass
3260 virtualAddrParams.regionParams[2].isWritable = true;
3261 virtualAddrParams.regionParams[3].presRegion = &m_resProbabilityDeltaBuffer;
3262 virtualAddrParams.regionParams[3].isWritable = true;
3263 virtualAddrParams.regionParams[4].presRegion = &m_resHucPakInsertUncompressedHeaderWriteBuffer;
3264 virtualAddrParams.regionParams[4].isWritable = true;
3265 virtualAddrParams.regionParams[5].presRegion = &m_resCompressedHeaderBuffer;
3266 virtualAddrParams.regionParams[5].isWritable = true;
3267 virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
3268 virtualAddrParams.regionParams[6].isWritable = true;
3269 virtualAddrParams.regionParams[10].presRegion = &m_resBitstreamBuffer;
3270 virtualAddrParams.regionParams[10].isWritable = true;
3271 virtualAddrParams.regionParams[11].presRegion = &m_resVdencDataExtensionBuffer;
3272 virtualAddrParams.regionParams[11].isWritable = true;
3273
3274 m_hpuVirtualAddrParams = virtualAddrParams;
3275 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
3276 // Store HUC_STATUS2 register bit 6 before HUC_Start command
3277 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
3278 // (HUC_Start command with last start bit set).
3279 CODECHAL_DEBUG_TOOL(
3280 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
3281 )
3282
3283 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
3284
3285 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
3286
3287 // wait Huc completion (use HEVC bit for now)
3288 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3289 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3290 vdPipeFlushParams.Flags.bFlushHEVC = 1;
3291 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3292 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
3293
3294 // Flush the engine to ensure memory written out
3295 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3296 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3297 flushDwParams.bVideoPipelineCacheInvalidate = true;
3298 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3299
3300 // Write HUC_STATUS mask: DW1 (mask value)
3301 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3302 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3303 storeDataParams.pOsResource = &m_resHucPakMmioBuffer;
3304 storeDataParams.dwResourceOffset = sizeof(uint32_t);
3305 storeDataParams.dwValue = 1 << 31; //Repak bit for HUC is bit 31
3306 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
3307
3308 // store HUC_STATUS register
3309 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
3310 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
3311 storeRegParams.presStoreBuffer = &m_resHucPakMmioBuffer;
3312 storeRegParams.dwOffset = 0;
3313 storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1)->hucStatusRegOffset;
3314 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
3315
3316 // For superframe pass, after HuC executes, write the updated size (combined frame size) to status report
3317 // So app knows total size instead of just the showframe size
3318 if (m_superFrameHucPass)
3319 {
3320 EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
3321 uint32_t baseOffset =
3322 (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) +
3323 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
3324
3325 MHW_MI_COPY_MEM_MEM_PARAMS copyMemMemParams;
3326 MOS_ZeroMemory(©MemMemParams, sizeof(copyMemMemParams));
3327
3328 copyMemMemParams.presSrc = virtualAddrParams.regionParams[11].presRegion;
3329 copyMemMemParams.dwSrcOffset = 0; // Updated framesize is 1st DW in buffer
3330 copyMemMemParams.presDst = &encodeStatusBuf->resStatusBuffer;
3331 copyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf->dwBSByteCountOffset;
3332
3333 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(
3334 &cmdBuffer,
3335 ©MemMemParams));
3336 }
3337
3338 if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
3339 {
3340 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3341 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3342 }
3343
3344 // Dump input probabilites before running HuC
3345 CODECHAL_DEBUG_TOOL(
3346 m_debugInterface->DumpHucRegion(
3347 virtualAddrParams.regionParams[0].presRegion,
3348 0,
3349 hucRegionSize[0],
3350 0,
3351 "_ProbBuffer",
3352 (virtualAddrParams.regionParams[0].isWritable ? true : false),
3353 currPass,
3354 CodechalHucRegionDumpType::hucRegionDumpHpu);
3355 )
3356
3357 ReturnCommandBuffer(&cmdBuffer);
3358
3359 // For Temporal scaling, super frame pass is initiated after the command buffer submission in ExecuteSliceLevel.
3360 // So if Single Task Phase is enabled, then we need to explicitly submit the command buffer here to call HuC
3361 if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
3362 {
3363 bool renderFlags = m_videoContextUsesNullHw;
3364
3365 CODECHAL_DEBUG_TOOL(
3366 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
3367 &cmdBuffer,
3368 CODECHAL_NUM_MEDIA_STATES,
3369 ((currPass == 0)? "HPU_Pass0":"HPU_Pass1"))));
3370
3371 if (m_superFrameHucPass) {
3372 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3373 //For superframe submission, this is the last submission so add frame tracking header
3374 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, m_vp9PicParams->PicFlags.fields.super_frame));
3375 ReturnCommandBuffer(&cmdBuffer);
3376 }
3377
3378 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
3379
3380 CODECHAL_DEBUG_TOOL(
3381 if(m_superFrameHucPass)
3382 {
3383 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3384 &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx],
3385 sizeof(HucProbDmem),
3386 currPass,
3387 CodechalHucRegionDumpType::hucRegionDumpHpuSuperFrame));
3388 }
3389 else
3390 {
3391 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3392 &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx],
3393 sizeof(HucProbDmem),
3394 currPass,
3395 CodechalHucRegionDumpType::hucRegionDumpHpu));
3396 }
3397
3398 for (auto i = 0; i < 16; i++) {
3399 if (virtualAddrParams.regionParams[i].presRegion)
3400 {
3401 if (m_scalableMode && m_isTilingSupported && virtualAddrParams.regionParams[i].isWritable && i != 11)
3402 {
3403 continue;
3404 }
3405 m_debugInterface->DumpHucRegion(
3406 virtualAddrParams.regionParams[i].presRegion,
3407 virtualAddrParams.regionParams[i].dwOffset,
3408 hucRegionSize[i],
3409 i,
3410 hucRegionName[i],
3411 !virtualAddrParams.regionParams[i].isWritable,
3412 currPass,
3413 CodechalHucRegionDumpType::hucRegionDumpHpu);
3414 }
3415 })
3416 }
3417
3418 return eStatus;
3419 }
3420
3421 /*----------------------------------------------------------------------------
3422 | Name : HuCBrcUpdate
3423 | Purpose : Start/Submit VP9 HuC BrcUpdate kernel to HW
3424 |
3425 | Returns : MOS_STATUS
3426 \---------------------------------------------------------------------------*/
HuCBrcUpdate()3427 MOS_STATUS CodechalVdencVp9StateG11::HuCBrcUpdate()
3428 {
3429 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3430
3431 CODECHAL_ENCODE_FUNCTION_ENTER;
3432
3433 int currPass = GetCurrentPass();
3434
3435 CODECHAL_DEBUG_TOOL(
3436 uint32_t hucRegionSize[16];
3437 const char* hucRegionName[16];
3438
3439 hucRegionName[0] = "_BrcHistory";
3440 hucRegionSize[0] = m_brcHistoryBufferSize;
3441 hucRegionName[1] = "_VDEncStats";
3442 hucRegionSize[1] = m_vdencBrcStatsBufferSize;
3443 hucRegionName[2] = "_PAKStats";
3444 hucRegionSize[2] = m_vdencBrcPakStatsBufferSize;
3445 hucRegionName[3] = "_InputSLBB";
3446 hucRegionSize[3] = m_vdencPicStateSecondLevelBatchBufferSize;
3447 hucRegionName[4] = "_BRCData";
3448 hucRegionSize[4] = CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE;
3449 hucRegionName[5] = "_ConstData";
3450 hucRegionSize[5] = m_brcConstantSurfaceSize;
3451 hucRegionName[6] = "_OutputSLBB";
3452 hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
3453 hucRegionName[7] = "_PAKMMIO";
3454 hucRegionSize[7] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
3455 )
3456
3457 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
3458 #if (_DEBUG || _RELEASE_INTERNAL)
3459 if (m_swBrcMode)
3460 {
3461 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
3462 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType));
3463 // Set region params for dumping only
3464 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3465 virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
3466 virtualAddrParams.regionParams[0].isWritable = true;
3467 virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
3468 virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
3469 virtualAddrParams.regionParams[3].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
3470 virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
3471 virtualAddrParams.regionParams[4].isWritable = true;
3472 virtualAddrParams.regionParams[5].presRegion = GetBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType);
3473 virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
3474 virtualAddrParams.regionParams[6].isWritable = true;
3475 virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
3476 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(// Dump history IN since it's both IN/OUT, OUT will dump at end of function, rest of buffers are IN XOR OUT (not both)
3477 virtualAddrParams.regionParams[0].presRegion,
3478 virtualAddrParams.regionParams[0].dwOffset,
3479 hucRegionSize[0],
3480 0,
3481 hucRegionName[0],
3482 true,
3483 currPass,
3484 CodechalHucRegionDumpType::hucRegionDumpUpdate));
3485 CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(true));
3486
3487 CODECHAL_DEBUG_TOOL(
3488 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3489 &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx],
3490 sizeof(HucBrcUpdateDmem), // Change buffer and size to update dmem
3491 currPass,
3492 CodechalHucRegionDumpType::hucRegionDumpUpdate));
3493
3494 for (auto i = 0; i < 16; i++) {
3495 if (virtualAddrParams.regionParams[i].presRegion)
3496 {
3497 m_debugInterface->DumpHucRegion(
3498 virtualAddrParams.regionParams[i].presRegion,
3499 virtualAddrParams.regionParams[i].dwOffset,
3500 hucRegionSize[i],
3501 i,
3502 hucRegionName[i],
3503 !virtualAddrParams.regionParams[i].isWritable,
3504 currPass,
3505 CodechalHucRegionDumpType::hucRegionDumpUpdate);
3506 }
3507 });
3508 // We increment by the average frame value once for each frame
3509 if (IsFirstPass())
3510 {
3511 m_curTargetFullness += m_inputBitsPerFrame;
3512 }
3513
3514 return eStatus;
3515 }
3516 #endif
3517
3518 MOS_COMMAND_BUFFER cmdBuffer;
3519 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3520
3521 if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit)) && !m_scalableMode)
3522 {
3523 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3524 // Send command buffer header at the beginning (OS dependent)
3525 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
3526 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3527
3528 m_firstTaskInPhase = false;
3529 }
3530
3531 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType));
3532
3533 // load kernel from WOPCM into L2 storage RAM
3534 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
3535 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
3536 imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcUpdateKernelDescriptor;
3537 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
3538
3539 // pipe mode select
3540 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
3541 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
3542
3543 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
3544
3545 // set HuC DMEM param
3546 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
3547 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
3548 dmemParams.presHucDataSource = &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx];
3549 dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcUpdateDmem), CODECHAL_CACHELINE_SIZE);
3550 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; // how to set?
3551 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
3552
3553 // Set surfaces to HuC regions
3554 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3555
3556 // History Buffer - IN/OUT
3557 virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
3558 virtualAddrParams.regionParams[0].isWritable = true;
3559 if (IsFirstPass()) // First BRC pass needs stats from last frame
3560 {
3561 if (m_lastFrameScalableMode) // Frame (n-1) Scalable mode stats output -> input for frame n, BRC Pass 0
3562 {
3563 // VDEnc Stats Buffer - IN
3564 virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3565 virtualAddrParams.regionParams[1].dwOffset = m_tileStatsOffset.vdencStats;
3566 // Frame (not PAK) Stats Buffer - IN
3567 virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3568 virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
3569 // PAK MMIO - IN
3570 virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
3571 }
3572 else
3573 {
3574 virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
3575 virtualAddrParams.regionParams[1].dwOffset = 0;
3576 virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
3577 virtualAddrParams.regionParams[2].dwOffset = 0;
3578 virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
3579 }
3580 }
3581 else // Second BRC Update Pass
3582 {
3583 if (m_scalableMode)
3584 {
3585 // VDEnc Stats Buffer - IN
3586 virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3587 virtualAddrParams.regionParams[1].dwOffset = m_tileStatsOffset.vdencStats;
3588 // Frame (not PAK) Stats Buffer - IN
3589 virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3590 virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
3591 // PAK MMIO - IN
3592 virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
3593 }
3594 else
3595 {
3596 virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
3597 virtualAddrParams.regionParams[1].dwOffset = 0;
3598 virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
3599 virtualAddrParams.regionParams[2].dwOffset = 0;
3600 virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
3601 }
3602 }
3603
3604 //For Dys + BRC Pass 0, use the resVdencDysPictureState2ndLevelBatchBuffer as input buffer
3605 virtualAddrParams.regionParams[3].presRegion = (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled) ? &m_resVdencDysPictureState2NdLevelBatchBuffer : &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
3606 // BRC Data - OUT
3607 virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
3608 virtualAddrParams.regionParams[4].isWritable = true;
3609
3610 // Const Data - IN
3611 virtualAddrParams.regionParams[5].presRegion = GetBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType);
3612
3613 // Output SLBB - OUT
3614 virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
3615 virtualAddrParams.regionParams[6].isWritable = true;
3616
3617 // Load HuC Regions into Cmd Buf
3618 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
3619
3620 // Store HUC_STATUS2 register bit 6 before HUC_Start command
3621 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
3622 // (HUC_Start command with last start bit set).
3623 CODECHAL_DEBUG_TOOL(
3624 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
3625 )
3626
3627 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
3628
3629 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
3630
3631 // wait Huc completion (use HEVC bit for now)
3632 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3633 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3634 vdPipeFlushParams.Flags.bFlushHEVC = 1;
3635 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3636 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
3637
3638 // Flush the engine to ensure memory written out
3639 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3640 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3641 flushDwParams.bVideoPipelineCacheInvalidate = true;
3642 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3643
3644 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
3645 {
3646 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3647 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3648 }
3649
3650 ReturnCommandBuffer(&cmdBuffer);
3651
3652 if (!m_singleTaskPhaseSupported)
3653 {
3654 bool renderingFlags = m_videoContextUsesNullHw;
3655
3656 // Dump history input before HuC runs
3657 CODECHAL_DEBUG_TOOL(
3658 m_debugInterface->DumpHucRegion(
3659 virtualAddrParams.regionParams[0].presRegion,
3660 0,
3661 hucRegionSize[0],
3662 0,
3663 hucRegionName[0],
3664 true,
3665 currPass,
3666 CodechalHucRegionDumpType::hucRegionDumpUpdate);
3667 );
3668
3669 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
3670
3671 CODECHAL_DEBUG_TOOL(
3672 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3673 &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx],
3674 sizeof(HucBrcUpdateDmem), // Change buffer and size to update dmem
3675 currPass,
3676 CodechalHucRegionDumpType::hucRegionDumpUpdate));
3677
3678 for (auto i = 0; i < 16; i++) {
3679 if (virtualAddrParams.regionParams[i].presRegion)
3680 {
3681 m_debugInterface->DumpHucRegion(
3682 virtualAddrParams.regionParams[i].presRegion,
3683 virtualAddrParams.regionParams[i].dwOffset,
3684 hucRegionSize[i],
3685 i,
3686 hucRegionName[i],
3687 !virtualAddrParams.regionParams[i].isWritable,
3688 currPass,
3689 CodechalHucRegionDumpType::hucRegionDumpUpdate);
3690 }
3691 })
3692 }
3693
3694 // We increment by the average frame value once for each frame
3695 if (IsFirstPass())
3696 {
3697 m_curTargetFullness += m_inputBitsPerFrame;
3698 }
3699
3700 return eStatus;
3701 }
3702
3703 /*----------------------------------------------------------------------------
3704 | Name : HuCBrcInitReset
3705 | Purpose : Start/Submit VP9 HuC BrcInit kernel to HW
3706 |
3707 | Returns : MOS_STATUS
3708 \---------------------------------------------------------------------------*/
HuCBrcInitReset()3709 MOS_STATUS CodechalVdencVp9StateG11::HuCBrcInitReset()
3710 {
3711 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3712
3713 CODECHAL_ENCODE_FUNCTION_ENTER;
3714
3715 int currPass = GetCurrentPass();
3716
3717 CODECHAL_DEBUG_TOOL(
3718 uint32_t hucRegionSize[16];
3719 const char* hucRegionName[16];
3720
3721 hucRegionName[0] = "_BrcHistoryBuffer";
3722 hucRegionSize[0] = m_brcHistoryBufferSize;
3723 )
3724
3725 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
3726 #if (_DEBUG || _RELEASE_INTERNAL)
3727 if (m_swBrcMode)
3728 {
3729 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
3730 CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(false));
3731 // Set region params for dumping only
3732 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3733 virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
3734 virtualAddrParams.regionParams[0].isWritable = true;
3735 m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
3736 m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
3737
3738 CODECHAL_DEBUG_TOOL(
3739 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3740 &m_resVdencBrcInitDmemBuffer,
3741 sizeof(HucBrcInitDmem),
3742 0,
3743 CodechalHucRegionDumpType::hucRegionDumpInit));
3744
3745 for (auto i = 0; i < 16; i++) {
3746 if (virtualAddrParams.regionParams[i].presRegion)
3747 {
3748 m_debugInterface->DumpHucRegion(
3749 virtualAddrParams.regionParams[i].presRegion,
3750 virtualAddrParams.regionParams[i].dwOffset,
3751 hucRegionSize[i],
3752 i,
3753 hucRegionName[i],
3754 !virtualAddrParams.regionParams[i].isWritable,
3755 currPass,
3756 CodechalHucRegionDumpType::hucRegionDumpInit);
3757 }
3758 })
3759 return eStatus;
3760 }
3761 #endif
3762 MOS_COMMAND_BUFFER cmdBuffer;
3763 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3764
3765 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
3766 {
3767 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3768 // Send command buffer header at the beginning (OS dependent)
3769 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
3770 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3771
3772 m_firstTaskInPhase = false;
3773 }
3774
3775 // load kernel from WOPCM into L2 storage RAM
3776 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
3777 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
3778 imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcInitKernelDescriptor;
3779 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
3780
3781 // pipe mode select
3782 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
3783 pipeModeSelectParams.Mode = m_mode;
3784 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
3785
3786 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
3787
3788 m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
3789 m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
3790
3791 // set HuC DMEM param
3792 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
3793 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
3794 dmemParams.presHucDataSource = &m_resVdencBrcInitDmemBuffer;
3795 dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcInitDmem), CODECHAL_CACHELINE_SIZE);
3796 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
3797 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
3798
3799 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3800 virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
3801 virtualAddrParams.regionParams[0].isWritable = true;
3802 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
3803
3804 // Store HUC_STATUS2 register bit 6 before HUC_Start command
3805 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
3806 // (HUC_Start command with last start bit set).
3807 CODECHAL_DEBUG_TOOL(
3808 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
3809 )
3810
3811 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
3812
3813 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
3814
3815 // wait Huc completion (use HEVC bit for now)
3816 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3817 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3818 vdPipeFlushParams.Flags.bFlushHEVC = 1;
3819 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3820 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
3821
3822 // Flush the engine to ensure memory written out
3823 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3824 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3825 flushDwParams.bVideoPipelineCacheInvalidate = true;
3826 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3827
3828 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
3829 {
3830 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3831 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3832 }
3833
3834 ReturnCommandBuffer(&cmdBuffer);
3835
3836 if (!m_singleTaskPhaseSupported)
3837 {
3838 bool renderingFlags = m_videoContextUsesNullHw;
3839
3840 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
3841
3842 CODECHAL_DEBUG_TOOL(
3843 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3844 &m_resVdencBrcInitDmemBuffer,
3845 sizeof(HucBrcInitDmem),
3846 0,
3847 CodechalHucRegionDumpType::hucRegionDumpInit));
3848
3849 for (auto i = 0; i < 16; i++) {
3850 if (virtualAddrParams.regionParams[i].presRegion)
3851 {
3852 m_debugInterface->DumpHucRegion(
3853 virtualAddrParams.regionParams[i].presRegion,
3854 virtualAddrParams.regionParams[i].dwOffset,
3855 hucRegionSize[i],
3856 i,
3857 hucRegionName[i],
3858 !virtualAddrParams.regionParams[i].isWritable,
3859 0,
3860 CodechalHucRegionDumpType::hucRegionDumpInit);
3861 }
3862 })
3863 }
3864
3865 return eStatus;
3866 }
3867
SetSequenceStructs()3868 MOS_STATUS CodechalVdencVp9StateG11::SetSequenceStructs()
3869 {
3870 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3871
3872 CODECHAL_ENCODE_FUNCTION_ENTER;
3873
3874 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetSequenceStructs());
3875
3876 // All pipe need to go through the picture-level and slice-level commands
3877 m_numPassesInOnePipe = m_numPasses;
3878 m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
3879
3880 return eStatus;
3881 }
3882
SetPictureStructs()3883 MOS_STATUS CodechalVdencVp9StateG11::SetPictureStructs()
3884 {
3885 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3886
3887 CODECHAL_ENCODE_FUNCTION_ENTER;
3888
3889 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetPictureStructs());
3890
3891 m_virtualEngineBBIndex = m_currOriginalPic.FrameIdx;
3892
3893 if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
3894 {
3895 if (!m_hucEnabled)
3896 {
3897 m_numPassesInOnePipe = (m_dysRefFrameFlags != DYS_REF_NONE);
3898 }
3899 if (m_vdencBrcEnabled)
3900 {
3901 //Reduce per pipe passes by 1, as m_numPassesInOnePipe == 1 becomes m_numPassesInOnePipe = 0 for Huc to run
3902 m_dysBrc = true;
3903 m_numPassesInOnePipe = (m_numPassesInOnePipe > 0 ) ? m_numPassesInOnePipe - 1 : m_numPassesInOnePipe;
3904 }
3905 else
3906 {
3907 m_dysCqp = true;
3908 }
3909 m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
3910 }
3911 return eStatus;
3912 }
3913
ExecutePictureLevel()3914 MOS_STATUS CodechalVdencVp9StateG11::ExecutePictureLevel()
3915 {
3916 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3917
3918 CODECHAL_ENCODE_FUNCTION_ENTER;
3919
3920 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
3921
3922 PerfTagSetting perfTag;
3923 perfTag.Value = 0;
3924 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
3925 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
3926 perfTag.PictureCodingType = m_pictureCodingType;
3927 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
3928
3929 // Scalable Mode header
3930 if (m_scalableMode)
3931 {
3932 MOS_COMMAND_BUFFER cmdBuffer;
3933 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3934
3935 bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
3936 // In scalable mode, command buffer header is sent on last pipe only
3937 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3938 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
3939 }
3940
3941 if (IsFirstPass() && IsFirstPipe())
3942 {
3943 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPakInsertObjBatchBuf(&m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx]));
3944 }
3945
3946 // For VDENC dynamic scaling, here are the steps we need to process
3947 // Pass 0. VDENC + PAK Pass
3948 // a. If this is Dys + BRC case, then run BRC Pass 0
3949 // b. Ref frame scaling
3950 // c. VDENC + PAK pass to stream out PakObjCmd
3951 // Pass 1 -> Reset to Pass 0 so as to run HPU Pass 0
3952 // a. If this is Dys + BRC case, then run BRC Pass 1
3953 // b. Run HPU Pass 0
3954 // c. Lite Pass (Pak only multi pass enabled) to stream in
3955 // PakObjCmd from previous pass
3956 // Pass 1 -> Only run HPU Pass 1 to update the probabilities for
3957 // next frame. Repak is disabled for performance reasons
3958 if (m_dysRefFrameFlags != DYS_REF_NONE)
3959 {
3960 if (m_currPass == 0)
3961 {
3962 if (m_dysVdencMultiPassEnabled)
3963 {
3964 if (Mos_ResourceIsNull(&m_resVdencDysPictureState2NdLevelBatchBuffer))
3965 {
3966 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3967
3968 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3969 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3970 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3971 allocParamsForBufferLinear.Format = Format_Buffer;
3972 allocParamsForBufferLinear.dwBytes = m_vdencPicStateSecondLevelBatchBufferSize;
3973 allocParamsForBufferLinear.pBufName = "VDEnc DYS Picture Second Level Batch Buffer";
3974
3975 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
3976 m_osInterface,
3977 &allocParamsForBufferLinear,
3978 &m_resVdencDysPictureState2NdLevelBatchBuffer);
3979
3980 if (eStatus != MOS_STATUS_SUCCESS)
3981 {
3982 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate VDEnc DYS Picture Second Level Batch Buffer.");
3983 return eStatus;
3984 }
3985 }
3986
3987 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
3988 }
3989 }
3990 else if (m_currPass == 1)
3991 {
3992 m_hucEnabled = m_dysHucEnabled; // recover huc state
3993 m_vdencPakonlyMultipassEnabled = true;
3994 m_dysRefFrameFlags = DYS_REF_NONE;
3995 m_currPass = 0; // reset ucCurrPass = 0 to run the Huc
3996 m_lastTaskInPhase = false;
3997 }
3998 }
3999 else
4000 {
4001 if (IsFirstPass() && m_vdencBrcEnabled)
4002 {
4003 m_vdencPakObjCmdStreamOutEnabled = true;
4004 m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
4005 }
4006 else
4007 {
4008 m_vdencPakObjCmdStreamOutEnabled = false;
4009 }
4010 }
4011 if (m_isTilingSupported)
4012 {
4013 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4014 uint8_t* tileStatsData = nullptr;
4015 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4016 lockFlagsWriteOnly.WriteOnly = 1;
4017 if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBBIndex].sResource))
4018 {
4019 // Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
4020 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4021 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4022 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4023 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4024 allocParamsForBufferLinear.Format = Format_Buffer;
4025 auto size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
4026 allocParamsForBufferLinear.dwBytes = size;
4027 allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
4028
4029 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4030 m_osInterface,
4031 &allocParamsForBufferLinear,
4032 &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource));
4033 m_tileRecordBuffer[m_virtualEngineBBIndex].dwSize = size;
4034
4035 auto tileRecordData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
4036
4037 MOS_ZeroMemory(tileRecordData, allocParamsForBufferLinear.dwBytes);
4038 m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource);
4039 }
4040 }
4041 // Running in the multiple VDBOX mode, Allocate Required Buffers for Tile based operation. Do this only once per frame.
4042 if (m_isTilingSupported && m_scalableMode && m_hucEnabled && IsFirstPipe() && IsFirstPass())
4043 {
4044 // Max row is 4 by VP9 Spec
4045 uint32_t m_maxScalableModeRows = 4;
4046 uint32_t m_maxScalableModeTiles = m_numVdbox * m_maxScalableModeRows;
4047
4048 // Fill Pak integration kernel input tile stats structure
4049 MOS_ZeroMemory(&m_tileStatsOffset, sizeof(StatsInfo));
4050 // TileSizeRecord has to be 4k aligned
4051 m_tileStatsOffset.tileSizeRecord = 0; // TileReord is in a separated resource
4052 // VdencStats has to be 4k aligned
4053 m_tileStatsOffset.vdencStats = 0; // vdencStats is head of m_tileStatsPakIntegrationBuffer
4054 // VP9PAKStats has to be 64 byte aligned
4055 m_tileStatsOffset.pakStats = MOS_ALIGN_CEIL((m_tileStatsOffset.vdencStats + (m_maxScalableModeTiles * m_statsSize.vdencStats)), CODECHAL_PAGE_SIZE);
4056 // VP9CounterBuffer has to be 4k aligned
4057 m_tileStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_tileStatsOffset.pakStats + (m_maxScalableModeTiles * m_statsSize.pakStats)), CODECHAL_PAGE_SIZE);
4058
4059 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4060 uint8_t* tileStatsData = nullptr;
4061 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4062 lockFlagsWriteOnly.WriteOnly = 1;
4063
4064 if (Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource))
4065 {
4066 // Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
4067 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4068 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4069 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4070 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4071 allocParamsForBufferLinear.Format = Format_Buffer;
4072 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL((m_tileStatsOffset.counterBuffer + (m_maxScalableModeTiles * m_statsSize.counterBuffer)), CODECHAL_PAGE_SIZE);
4073 allocParamsForBufferLinear.pBufName = "GEN11 Tile Level Statistics Buffer";
4074
4075 m_tileStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
4076
4077 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4078 m_osInterface,
4079 &allocParamsForBufferLinear,
4080 &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource));
4081 m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].dwSize = allocParamsForBufferLinear.dwBytes;
4082
4083 tileStatsData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
4084
4085 MOS_ZeroMemory(tileStatsData, allocParamsForBufferLinear.dwBytes);
4086 m_osInterface->pfnUnlockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource);
4087 }
4088 }
4089
4090 int currPass = GetCurrentPass();
4091 if ((m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled)
4092 {
4093 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencDysPictureState2NdLevelBatchBuffer));
4094 }
4095 else
4096 {
4097 if (IsFirstPipe())
4098 {
4099 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex]));
4100 }
4101 }
4102
4103 if (IsFirstPipe() && m_vdencBrcEnabled)
4104 {
4105 // Invoke BRC init/reset FW
4106 if (m_brcInit || m_brcReset)
4107 {
4108 if (!m_singleTaskPhaseSupported)
4109 {
4110 //Reset earlier set PAK perf tag
4111 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4112 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET);
4113 }
4114 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
4115 m_brcInit = m_brcReset = false;
4116 }
4117 // For multipass and singlepass+RePAK we call BRC update for all passes except last pass (RePAK)
4118 // For single pass w/o RePAK (1 total pass) we call BRC update on one and only pass
4119 if (!IsLastPass() || (m_currPass == 0 && m_numPasses == 0))
4120 {
4121 bool origSingleTaskPhase = m_singleTaskPhaseSupported;
4122 bool origFrameTrackingHeader = false;
4123
4124 // If this is the case of Dynamic Scaling + BRC Pass 0' VDENC + Pak pass
4125 // Disable SingleTaskPhase before running 1st BRC update
4126 // To run HPU0 on the next pass i.e Pak only pass, we make Pass 1 as Pass 0 in which case the
4127 // BRC dmem buffer( resVdencBrcUpdateDmemBuffer[0] ) will get overridden if we do not submit BRC command now.
4128 if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
4129 {
4130 m_singleTaskPhaseSupported = false;
4131
4132 //Reset Frame Tracking Header for this submission
4133 MOS_COMMAND_BUFFER cmdBuffer;
4134 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
4135 origFrameTrackingHeader = cmdBuffer.Attributes.bEnableMediaFrameTracking;
4136 cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
4137 ReturnCommandBuffer(&cmdBuffer);
4138 }
4139
4140 if (!m_singleTaskPhaseSupported)
4141 {
4142 //Reset performance buffer used for BRC init
4143 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4144 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
4145 }
4146 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
4147 //Restore the original state of SingleTaskPhaseSupported flag
4148 m_singleTaskPhaseSupported = origSingleTaskPhase;
4149
4150 //Restore Original Frame Tracking Header
4151 if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
4152 {
4153 MOS_COMMAND_BUFFER cmdBuffer;
4154 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
4155 cmdBuffer.Attributes.bEnableMediaFrameTracking = origFrameTrackingHeader;
4156 ReturnCommandBuffer(&cmdBuffer);
4157 }
4158 }
4159 }
4160
4161 // run HuC_VP9Prob first pass (it runs in parallel with ENC)
4162 if (m_hucEnabled)
4163 {
4164 if (IsFirstPipe() && (IsFirstPass() || IsLastPass() || (m_vdencBrcEnabled))) // Before the first PAK pass, for RePak pass and for BRC case, HuC_VP9Prob needs to be called on Pass 1 as well
4165 {
4166 if (!m_singleTaskPhaseSupported)
4167 {
4168 //Reset earlier set PAK perf tag
4169 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4170 // Add Hpu tag here after updated
4171 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
4172 }
4173 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9Prob());
4174 if (!m_singleTaskPhaseSupported)
4175 {
4176 //reset performance buffer used for HPU update
4177 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4178 }
4179 }
4180 }
4181 else
4182 {
4183 CODECHAL_ENCODE_CHK_STATUS_RETURN(RefreshFrameInternalBuffers());
4184 }
4185
4186 if (m_dysRefFrameFlags != DYS_REF_NONE && IsFirstPass())
4187 {
4188 // Turn off scalability and Tiling for Dynamic scaling pass 0 for reference scaling
4189 uint8_t logTileRows = m_vp9PicParams->log2_tile_rows;
4190 uint8_t logTileColumns = m_vp9PicParams->log2_tile_columns;
4191 bool scalableMode = m_scalableMode;
4192 uint8_t numPipe = m_numPipe;
4193 m_vp9PicParams->log2_tile_rows = 0;
4194 m_vp9PicParams->log2_tile_columns = 0;
4195 m_scalableMode = false;
4196 m_numPipe = 1;
4197 // Execute Reference scaling pass
4198 CODECHAL_ENCODE_CHK_STATUS_RETURN(DysRefFrames());
4199
4200 // Restore scalability and Tiling status for subsequent passes
4201 m_vp9PicParams->log2_tile_rows = logTileRows;
4202 m_vp9PicParams->log2_tile_columns = logTileColumns;
4203 m_scalableMode = scalableMode;
4204 m_numPipe = numPipe;
4205
4206 if (m_dysVdencMultiPassEnabled)
4207 {
4208 m_singleTaskPhaseSupported = true;
4209 m_firstTaskInPhase = true;
4210 m_vdencPakObjCmdStreamOutEnabled = true;
4211 m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
4212 }
4213 else
4214 {
4215 m_hucEnabled = m_dysHucEnabled; //recover huc state
4216 }
4217 }
4218
4219 // set HCP_SURFACE_STATE values
4220 MHW_VDBOX_SURFACE_PARAMS surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID + 1];
4221 for (uint8_t i = 0; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
4222 {
4223 MOS_ZeroMemory(&surfaceParams[i], sizeof(surfaceParams[i]));
4224 surfaceParams[i].Mode = m_mode;
4225 surfaceParams[i].ucSurfaceStateId = i;
4226 surfaceParams[i].ChromaType = m_outputChromaFormat;
4227 surfaceParams[i].bSrc8Pak10Mode = (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth) && (!m_vp9SeqParams->SeqFlags.fields.SourceBitDepth);
4228
4229 switch (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth)
4230 {
4231 case VP9_ENCODED_BIT_DEPTH_10: //10 bit encoding
4232 {
4233 surfaceParams[i].ucBitDepthChromaMinus8 = 2;
4234 surfaceParams[i].ucBitDepthLumaMinus8 = 2;
4235 break;
4236 }
4237 default:
4238 {
4239 surfaceParams[i].ucBitDepthChromaMinus8 = 0;
4240 surfaceParams[i].ucBitDepthLumaMinus8 = 0;
4241 break;
4242 }
4243 }
4244 }
4245
4246 // For PAK engine, we do NOT use scaled reference images even if dynamic scaling is enabled
4247 PMOS_SURFACE refSurface[3], refSurfaceNonScaled[3], dsRefSurface4x[3], dsRefSurface8x[3];
4248 for (auto i = 0; i < 3; i++)
4249 {
4250 refSurface[i] = refSurfaceNonScaled[i] = dsRefSurface4x[i] = dsRefSurface8x[i] = nullptr;
4251 }
4252 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetHcpSrcSurfaceParams(surfaceParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x));
4253
4254 MOS_COMMAND_BUFFER cmdBuffer;
4255 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
4256
4257 if (!m_singleTaskPhaseSupported)
4258 {
4259 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
4260 }
4261
4262 // Non scalable mode header
4263 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
4264 {
4265 // Send command buffer header at the beginning (OS dependent)
4266 // frame tracking tag is only added in the last command buffer header
4267 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
4268 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
4269 }
4270
4271 // Place hw semaphore on all other pipe to wait for first pipe HUC to finish. Apply for all passes after extend the Dmen HPU buffer size
4272 int currPipe = GetCurrentPipe();
4273 if (m_scalableMode)
4274 {
4275 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
4276
4277 //HW Semaphore cmd to make sure all pipes start encode at the same time
4278 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSync, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
4279 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
4280 &m_resPipeStartSync,
4281 &cmdBuffer,
4282 m_numPipe));
4283
4284 // Program some placeholder cmds to resolve the hazard between pipe sync
4285 MHW_MI_STORE_DATA_PARAMS dataParams;
4286 dataParams.pOsResource = &m_resDelayMinus;
4287 dataParams.dwResourceOffset = 0;
4288 dataParams.dwValue = 0xDE1A;
4289 for (uint32_t i = 0; i < m_numDelay; i++)
4290 {
4291 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
4292 &cmdBuffer,
4293 &dataParams));
4294 }
4295
4296 //clean HW semaphore memory
4297 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSync, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
4298
4299 //Start Watchdog Timer
4300 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
4301 }
4302
4303 // clean-up per VDBOX semaphore memory, only in the first BRC pass. Same semaphore is re-used across BRC passes for stitch command
4304 if (IsFirstPass())
4305 {
4306 if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[currPipe].sResource))
4307 {
4308 CODECHAL_ENCODE_CHK_STATUS_RETURN(
4309 SetSemaphoreMem(
4310 &m_stitchWaitSemaphoreMem[currPipe].sResource,
4311 &cmdBuffer,
4312 false));
4313 }
4314 }
4315
4316 // Repak conditional batch buffer end based on repak flag written by Huc to HUC_STATUS regster
4317 if (m_hucEnabled && (m_numPasses > 0) && IsLastPass())
4318 {
4319 // Insert conditional batch buffer end
4320 // Bit 30 has been added as a success condition, therefore this needs to be masked to only check 31 for RePAK
4321 // or else if HuC decides not to do RePAK for conditional RePAK yet terminates successfully RePAK will still happen.
4322 // Success = bit 30 set to 1, Do RePAK = bit 31 set to 1, value is always 0; if 0 < memory, continue
4323 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
4324 MOS_ZeroMemory(
4325 &miConditionalBatchBufferEndParams,
4326 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
4327
4328 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
4329 &m_resHucPakMmioBuffer;
4330 // Make the DisableCompareMask 0, so that the HW will do AND operation on DW0 with Mask DW1, refer to HuCVp9Prob() for the settings
4331 // and compare the result against the Semaphore data which in our case dwValue = 0.
4332 // If result > dwValue then continue execution otherwise terminate the batch buffer
4333 miConditionalBatchBufferEndParams.bDisableCompareMask = false;
4334
4335 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
4336 &cmdBuffer,
4337 &miConditionalBatchBufferEndParams));
4338 }
4339
4340 if (IsFirstPipe())
4341 {
4342 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
4343 }
4344
4345 // set HCP_PIPE_BUF_ADDR_STATE values
4346 PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams = nullptr;
4347 pipeBufAddrParams = CreateHcpPipeBufAddrParams(pipeBufAddrParams);
4348 CODECHAL_ENCODE_CHK_NULL_RETURN(pipeBufAddrParams);
4349 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetHcpPipeBufAddrParams(*pipeBufAddrParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x));
4350 pipeBufAddrParams->pRawSurfParam = &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID];
4351 pipeBufAddrParams->pDecodedReconParam = &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID];
4352 #ifdef _MMC_SUPPORTED
4353 // In case of dynamic scaling refSurface is scaled for pass 0 and nonscaled for last pass
4354 // This ensures correct references are passed in for MMC
4355 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetPipeBufAddr(pipeBufAddrParams, refSurface, &cmdBuffer));
4356 #endif
4357
4358 // set HCP_PIPE_MODE_SELECT values
4359 PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams = nullptr;
4360 pipeModeSelectParams = m_vdencInterface->CreateMhwVdboxPipeModeSelectParams();
4361 CODECHAL_ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
4362 SetHcpPipeModeSelectParams(*pipeModeSelectParams);
4363
4364 auto delete_func = [&]()
4365 {
4366 if (pipeModeSelectParams)
4367 {
4368 MOS_Delete(pipeModeSelectParams);
4369 pipeModeSelectParams = nullptr;
4370 }
4371 };
4372
4373 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), delete_func);
4374
4375 // This wait cmd is needed to make sure copy is done as suggested by HW folk
4376 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, false), delete_func);
4377
4378 // Decoded picture
4379 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]), delete_func);
4380
4381 // Source input
4382 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]), delete_func);
4383
4384 // Last reference picture
4385 if (refSurface[0])
4386 {
4387 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]), delete_func);
4388 }
4389
4390 if (MEDIA_IS_WA(m_waTable, Wa_Vp9UnalignedHeight))
4391 {
4392 uint32_t real_height = m_oriFrameHeight;
4393 uint32_t aligned_height = MOS_ALIGN_CEIL(real_height, CODEC_VP9_MIN_BLOCK_HEIGHT);
4394
4395 fill_pad_with_value(m_rawSurfaceToPak, real_height, aligned_height);
4396 }
4397
4398 // Golden reference picture
4399 if (refSurface[1])
4400 {
4401 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]), delete_func);
4402 }
4403
4404 // Alt reference picture
4405 if (refSurface[2])
4406 {
4407 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]), delete_func);
4408 }
4409
4410 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams), delete_func);
4411
4412 // set HCP_IND_OBJ_BASE_ADDR_STATE values
4413 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
4414 SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
4415 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams), delete_func);
4416
4417 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), delete_func);
4418 if (pipeModeSelectParams)
4419 {
4420 MOS_Delete(pipeModeSelectParams);
4421 pipeModeSelectParams = nullptr;
4422 }
4423
4424 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
4425 if (m_pictureCodingType == I_TYPE)
4426 {
4427 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
4428 }
4429 else
4430 {
4431 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]));
4432 if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4433 {
4434 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]));
4435 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]));
4436 }
4437 }
4438
4439 MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2]; // 8x and 4x DS surfaces
4440 SetHcpDsSurfaceParams(&dsSurfaceParams[0]);
4441 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2));
4442
4443 if (pipeBufAddrParams)
4444 {
4445 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams));
4446 MOS_Delete(pipeBufAddrParams);
4447 pipeBufAddrParams = nullptr;
4448 }
4449
4450 MHW_BATCH_BUFFER secondLevelBatchBuffer;
4451 MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
4452 secondLevelBatchBuffer.dwOffset = 0;
4453 secondLevelBatchBuffer.bSecondLevel = true;
4454 if (m_hucEnabled)
4455 {
4456 secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferWrite[0];
4457 }
4458 else
4459 {
4460 if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
4461 {
4462 // For DyS + BRC case, we run BRC on Pass 0, so although we dont run HPU on Pass 0
4463 // (VDENC + PAK pass) we will still use the write buffer here
4464 if (m_dysBrc)
4465 {
4466 secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferWrite[0];
4467 }
4468 else //CQP case for Pass 0 , HPU has not run yet.. so use this buffer
4469 {
4470 secondLevelBatchBuffer.OsResource = m_resVdencDysPictureState2NdLevelBatchBuffer;
4471 }
4472 }
4473 else
4474 {
4475 secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
4476 }
4477 }
4478
4479 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
4480 &cmdBuffer,
4481 &secondLevelBatchBuffer));
4482
4483 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
4484
4485 return eStatus;
4486 }
4487
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams,PMOS_SURFACE * refSurface,PMOS_SURFACE * refSurfaceNonScaled,PMOS_SURFACE * dsRefSurface4x,PMOS_SURFACE * dsRefSurface8x)4488 MOS_STATUS CodechalVdencVp9StateG11::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams,
4489 PMOS_SURFACE* refSurface,
4490 PMOS_SURFACE* refSurfaceNonScaled,
4491 PMOS_SURFACE* dsRefSurface4x,
4492 PMOS_SURFACE* dsRefSurface8x)
4493 {
4494 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4495
4496 CODECHAL_ENCODE_FUNCTION_ENTER;
4497
4498 pipeBufAddrParams = {};
4499 pipeBufAddrParams.Mode = m_mode;
4500 pipeBufAddrParams.psPreDeblockSurface = &m_reconSurface;
4501 pipeBufAddrParams.psPostDeblockSurface = &m_reconSurface;
4502 pipeBufAddrParams.psRawSurface = m_rawSurfaceToPak;
4503
4504 pipeBufAddrParams.presMfdDeblockingFilterRowStoreScratchBuffer =
4505 &m_resDeblockingFilterLineBuffer;
4506
4507 pipeBufAddrParams.presDeblockingFilterTileRowStoreScratchBuffer =
4508 &m_resDeblockingFilterTileLineBuffer;
4509
4510 pipeBufAddrParams.presDeblockingFilterColumnRowStoreScratchBuffer =
4511 &m_resDeblockingFilterTileColumnBuffer;
4512
4513 pipeBufAddrParams.presMetadataLineBuffer = &m_resMetadataLineBuffer;
4514 pipeBufAddrParams.presMetadataTileLineBuffer = &m_resMetadataTileLineBuffer;
4515 pipeBufAddrParams.presMetadataTileColumnBuffer = &m_resMetadataTileColumnBuffer;
4516 pipeBufAddrParams.presCurMvTempBuffer = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex);
4517
4518 // Huc first pass doesn't write probabilities to output prob region but only updates to the input region. HuC run before repak writes to the ouput region.
4519 uint8_t frameCtxIdx = 0;
4520 if (m_hucEnabled && IsLastPass())
4521 {
4522 pipeBufAddrParams.presVp9ProbBuffer = &m_resHucProbOutputBuffer;
4523 }
4524 else
4525 {
4526 frameCtxIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
4527 CODECHAL_ENCODE_ASSERT(frameCtxIdx < CODEC_VP9_NUM_CONTEXTS);
4528 pipeBufAddrParams.presVp9ProbBuffer = &m_resProbBuffer[frameCtxIdx];
4529 }
4530
4531 pipeBufAddrParams.presVp9SegmentIdBuffer = &m_resSegmentIdBuffer;
4532 pipeBufAddrParams.presHvdTileRowStoreBuffer = &m_resHvcTileRowstoreBuffer;
4533 pipeBufAddrParams.ps4xDsSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
4534 pipeBufAddrParams.ps8xDsSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
4535 pipeBufAddrParams.presVdencIntraRowStoreScratchBuffer = &m_resVdencIntraRowStoreScratchBuffer;
4536 pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
4537 if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
4538 {
4539 pipeBufAddrParams.presVdencStreamOutBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
4540 pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_tileStatsOffset.vdencStats;
4541 }
4542 else
4543 {
4544 pipeBufAddrParams.presVdencStreamOutBuffer = &m_resVdencBrcStatsBuffer;
4545 pipeBufAddrParams.dwVdencStatsStreamOutOffset = 0;
4546 }
4547 pipeBufAddrParams.presStreamOutBuffer = nullptr;
4548 pipeBufAddrParams.presFrameStatStreamOutBuffer = &m_resFrameStatStreamOutBuffer;
4549 pipeBufAddrParams.presSseSrcPixelRowStoreBuffer = &m_resSseSrcPixelRowStoreBuffer;
4550 pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
4551 pipeBufAddrParams.presSegmentMapStreamOut = &m_resVdencSegmentMapStreamOut;
4552 pipeBufAddrParams.presPakCuLevelStreamoutBuffer =
4553 Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ? nullptr : &m_resPakcuLevelStreamoutData.sResource;
4554 if (m_dysRefFrameFlags != DYS_REF_NONE)
4555 {
4556 pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer =
4557 (m_vdencPakObjCmdStreamOutEnabled) ? m_resVdencPakObjCmdStreamOutBuffer : nullptr;
4558 }
4559 else
4560 {
4561 pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer = m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
4562 }
4563
4564 if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
4565 {
4566 PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex];
4567 bool useTileStatisticsBuffer = tileStatisticsBuffer && !Mos_ResourceIsNull(&tileStatisticsBuffer->sResource);
4568 // the new framestats streamout will now be the tile level stats buffer because each pak is spewing out tile level stats
4569 pipeBufAddrParams.presFrameStatStreamOutBuffer = useTileStatisticsBuffer ? &tileStatisticsBuffer->sResource : nullptr;
4570 pipeBufAddrParams.dwFrameStatStreamOutOffset = useTileStatisticsBuffer ? m_tileStatsOffset.pakStats : 0;
4571 //Main Frame Stats are integrated by PAK integration kernel
4572 }
4573 else
4574 {
4575 pipeBufAddrParams.presFrameStatStreamOutBuffer = &m_resFrameStatStreamOutBuffer;
4576 pipeBufAddrParams.dwFrameStatStreamOutOffset = 0;
4577 }
4578
4579 if (m_pictureCodingType != I_TYPE)
4580 {
4581 for (auto i = 0; i < 3; i++)
4582 {
4583 CODECHAL_ENCODE_CHK_NULL_RETURN(refSurface[i]);
4584 CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface4x[i]);
4585 CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface8x[i]);
4586
4587 pipeBufAddrParams.presReferences[i] = &refSurface[i]->OsResource;
4588 pipeBufAddrParams.presVdencReferences[i] = &refSurface[i]->OsResource;
4589 pipeBufAddrParams.presVdenc4xDsSurface[i] = &dsRefSurface4x[i]->OsResource;
4590 pipeBufAddrParams.presVdenc8xDsSurface[i] = &dsRefSurface8x[i]->OsResource;
4591
4592 if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4593 {
4594 pipeBufAddrParams.presReferences[i + 4] = &refSurfaceNonScaled[i]->OsResource;
4595 }
4596 }
4597
4598 pipeBufAddrParams.presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex ^ 0x01);
4599 }
4600
4601 return eStatus;
4602 }
4603
GetNumTilesInFrame()4604 uint16_t CodechalVdencVp9StateG11::GetNumTilesInFrame()
4605 {
4606 return ((1 << m_vp9PicParams->log2_tile_rows) * (1 << m_vp9PicParams->log2_tile_columns));
4607 }
4608
AllocateResources()4609 MOS_STATUS CodechalVdencVp9StateG11::AllocateResources()
4610 {
4611 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4612
4613 CODECHAL_ENCODE_FUNCTION_ENTER;
4614
4615 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::AllocateResources());
4616
4617 // create the tile coding state parameters
4618 CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams =
4619 (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)MOS_AllocAndZeroMemory(sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11) * m_maxTileNumber));
4620
4621 if (m_isTilingSupported)
4622 {
4623 uint32_t maxPicWidthInSb = MOS_ROUNDUP_DIVIDE(m_maxPicWidth, CODEC_VP9_SUPER_BLOCK_WIDTH);
4624 uint32_t maxPicHeightInSb = MOS_ROUNDUP_DIVIDE(m_maxPicHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT);
4625
4626 //PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
4627 uint32_t size = maxPicWidthInSb * maxPicHeightInSb * 64 * CODECHAL_CACHELINE_SIZE; // One CU has 16-byte, and there are 64 CU in one SB. But, each tile needs to be aliged to the cache line
4628 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4629 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4630 allocParamsForBufferLinear.dwBytes = size;
4631 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4632 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4633 allocParamsForBufferLinear.Format = Format_Buffer;
4634 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
4635
4636 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4637 m_osInterface,
4638 &allocParamsForBufferLinear,
4639 &m_resPakcuLevelStreamoutData.sResource);
4640 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4641
4642 //PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
4643 // one LCU has one cache line. Use CU as LCU during creation
4644 allocParamsForBufferLinear.dwBytes = size;
4645 allocParamsForBufferLinear.pBufName = "PAK Slice Level Streamout Data";
4646
4647 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4648 m_osInterface,
4649 &allocParamsForBufferLinear,
4650 &m_resPakSliceLevelStreamutData.sResource);
4651 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4652
4653 //HCP scalability Sync buffer
4654 size = CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
4655 allocParamsForBufferLinear.dwBytes = size;
4656 allocParamsForBufferLinear.pBufName = "Hcp scalability Sync buffer ";
4657
4658 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4659 m_osInterface,
4660 &allocParamsForBufferLinear,
4661 &m_hcpScalabilitySyncBuffer.sResource);
4662 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4663 m_hcpScalabilitySyncBuffer.dwSize = size;
4664
4665 // PAK integration related
4666 if (m_isTilingSupported && m_scalableMode && m_hucEnabled)
4667 {
4668 // HUC Pak Int DMEM buffer
4669 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
4670 allocParamsForBufferLinear.pBufName = "Huc Pak Int Dmem Buffer";
4671 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4672 {
4673 for (auto j = 0; j < m_brcMaxNumPasses; j++)
4674 {
4675 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4676 m_osInterface,
4677 &allocParamsForBufferLinear,
4678 &m_hucPakIntDmemBuffer[i][j]);
4679 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4680 }
4681 }
4682
4683 // HuC PAK Int region 7, 8
4684 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
4685 allocParamsForBufferLinear.pBufName = "HUC PAK Int Dummy Buffer";
4686
4687 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4688 m_osInterface,
4689 &allocParamsForBufferLinear,
4690 &m_hucPakIntDummyBuffer);
4691 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4692
4693 MOS_LOCK_PARAMS lockFlags;
4694 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4695 lockFlags.WriteOnly = 1;
4696 uint8_t* data = nullptr;
4697
4698 data = (uint8_t*)m_osInterface->pfnLockResource(
4699 m_osInterface,
4700 &m_hucPakIntDummyBuffer,
4701 &lockFlags);
4702
4703 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4704
4705 MOS_ZeroMemory(
4706 data,
4707 allocParamsForBufferLinear.dwBytes);
4708
4709 m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDummyBuffer);
4710
4711 // HuC PAK Int Region 1 programming related stats
4712 MOS_ZeroMemory(&m_frameStatsOffset, sizeof(StatsInfo));
4713 MOS_ZeroMemory(&m_statsSize, sizeof(StatsInfo));
4714
4715 //Sizes of each buffer to be loaded into the region 0 as input and 1 loaded out as output.
4716 m_statsSize.tileSizeRecord = m_hcpInterface->GetPakHWTileSizeRecordSize();
4717 m_statsSize.vdencStats = m_brcStatsBufSize;
4718 m_statsSize.pakStats = m_brcPakStatsBufSize;
4719 m_statsSize.counterBuffer = m_probabilityCounterBufferSize;
4720
4721 //Offsets for output of all integrated frame statistics (region 1) from PAK integration kernel
4722 m_frameStatsOffset.tileSizeRecord = 0;
4723 // Vdenc stats has to be 4K aligned
4724 m_frameStatsOffset.vdencStats = 0;
4725 // VP9 PAK stats/ BRC pak stats / Frame Stats have to be 4K aligned
4726 m_frameStatsOffset.pakStats = MOS_ALIGN_CEIL((m_frameStatsOffset.vdencStats + m_statsSize.vdencStats), CODECHAL_PAGE_SIZE);
4727 // VP9 CounterBuffer goes as input to HUC region so it has to be 4k aligned
4728 m_frameStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_frameStatsOffset.pakStats + m_statsSize.pakStats), CODECHAL_PAGE_SIZE);
4729
4730 // HuC PAK Int DMEM region 1 buffer allocation
4731 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_frameStatsOffset.counterBuffer + m_statsSize.counterBuffer, CODECHAL_PAGE_SIZE);
4732 allocParamsForBufferLinear.pBufName = "PAK HUC Integrated Frame Stats Buffer";
4733 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4734 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4735 allocParamsForBufferLinear.Format = Format_Buffer;
4736
4737 m_frameStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
4738
4739 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4740 m_osInterface,
4741 &allocParamsForBufferLinear,
4742 &m_frameStatsPakIntegrationBuffer.sResource));
4743 m_frameStatsPakIntegrationBuffer.dwSize = allocParamsForBufferLinear.dwBytes;
4744
4745 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4746 lockFlags.WriteOnly = 1;
4747 data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource, &lockFlags);
4748 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
4749 m_osInterface->pfnUnlockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource);
4750
4751 // Allocate region 9 of pak integration to be fed as input to HUC BRC region 7
4752 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4753 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4754 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4755 allocParamsForBufferLinear.Format = Format_Buffer;
4756 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
4757 allocParamsForBufferLinear.pBufName = "GEN11 PAK Integration FrameByteCount output";
4758 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4759 m_osInterface,
4760 &allocParamsForBufferLinear,
4761 &m_hucPakIntBrcDataBuffer));
4762
4763 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4764 lockFlags.WriteOnly = 1;
4765 data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_hucPakIntBrcDataBuffer, &lockFlags);
4766 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
4767 m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntBrcDataBuffer);
4768
4769 // Allocate Semaphore memory for VDEnc/PAK on all pipes to signal stitch command to stop waiting
4770 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4771 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4772 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4773 allocParamsForBufferLinear.Format = Format_Buffer;
4774 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4775 allocParamsForBufferLinear.pBufName = "GEN11 VDEnc PAK done Semaphore Memory";
4776
4777 for (auto i = 0; i < m_numPipe; i++)
4778 {
4779 uint32_t* data = nullptr;
4780
4781 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4782 m_osInterface,
4783 &allocParamsForBufferLinear,
4784 &m_stitchWaitSemaphoreMem[i].sResource));
4785
4786 m_stitchWaitSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
4787
4788 data = (uint32_t*)m_osInterface->pfnLockResource(
4789 m_osInterface,
4790 &m_stitchWaitSemaphoreMem[i].sResource,
4791 &lockFlags);
4792
4793 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4794
4795 *data = 1;
4796
4797 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
4798 m_osInterface,
4799 &m_stitchWaitSemaphoreMem[i].sResource));
4800 }
4801
4802 }
4803 uint32_t* data = nullptr;
4804 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4805 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4806 lockFlagsWriteOnly.WriteOnly = 1;
4807
4808 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4809 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4810 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4811 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4812 allocParamsForBufferLinear.Format = Format_Buffer;
4813 allocParamsForBufferLinear.pBufName = "Pipe Start Sync memory";
4814
4815 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4816 m_osInterface,
4817 &allocParamsForBufferLinear,
4818 &m_resPipeStartSync));
4819
4820 data = (uint32_t *)m_osInterface->pfnLockResource(
4821 m_osInterface,
4822 &m_resPipeStartSync,
4823 &lockFlagsWriteOnly);
4824
4825 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4826
4827 MOS_ZeroMemory(data, sizeof(uint32_t));
4828
4829 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
4830 m_osInterface,
4831 &m_resPipeStartSync));
4832
4833 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4834 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4835 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4836 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4837 allocParamsForBufferLinear.Format = Format_Buffer;
4838 allocParamsForBufferLinear.pBufName = "Frame Start Sync memory";
4839
4840 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4841 m_osInterface,
4842 &allocParamsForBufferLinear,
4843 &m_resFrameStartSync));
4844
4845 data = (uint32_t *)m_osInterface->pfnLockResource(
4846 m_osInterface,
4847 &m_resFrameStartSync,
4848 &lockFlagsWriteOnly);
4849
4850 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4851
4852 MOS_ZeroMemory(data, sizeof(uint32_t));
4853
4854 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
4855 m_osInterface,
4856 &m_resFrameStartSync));
4857
4858 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4859 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4860 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4861 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4862 allocParamsForBufferLinear.Format = Format_Buffer;
4863 allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
4864
4865 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4866 m_osInterface,
4867 &allocParamsForBufferLinear,
4868 &m_resDelayMinus));
4869
4870 data = (uint32_t*)m_osInterface->pfnLockResource(
4871 m_osInterface,
4872 &m_resDelayMinus,
4873 &lockFlagsWriteOnly);
4874
4875 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4876
4877 MOS_ZeroMemory(data, sizeof(uint32_t));
4878
4879 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
4880 m_osInterface,
4881 &m_resDelayMinus));
4882 }
4883
4884 return eStatus;
4885 }
4886
FreeResources()4887 void CodechalVdencVp9StateG11::FreeResources()
4888 {
4889 CodechalVdencVp9State::FreeResources();
4890
4891 MOS_FreeMemory(m_tileParams);
4892 if (m_isTilingSupported)
4893 {
4894 if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
4895 {
4896 m_osInterface->pfnFreeResource(
4897 m_osInterface,
4898 &m_resPakcuLevelStreamoutData.sResource);
4899 }
4900
4901 if (!Mos_ResourceIsNull(&m_resPakSliceLevelStreamutData.sResource))
4902 {
4903 m_osInterface->pfnFreeResource(
4904 m_osInterface,
4905 &m_resPakSliceLevelStreamutData.sResource);
4906 }
4907
4908 // Release Hcp scalability Sync buffer
4909 if (!Mos_ResourceIsNull(&m_hcpScalabilitySyncBuffer.sResource))
4910 {
4911 m_osInterface->pfnFreeResource(
4912 m_osInterface,
4913 &m_hcpScalabilitySyncBuffer.sResource);
4914 }
4915
4916 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
4917 {
4918 if (!Mos_ResourceIsNull(&m_tileRecordBuffer[i].sResource))
4919 {
4920 m_osInterface->pfnFreeResource(
4921 m_osInterface,
4922 &m_tileRecordBuffer[i].sResource);
4923 }
4924 }
4925
4926 for (auto i = 0; i < m_numUncompressedSurface; i++)
4927 {
4928 for (auto j = 0; j < CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE; j++)
4929 {
4930 for (auto k = 0; k < 3; k++)
4931 {
4932 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
4933
4934 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
4935 {
4936 if (cmdBuffer->pCmdBase)
4937 {
4938 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
4939 }
4940 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
4941 }
4942 }
4943 }
4944 }
4945 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4946 {
4947 for (auto j = 0; j < m_brcMaxNumPasses; j++)
4948 {
4949 if (!Mos_ResourceIsNull(&m_hucPakIntDmemBuffer[i][j]))
4950 {
4951 m_osInterface->pfnFreeResource(
4952 m_osInterface,
4953 &m_hucPakIntDmemBuffer[i][j]);
4954 }
4955 }
4956 }
4957
4958 if (!Mos_ResourceIsNull(&m_hucPakIntDummyBuffer))
4959 {
4960 m_osInterface->pfnFreeResource(
4961 m_osInterface,
4962 &m_hucPakIntDummyBuffer);
4963 }
4964
4965 if (!Mos_ResourceIsNull(&m_frameStatsPakIntegrationBuffer.sResource))
4966 {
4967 m_osInterface->pfnFreeResource(
4968 m_osInterface,
4969 &m_frameStatsPakIntegrationBuffer.sResource);
4970 }
4971
4972 if (!Mos_ResourceIsNull(&m_hucPakIntBrcDataBuffer))
4973 {
4974 m_osInterface->pfnFreeResource(
4975 m_osInterface,
4976 &m_hucPakIntBrcDataBuffer);
4977 }
4978
4979 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileStatsPakIntegrationBuffer); i++)
4980 {
4981 if (!Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[i].sResource))
4982 {
4983 m_osInterface->pfnFreeResource(
4984 m_osInterface,
4985 &m_tileStatsPakIntegrationBuffer[i].sResource);
4986 }
4987 }
4988
4989 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_stitchWaitSemaphoreMem); i++)
4990 {
4991 if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource))
4992 {
4993 m_osInterface->pfnFreeResource(
4994 m_osInterface,
4995 &m_stitchWaitSemaphoreMem[i].sResource);
4996 }
4997 }
4998
4999 if (!Mos_ResourceIsNull(&m_resPipeStartSync))
5000 {
5001 m_osInterface->pfnFreeResource(
5002 m_osInterface,
5003 &m_resPipeStartSync);
5004 }
5005
5006 if (!Mos_ResourceIsNull(&m_resFrameStartSync))
5007 {
5008 m_osInterface->pfnFreeResource(
5009 m_osInterface,
5010 &m_resFrameStartSync);
5011 }
5012
5013 if (!Mos_ResourceIsNull(&m_resDelayMinus))
5014 {
5015 m_osInterface->pfnFreeResource(
5016 m_osInterface,
5017 &m_resDelayMinus);
5018 }
5019 }
5020
5021 return;
5022 }
5023
SendMIAtomicCmd(PMOS_RESOURCE semaMem,uint32_t immData,MHW_COMMON_MI_ATOMIC_OPCODE opCode,PMOS_COMMAND_BUFFER cmdBuffer)5024 MOS_STATUS CodechalVdencVp9StateG11::SendMIAtomicCmd(
5025 PMOS_RESOURCE semaMem,
5026 uint32_t immData,
5027 MHW_COMMON_MI_ATOMIC_OPCODE opCode,
5028 PMOS_COMMAND_BUFFER cmdBuffer
5029 )
5030 {
5031 MHW_MI_ATOMIC_PARAMS atomicParams;
5032 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5033
5034 CODECHAL_ENCODE_FUNCTION_ENTER;
5035
5036 MOS_ZeroMemory((&atomicParams), sizeof(atomicParams));
5037 atomicParams.pOsResource = semaMem;
5038 atomicParams.dwDataSize = sizeof(uint32_t);
5039 atomicParams.Operation = opCode;
5040 atomicParams.bInlineData = true;
5041 atomicParams.dwOperand1Data[0] = immData;
5042 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(cmdBuffer, &atomicParams));
5043
5044 return eStatus;
5045 }
5046
Initialize(CodechalSetting * settings)5047 MOS_STATUS CodechalVdencVp9StateG11::Initialize(CodechalSetting * settings)
5048 {
5049 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5050 uint32_t maxRows = 1;
5051
5052 CODECHAL_ENCODE_FUNCTION_ENTER;
5053
5054 //Create and register huc Cmd Initializer
5055 m_hucCmdInitializer = MOS_New(CodechalCmdInitializerG11, this);
5056
5057 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::Initialize(settings));
5058
5059 GetSystemPipeNumberCommon();
5060
5061 if (MOS_VE_SUPPORTED(m_osInterface))
5062 {
5063 m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
5064 CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
5065 //scalability initialize
5066 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
5067 }
5068
5069 maxRows = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT;
5070 //Max num of rows = 4 by VP9 Spec
5071 maxRows = MOS_MIN(maxRows, 4);
5072 //Max tile numbers = max of number tiles for single pipe or max muber of tiles for scalable pipes
5073 m_maxTileNumber = MOS_MAX((MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH), m_numVdbox * maxRows);
5074
5075 m_dysVdencMultiPassEnabled = true;
5076
5077 m_numPipe = m_numVdbox;
5078
5079 m_scalableMode = (m_numPipe > 1);
5080
5081 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
5082
5083 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5084 MOS_STATUS eStatusKey = MOS_UserFeature_ReadValue_ID(
5085 nullptr,
5086 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH,
5087 &userFeatureData,
5088 m_osInterface->pOsContext);
5089 m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
5090
5091 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5092 userFeatureData.i32Data = 1;
5093 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
5094 MOS_UserFeature_ReadValue_ID(
5095 nullptr,
5096 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_HUC_ENABLE_ID,
5097 &userFeatureData,
5098 m_osInterface->pOsContext);
5099 m_hucEnabled = (userFeatureData.i32Data) ? true : false;
5100
5101 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5102 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
5103 userFeatureData.i32Data = 1;
5104 MOS_UserFeature_ReadValue_ID(
5105 nullptr,
5106 __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
5107 &userFeatureData,
5108 m_osInterface->pOsContext);
5109 m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
5110 m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported;
5111 // For dynamic scaling, the SingleTaskPhaseSupported is set to true and it does not get restored
5112 // to the original value after encoding of the frame. So need to restore to the original state
5113 m_storeSingleTaskPhaseSupported = m_singleTaskPhaseSupported; //Save the SingleTaskPhase state here
5114
5115 // Multi-Pass BRC: currently disabled by default, plan to enable by default
5116 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5117 MOS_UserFeature_ReadValue_ID(
5118 nullptr,
5119 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_MULTIPASS_BRC_ENABLE_ID,
5120 &userFeatureData,
5121 m_osInterface->pOsContext);
5122 m_multipassBrcSupported = (userFeatureData.i32Data) ? true : false;
5123 m_vdencBrcStatsBufferSize = m_brcStatsBufSize;
5124 m_vdencBrcPakStatsBufferSize = m_brcPakStatsBufSize;
5125 m_brcHistoryBufferSize = m_brcHistoryBufSize;
5126
5127 // HME enabled by default for VP9
5128 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5129 MOS_UserFeature_ReadValue_ID(
5130 NULL,
5131 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ME_ENABLE_ID,
5132 &userFeatureData,
5133 m_osInterface->pOsContext);
5134 m_hmeSupported = (userFeatureData.i32Data) ? true : false;
5135
5136 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5137 MOS_UserFeature_ReadValue_ID(
5138 NULL,
5139 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_16xME_ENABLE_ID,
5140 &userFeatureData,
5141 m_osInterface->pOsContext);
5142 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
5143
5144 // disable superHME when HME is disabled
5145 if (m_hmeSupported == false)
5146 {
5147 m_16xMeSupported = false;
5148 }
5149
5150 // UHME disabled
5151 m_32xMeSupported = false;
5152 // VP9 uses a different streamin kernel
5153 m_useNonLegacyStreamin = true;
5154
5155 // Initialize kernel State
5156 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStates());
5157
5158 // Get max binding table count
5159 m_maxBtCount = GetMaxBtCount(); // Need to add the correct BTcount when HME is enabled
5160
5161 return eStatus;
5162 }
5163
/*----------------------------------------------------------------------------
| Name      : GetSegmentBlockIndexInFrame
| Purpose   : Returns the raster index of a 32x32 block within the frame, based on the block's x,y position (in 32x32 units) inside the current tile
|
| Returns   : uint32_t index of the 32x32 block in the frame
\---------------------------------------------------------------------------*/
GetSegmentBlockIndexInFrame(uint32_t frameWidth,uint32_t curr32XInTile,uint32_t curr32YInTile,uint32_t currTileStartY64aligned,uint32_t currTileStartX64aligned)5170 uint32_t CodechalVdencVp9StateG11::GetSegmentBlockIndexInFrame(
5171 uint32_t frameWidth,
5172 uint32_t curr32XInTile,
5173 uint32_t curr32YInTile,
5174 uint32_t currTileStartY64aligned,
5175 uint32_t currTileStartX64aligned)
5176 {
5177 uint32_t frameWidthIn32 = MOS_ALIGN_CEIL(frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
5178 uint32_t curr32XInFrame = currTileStartX64aligned / 32 + curr32XInTile;
5179 uint32_t curr32YInFrame = currTileStartY64aligned / 32 + curr32YInTile;
5180 uint32_t curr32BlockInFrame = curr32YInFrame * frameWidthIn32 + curr32XInFrame;
5181 return curr32BlockInFrame;
5182 }
5183
/*----------------------------------------------------------------------------
| Name      : InitZigZagToRasterLUTPerTile
| Purpose   : Rasterize the segment map indices of a tile's 32x32 blocks and add them to the frame map buffer created for these indices
|
| Returns   : MOS_STATUS
\---------------------------------------------------------------------------*/
InitZigZagToRasterLUTPerTile(uint32_t tileHeight,uint32_t tileWidth,uint32_t currTileStartYInFrame,uint32_t currTileStartXInFrame)5190 MOS_STATUS CodechalVdencVp9StateG11::InitZigZagToRasterLUTPerTile(
5191 uint32_t tileHeight,
5192 uint32_t tileWidth,
5193 uint32_t currTileStartYInFrame,
5194 uint32_t currTileStartXInFrame)
5195 {
5196 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5197
5198 // Allocate space for zig-zag to raster LUT used for vdenc streamin (1 int32_t for every 32x32 block (pic 64 aligned))
5199 // We only do this when the 1st tile of new frame is being processed and keep it the same unless tile resolutions changed.
5200 // We keep this map around until sequence is finished, it's deleted at device destruction.
5201 if (currTileStartXInFrame == 0 && currTileStartYInFrame == 0)
5202 {
5203 if (m_mapBuffer) // free previous if it exists - it may exist if this isn't first seg streamin frame, but it's a new tile with different res
5204 {
5205 MOS_FreeMemory(m_mapBuffer);
5206 }
5207 // Allocate one integer space for each 32*32 block in the whole frame to hold the segmentation index.
5208 m_mapBuffer = (uint32_t*)MOS_AllocAndZeroMemory(
5209 (MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
5210 (MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
5211 sizeof(int32_t)); //Framewidth and height are 64 aligned already
5212 }
5213 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mapBuffer);
5214
5215 uint32_t align64Width32 = MOS_ALIGN_CEIL(tileWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
5216 uint32_t align64Height32 = MOS_ALIGN_CEIL(tileHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
5217 uint32_t* mapBufferZigZagPerTile = (uint32_t*)MOS_AllocAndZeroMemory(align64Width32*align64Height32 * sizeof(uint32_t));
5218 CODECHAL_ENCODE_CHK_NULL_RETURN(mapBufferZigZagPerTile);
5219
5220 m_segStreamInHeight = m_frameHeight;
5221 m_segStreamInWidth = m_frameWidth;
5222
5223 uint32_t count32 = 0; //Number of 32 by 32 blocks that will be processed here
5224 for (uint32_t curr32YInTile = 0; curr32YInTile< align64Height32; curr32YInTile++)
5225 {
5226 for (uint32_t curr32XInTile = 0; curr32XInTile < align64Width32; curr32XInTile++)
5227 {
5228 mapBufferZigZagPerTile[count32++] = GetSegmentBlockIndexInFrame(
5229 m_frameWidth,
5230 curr32XInTile,
5231 curr32YInTile,
5232 currTileStartYInFrame,
5233 currTileStartXInFrame);
5234 }
5235 }
5236
5237 // mapBufferZigZagPerTile ---> m_mapBuffer
5238 // | a b c d ... ---> | a b W X c d Y Z ....
5239 // | W X Y Z ...
5240 uint32_t num32blocks = align64Width32 * align64Height32;
5241 uint32_t tileOffsetIndex = m_32BlocksRasterized;
5242 for (uint32_t i = 0, dwRasterCount = 0; i < num32blocks; i += (align64Width32 * 2))
5243 {
5244 for (uint32_t j = i; j < i + (align64Width32 * 2); j += 4)
5245 {
5246 m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
5247 m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
5248 }
5249 for (uint32_t j = i + 2; j < i + (align64Width32 * 2); j += 4)
5250 {
5251 m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
5252 m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
5253 }
5254 }
5255 if (mapBufferZigZagPerTile) // free per tile map buffer as it has been rasterized and copied into the mapbuffer
5256 {
5257 MOS_FreeMemory(mapBufferZigZagPerTile);
5258 }
5259
5260 // ^ Zig-zag pattern filled to SB aligned (CEIL), if unaligned then we base seg ID address on previous row/column (data replication)
5261 uint32_t width32 = CODECHAL_GET_WIDTH_IN_BLOCKS(tileWidth, 32);
5262 if (width32 != align64Width32) // replicate last column
5263 {
5264 for (auto i = (align64Width32 * 2) - 1 - 2; i < num32blocks; i += (align64Width32 * 2))
5265 {
5266 m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 1];
5267 m_mapBuffer[i + tileOffsetIndex + 2] = m_mapBuffer[i + tileOffsetIndex + 1];
5268 }
5269 }
5270
5271 uint32_t height32 = CODECHAL_GET_HEIGHT_IN_BLOCKS(tileHeight, 32);
5272 if (height32 != align64Height32) // replicate last row
5273 {
5274 for (auto i = num32blocks - (align64Width32 * 2) + 2; i < num32blocks; i += 4)
5275 {
5276 m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 2];
5277 m_mapBuffer[i + tileOffsetIndex + 1] = m_mapBuffer[i + tileOffsetIndex + 1 - 2];
5278 }
5279 }
5280 //Index offset to be added to the buffer for the next tile depending on how many blocks were rasterized already in this tile
5281 m_32BlocksRasterized += count32;
5282
5283 return eStatus;
5284 }
5285
CalculateVdencPictureStateCommandSize()5286 MOS_STATUS CodechalVdencVp9StateG11::CalculateVdencPictureStateCommandSize()
5287 {
5288 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5289
5290 CODECHAL_ENCODE_FUNCTION_ENTER;
5291
5292 MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11 stateCmdSizeParams;
5293 uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
5294 stateCmdSizeParams.bHucDummyStream = true;
5295 m_hwInterface->GetHxxStateCommandSize(
5296 CODECHAL_ENCODE_MODE_VP9,
5297 &vdencPictureStatesSize,
5298 &vdencPicturePatchListSize,
5299 &stateCmdSizeParams);
5300
5301 m_defaultPictureStatesSize += vdencPictureStatesSize;
5302 m_defaultPicturePatchListSize += vdencPicturePatchListSize;
5303
5304 m_hwInterface->GetVdencStateCommandsDataSize(
5305 CODECHAL_ENCODE_MODE_VP9,
5306 &vdencPictureStatesSize,
5307 &vdencPicturePatchListSize);
5308
5309 m_defaultPictureStatesSize += vdencPictureStatesSize;
5310 m_defaultPicturePatchListSize += vdencPicturePatchListSize;
5311
5312 return eStatus;
5313 }
5314
CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)5315 PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS CodechalVdencVp9StateG11::CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)
5316 {
5317 pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G11);
5318
5319 return pipeBufAddrParams;
5320 }
5321
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)5322 MOS_STATUS CodechalVdencVp9StateG11::UpdateCmdBufAttribute(
5323 PMOS_COMMAND_BUFFER cmdBuffer,
5324 bool renderEngineInUse)
5325 {
5326 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5327
5328 // should not be there. Will remove it in the next change
5329 CODECHAL_ENCODE_FUNCTION_ENTER;
5330 if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
5331 {
5332 PMOS_CMD_BUF_ATTRI_VE attriExt =
5333 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
5334
5335 memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
5336 attriExt->bUseVirtualEngineHint =
5337 attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
5338 }
5339
5340 return eStatus;
5341 }