1 /*
2 * Copyright (c) 2017-2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 
23 /*
24 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
25 *
26 *  Use of this source code is governed by a BSD-style license
27 *  that can be found in the LICENSE file in the root of the source
28 *  tree. An additional intellectual property rights grant can be found
29 *  in the file PATENTS.  All contributing project authors may
30 *  be found in the AUTHORS file in the root of the source tree.
31 */
32 
33 //!
34 //! \file     codechal_vdenc_vp9_g11.cpp
35 //! \brief    VP9 VDENC encoder for GEN11.
36 //!
37 #include "codechal_vdenc_vp9_g11.h"
38 #include "codechal_kernel_header_g11.h"
39 #include "codeckrnheader.h"
40 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
41 #include "igcodeckrn_g11.h"
42 #endif
43 #include "mhw_vdbox_hcp_g11_X.h"
44 #include "mhw_vdbox_vdenc_g11_X.h"
45 #include "mhw_vdbox_g11_X.h"
46 #include "mhw_vdbox_vdenc_hwcmd_g11_X.h"
47 #include "codechal_huc_cmd_initializer_g11.h"
48 
49 const uint32_t CodechalVdencVp9StateG11::meCurbeInit[48] =
50     {
51     0x00000000, 0x00200010, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
52     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
53     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
54     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
55     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
56     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
57 };
58 
UserFeatureKeyReport()59 MOS_STATUS CodechalVdencVp9StateG11::UserFeatureKeyReport()
60 {
61     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
62 
63     CODECHAL_ENCODE_FUNCTION_ENTER;
64 
65     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::UserFeatureKeyReport());
66 
67 #if (_DEBUG || _RELEASE_INTERNAL)
68     CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
69     CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH, m_enableTileStitchByHW, m_osInterface->pOsContext);
70     CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
71 #endif
72 
73     return eStatus;
74 }
75 
CodechalVdencVp9StateG11(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)76 CodechalVdencVp9StateG11::CodechalVdencVp9StateG11(
77     CodechalHwInterface* hwInterface,
78     CodechalDebugInterface* debugInterface,
79     PCODECHAL_STANDARD_INFO standardInfo)
80     :CodechalVdencVp9State(hwInterface, debugInterface, standardInfo)
81 {
82     m_useCommonKernel = true;
83     m_isTilingSupported      = true;
84 
85 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
86     m_kernelBase = (uint8_t *)IGCODECKRN_G11;
87 #endif
88 
89     // KUID for HME + DS + SW SCOREBOARD Kernel
90     m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
91 
92     // We need the DYS kernel inside AllVP9Enc_CNLA0, for SHME we need kernels inside
93     // HME_DS_SCOREBOARD_KERNEL, so we need to allocate enough size in ISH for both.
94     pfnGetKernelHeaderAndSize = GetCommonKernelHeaderAndSizeG11;
95 
96     uint8_t* binary = nullptr;
97     uint32_t combinedKernelSize = 0;
98     m_scalabilityState = nullptr;
99 
100     m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_VP9_NUM_SYNC_TAGS;
101     m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_ENCODE_VP9_INIT_DSH_SIZE;
102 
103     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
104     if (m_useCommonKernel)
105     {
106         m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
107         eStatus = CodecHalGetKernelBinaryAndSize(
108 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
109             (uint8_t*)IGCODECKRN_G11,
110 #else
111             nullptr,
112 #endif
113             m_kuidCommon,
114             &binary,
115             &combinedKernelSize);
116         CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
117 
118         m_hwInterface->GetStateHeapSettings()->dwIshSize +=
119             MOS_ALIGN_CEIL(combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
120     }
121 
122     // Initialize to 0
123     MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
124     MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
125     MOS_ZeroMemory(&m_hcpScalabilitySyncBuffer, sizeof(m_hcpScalabilitySyncBuffer));
126 
127     for (auto i = 0; i < m_numUncompressedSurface; i++)
128     {
129         MOS_ZeroMemory(&m_tileRecordBuffer[i].sResource, sizeof(m_tileRecordBuffer[i].sResource));
130     }
131     for (auto i = 0; i < m_numUncompressedSurface; i++)
132     {
133         MOS_ZeroMemory(&m_tileStatsPakIntegrationBuffer[i].sResource, sizeof(m_tileStatsPakIntegrationBuffer[i].sResource));
134     }
135     MOS_ZeroMemory(&m_frameStatsPakIntegrationBuffer.sResource, sizeof(m_frameStatsPakIntegrationBuffer.sResource));
136     for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
137     {
138         for (auto j = 0; j < m_brcMaxNumPasses; j++)
139         {
140             MOS_ZeroMemory(&m_hucPakIntDmemBuffer[i][j], sizeof(m_hucPakIntDmemBuffer[i][j]));
141         }
142     }
143     MOS_ZeroMemory(&m_hucPakIntDummyBuffer, sizeof(m_hucPakIntDummyBuffer));
144     MOS_ZeroMemory(&m_hucPakIntBrcDataBuffer, sizeof(m_hucPakIntBrcDataBuffer));
145     MOS_ZeroMemory(&m_resPipeStartSync, sizeof(m_resPipeStartSync));
146     MOS_ZeroMemory(&m_resDelayMinus, sizeof(m_resDelayMinus));
147     for (auto i = 0; i < m_maxNumPipes; i++)
148     {
149         MOS_ZeroMemory(&m_stitchWaitSemaphoreMem[i], sizeof(m_stitchWaitSemaphoreMem[i]));
150     }
151 
152     for (auto i = 0; i < 3; i++)
153     {
154         MOS_ZeroMemory(&m_refPicList0[i], sizeof(m_refPicList0[i]));
155     }
156     CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
157     m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
158     Mos_SetVirtualEngineSupported(m_osInterface, true);
159 }
160 
GetSystemPipeNumberCommon()161 MOS_STATUS CodechalVdencVp9StateG11::GetSystemPipeNumberCommon()
162 {
163     MOS_STATUS eStatus   = MOS_STATUS_SUCCESS;
164     MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
165 
166     CODECHAL_ENCODE_FUNCTION_ENTER;
167 
168     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
169     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
170     statusKey = MOS_UserFeature_ReadValue_ID(
171         NULL,
172         __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
173         &userFeatureData,
174         m_osInterface->pOsContext);
175 
176     //Disable scalability temporarily
177     bool disableScalability = true; // m_hwInterface->IsDisableScalability()
178     if (statusKey == MOS_STATUS_SUCCESS)
179     {
180         disableScalability = userFeatureData.i32Data ? true : false;
181     }
182 
183     MEDIA_SYSTEM_INFO *gtSystemInfo = m_gtSystemInfo;
184 
185     if (gtSystemInfo && disableScalability == false)
186     {
187         // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
188         m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
189     }
190     else
191     {
192         m_numVdbox = 1;
193     }
194 
195     return eStatus;
196 }
197 
ExecuteDysSliceLevel()198 MOS_STATUS CodechalVdencVp9StateG11::ExecuteDysSliceLevel()
199 {
200     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
201 
202     CODECHAL_ENCODE_FUNCTION_ENTER;
203 
204     CODECHAL_ENCODE_CHK_NULL_RETURN(m_nalUnitParams);
205 
206     MOS_COMMAND_BUFFER cmdBuffer;
207     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
208 
209     if (!m_singleTaskPhaseSupported)
210     {
211         PerfTagSetting perfTag;
212         CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
213     }
214 
215     MHW_BATCH_BUFFER secondLevelBatchBuffer;
216     MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
217     secondLevelBatchBuffer.dwOffset = 0;
218     secondLevelBatchBuffer.bSecondLevel = true;
219     if (!m_hucEnabled)
220     {
221         secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
222     }
223     else
224     {
225         secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
226     }
227     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
228         &cmdBuffer,
229         &secondLevelBatchBuffer));
230 
231     // Setup Tile level PAK commands
232     CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
233 
234     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9StateG11::SetTileData());
235     CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG11 *>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[0]));
236 
237     //Disbale Frame Tracking Header for this submission as this is not the last submission
238     bool isFrameTrackingHeaderSet = cmdBuffer.Attributes.bEnableMediaFrameTracking;
239     cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
240 
241     MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(MHW_BATCH_BUFFER));
242     secondLevelBatchBuffer.OsResource = m_resMbCodeSurface;
243     secondLevelBatchBuffer.dwOffset = 0;
244     secondLevelBatchBuffer.bSecondLevel = true;
245     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &secondLevelBatchBuffer));
246 
247     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
248     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
249     // MFXPipeDone should not be set for tail insertion
250     vdPipelineFlushParams.Flags.bWaitDoneMFX =
251         (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
252     vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
253     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
254     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
255 
256     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
257 
258     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
259     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
260     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
261 
262     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
263 
264     if (!m_scalableMode) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
265     {
266         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
267     }
268 
269     if (m_currPass >= (m_numPasses - 1))    // Last pass and the one before last
270     {
271         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
272     }
273 
274     std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
275     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
276         &cmdBuffer,
277         CODECHAL_NUM_MEDIA_STATES,
278         currPassName.data())));
279 
280     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
281 
282     if (m_waitForEnc &&
283         !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
284     {
285         MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
286         syncParams.GpuContext = m_videoContext;
287         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
288 
289         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
290         m_waitForEnc = false;
291     }
292 
293     if (m_currPass >= (m_numPasses - 1))    // Last pass and the one before last
294     {
295         bool renderFlags;
296 
297         renderFlags = m_videoContextUsesNullHw;
298 
299         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
300     }
301 
302     //Restore the frame tracking header for the further passes and submissions
303     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
304     cmdBuffer.Attributes.bEnableMediaFrameTracking = isFrameTrackingHeaderSet;
305     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
306 
307     CODECHAL_DEBUG_TOOL(
308         if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
309             //CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
310             //m_debugInterface->DumpBuffer(
311             //    (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
312             //    CodechalDbgAttr::attrOutput,
313             //    "SegMap_Out",
314             //    CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
315             //    0,
316             //    CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
317         } if (m_mmcState) {
318             m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
319         });
320 
321     return eStatus;
322 }
323 
InitKernelStateMe()324 MOS_STATUS CodechalVdencVp9StateG11::InitKernelStateMe()
325 {
326     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
327 
328     CODECHAL_ENCODE_FUNCTION_ENTER;
329 
330 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
331     CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderEngineInterface->GetHwCaps());
332 
333     uint32_t combinedKernelSize = 0;
334     uint8_t *binary             = nullptr;
335     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
336         m_kernelBase,
337         m_kuidCommon,
338         &binary,
339         &combinedKernelSize));
340 
341     for (uint32_t krnStateIdx = 0; krnStateIdx < CodechalEncoderState::CODECHAL_ENCODE_ME_IDX_NUM; krnStateIdx++)
342     {
343         CODECHAL_KERNEL_HEADER currKrnHeader;
344         PMHW_KERNEL_STATE      kernelStatePtr = &m_meKernelStates[krnStateIdx];
345         uint32_t               kernelSize     = combinedKernelSize;
346         // For dual pipe HME-P kernel state is loaded for both ids
347         // Non legacy streamin is a new hevc vp9 streamin kernel
348         EncOperation encOperation = (krnStateIdx > 0 && m_vdencEnabled) ? (m_useNonLegacyStreamin ? VDENC_STREAMIN_HEVC : VDENC_ME) : ENC_ME;
349         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
350             binary,
351             encOperation,
352             (encOperation == ENC_ME) ? krnStateIdx : 0,
353             &currKrnHeader,
354             &kernelSize));
355 
356         kernelStatePtr->KernelParams.iBTCount     = CODECHAL_ENCODE_ME_NUM_SURFACES_G11;
357         kernelStatePtr->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
358         kernelStatePtr->KernelParams.iCurbeLength = sizeof(MeCurbe);
359         kernelStatePtr->KernelParams.iBlockWidth  = CODECHAL_MACROBLOCK_WIDTH;
360         kernelStatePtr->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
361         kernelStatePtr->KernelParams.iIdCount     = 1;
362 
363         kernelStatePtr->dwCurbeOffset        = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
364         kernelStatePtr->KernelParams.pBinary = binary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
365         kernelStatePtr->KernelParams.iSize   = kernelSize;
366         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
367             m_stateHeapInterface,
368             kernelStatePtr->KernelParams.iBTCount,
369             &kernelStatePtr->dwSshSize,
370             &kernelStatePtr->dwBindingTableSize));
371 
372         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
373 
374         if (m_noMeKernelForPFrame)
375         {
376             m_meKernelStates[1] = m_meKernelStates[0];
377             break;
378         }
379     }
380 
381     // Until a better way can be found, maintain old binding table structures
382     MeKernelBindingTable *bindingTable        = &m_meBindingTable;
383     bindingTable->dwMEMVDataSurface           = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G11;
384     bindingTable->dw16xMEMVDataSurface        = CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G11;
385     bindingTable->dw32xMEMVDataSurface        = CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G11;
386     bindingTable->dwMEDist                    = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G11;
387     bindingTable->dwMEBRCDist                 = CODECHAL_ENCODE_ME_BRC_DISTORTION_G11;
388     bindingTable->dwMECurrForFwdRef           = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G11;
389     bindingTable->dwMEFwdRefPicIdx[0]         = CODECHAL_ENCODE_ME_FWD_REF_IDX0_G11;
390     bindingTable->dwMEFwdRefPicIdx[1]         = CODECHAL_ENCODE_ME_FWD_REF_IDX1_G11;
391     bindingTable->dwMEFwdRefPicIdx[2]         = CODECHAL_ENCODE_ME_FWD_REF_IDX2_G11;
392     bindingTable->dwMEFwdRefPicIdx[3]         = CODECHAL_ENCODE_ME_FWD_REF_IDX3_G11;
393     bindingTable->dwMEFwdRefPicIdx[4]         = CODECHAL_ENCODE_ME_FWD_REF_IDX4_G11;
394     bindingTable->dwMEFwdRefPicIdx[5]         = CODECHAL_ENCODE_ME_FWD_REF_IDX5_G11;
395     bindingTable->dwMEFwdRefPicIdx[6]         = CODECHAL_ENCODE_ME_FWD_REF_IDX6_G11;
396     bindingTable->dwMEFwdRefPicIdx[7]         = CODECHAL_ENCODE_ME_FWD_REF_IDX7_G11;
397     bindingTable->dwMECurrForBwdRef           = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G11;
398     bindingTable->dwMEBwdRefPicIdx[0]         = CODECHAL_ENCODE_ME_BWD_REF_IDX0_G11;
399     bindingTable->dwMEBwdRefPicIdx[1]         = CODECHAL_ENCODE_ME_BWD_REF_IDX1_G11;
400     bindingTable->dwVdencStreamInSurface      = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G11;
401     bindingTable->dwVdencStreamInInputSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G11;
402 #endif
403 
404     return eStatus;
405 }
406 
InitKernelStates()407 MOS_STATUS CodechalVdencVp9StateG11::InitKernelStates()
408 {
409     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
410 
411     CODECHAL_ENCODE_FUNCTION_ENTER;
412 
413 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
414     // DYS
415     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateDys());
416 
417     // G11 VDEnc SHME (16x) and 4x/streamin
418     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
419 #endif
420 
421     return eStatus;
422 }
423 
GetMaxBtCount()424 uint32_t CodechalVdencVp9StateG11::GetMaxBtCount()
425 {
426     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
427 
428     CODECHAL_ENCODE_FUNCTION_ENTER;
429     uint32_t maxBtCount = 0;
430 
431 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
432     if (m_hmeSupported)
433     {
434         uint32_t scalingBtCount = 0;
435         uint32_t numKernelsToLoad = m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
436         uint16_t btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
437         for(uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
438         {
439             scalingBtCount += MOS_ALIGN_CEIL(
440                 m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
441                 btIdxAlignment);
442         }
443         uint32_t meBtCount = 0;
444         // 4xME + Streamin kernel btcount
445         meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_VDENC].KernelParams.iBTCount, btIdxAlignment);
446 
447         //16xME streamin kernel count added to ME count and scaling kernel 16x added to scaling count
448         if (m_16xMeSupported)
449         {
450             meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_P].KernelParams.iBTCount, btIdxAlignment);
451             for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
452             {
453                 scalingBtCount += MOS_ALIGN_CEIL(
454                     m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
455                     btIdxAlignment);
456             }
457         }
458         maxBtCount = scalingBtCount + meBtCount;
459     }
460 #endif
461 
462     return maxBtCount;
463 }
464 
465 // DYS kernel state init
InitKernelStateDys()466 MOS_STATUS CodechalVdencVp9StateG11::InitKernelStateDys()
467 {
468     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
469 
470     CODECHAL_ENCODE_FUNCTION_ENTER;
471 
472 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
473     uint32_t combinedKernelSize = 0;
474     uint8_t* binary = nullptr;
475     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
476         (uint8_t*)IGCODECKRN_G11,
477         m_kuidCommon,
478         &binary,
479         &combinedKernelSize));
480 
481     uint32_t kernelSize = combinedKernelSize;
482     CODECHAL_KERNEL_HEADER currKrnHeader;
483     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
484         binary,
485         ENC_DYS,
486         0,
487         &currKrnHeader,
488         &kernelSize));
489 
490     PMHW_KERNEL_STATE kernelState = &m_dysKernelState;
491     kernelState->KernelParams.iBTCount = MOS_ALIGN_CEIL(m_dysNumSurfaces, m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment());
492     kernelState->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
493     kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(m_dysStaticDataSize, m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
494     kernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
495     kernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
496     kernelState->KernelParams.iIdCount = 1;
497     kernelState->KernelParams.iSamplerCount = 1;
498     kernelState->KernelParams.iSamplerLength = m_stateHeapInterface->pStateHeapInterface->GetSizeofSamplerStateAvs();
499 
500     kernelState->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
501     kernelState->dwSamplerOffset = MOS_ALIGN_CEIL(kernelState->dwCurbeOffset + kernelState->KernelParams.iCurbeLength, MHW_SAMPLER_STATE_AVS_ALIGN_G9);
502     kernelState->KernelParams.pBinary =
503         binary +
504         (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
505     kernelState->KernelParams.iSize = kernelSize;
506     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
507         m_stateHeapInterface,
508         kernelState->KernelParams.iBTCount,
509         &kernelState->dwSshSize,
510         &kernelState->dwBindingTableSize));
511 
512     m_dysDshSize = kernelState->dwSamplerOffset +
513         MOS_ALIGN_CEIL(kernelState->KernelParams.iSamplerLength * kernelState->KernelParams.iSamplerCount, MHW_SAMPLER_STATE_AVS_ALIGN);
514 
515     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelState));
516 #endif
517 
518     return eStatus;
519 }
520 
SetupSegmentationStreamIn()521 MOS_STATUS CodechalVdencVp9StateG11::SetupSegmentationStreamIn()
522 {
523     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
524 
525     CODECHAL_ENCODE_FUNCTION_ENTER;
526 
527     if (!m_segmentMapProvided && !m_hmeEnabled) // If we're not going to use the streamin surface leave now
528     {
529         return eStatus;
530     }
531 
532     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
533     MOS_LOCK_PARAMS lockFlagsWriteOnly;
534     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
535     lockFlagsWriteOnly.WriteOnly = 1;
536 
537     MOS_LOCK_PARAMS lockFlagsReadOnly;
538     MOS_ZeroMemory(&lockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
539     lockFlagsReadOnly.ReadOnly = 1;
540 
541     mhw_vdbox_vdenc_g11_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *
542     streamIn = (mhw_vdbox_vdenc_g11_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *)m_osInterface->pfnLockResource(
543         m_osInterface,
544         &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
545         &lockFlagsWriteOnly);
546     CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
547 
548     // align to cache line size is OK since streamin state is padded to cacheline size - HW uses cacheline size to read, not command size
549     uint32_t blockWidth   = MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
550     uint32_t blockHeight  = MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
551     uint32_t streamInSize = blockHeight * blockWidth * CODECHAL_CACHELINE_SIZE;
552     MOS_ZeroMemory(streamIn, streamInSize);
553 
554     // If segment map isn't provided then we unlock surface and exit function here.
555     // Reason why check isn't done before function call is to take advantage of the fact that
556     // we need the surface locked here if seg map is provided and we want it 0'd either way.
557     // This saves us from doing 2 locks on this buffer per frame.
558     if (!m_segmentMapProvided)
559     {
560         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
561             m_osInterface,
562             &m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
563         return eStatus;
564     }
565 
566     char *data = (char *)m_osInterface->pfnLockResource(
567         m_osInterface,
568         &m_mbSegmentMapSurface.OsResource,
569         &lockFlagsReadOnly);
570     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
571 
572     // Rasterization is done within a tile and then for each tile within the frame in raster order.
573     uint32_t numTileColumns          = (1 << m_vp9PicParams->log2_tile_columns);
574     uint32_t numTileRows             = (1 << m_vp9PicParams->log2_tile_rows);
575     uint32_t numTiles = numTileColumns * numTileRows;
576     uint32_t currTileStartX64Aligned = 0, dwCurrTileStartY64Aligned = 0;         //Set tile Y coordinate 0
577     m_32BlocksRasterized = 0;   //Count of rasterized blocks for this frame
578     uint32_t tileX = 0;
579     uint32_t tileY = 0;
580     for (uint32_t tileIdx = 0; tileIdx < numTiles; tileIdx++)
581     {
582         tileX = tileIdx % numTileColumns; //Current tile column position
583         tileY = tileIdx / numTileColumns; //Current tile row position
584 
585         currTileStartX64Aligned   = ((tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns) * CODEC_VP9_SUPER_BLOCK_WIDTH;
586         dwCurrTileStartY64Aligned = ((tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows) * CODEC_VP9_SUPER_BLOCK_HEIGHT;
587 
588         uint32_t tileWidth64Aligned = (((tileX == (numTileColumns - 1)) ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) *
589                                           CODEC_VP9_SUPER_BLOCK_WIDTH) -
590                                       currTileStartX64Aligned;
591 
592         uint32_t tileHeight64Aligned = (((tileY == (numTileRows - 1)) ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) *
593                                            CODEC_VP9_SUPER_BLOCK_HEIGHT) -
594                                        dwCurrTileStartY64Aligned;
595 
596         // last tile col raw width and raw height not necessarily 64 aligned, use this length to duplicate values from segmap for empty padding blocks in last tiles.
597         uint32_t lastTileColWidth = (tileX == (numTileColumns - 1)) ? (m_frameWidth - currTileStartX64Aligned) : tileWidth64Aligned;
598         uint32_t lastTileRowHeight = (tileY == (numTileRows - 1)) ? (m_frameHeight - dwCurrTileStartY64Aligned) : tileHeight64Aligned;
599 
600         uint32_t tileWidth = (tileX == (numTileColumns - 1)) ? lastTileColWidth : tileWidth64Aligned;
601         uint32_t tileHeight = (tileY == (numTileRows - 1)) ? lastTileRowHeight : tileHeight64Aligned;
602 
603         // Recreate the mapbuffer and remap it if, for this frame, tile height and width have changed from previous tile
604         // which was processed from this frame or previous,
605         // or if map buffer is created for previous frame and tile map has changed from previous frame (numtilerows and cols)
606         if (!m_mapBuffer ||
607             tileHeight != m_segStreamInHeight ||
608             tileWidth != m_segStreamInWidth ||
609             numTileColumns != m_tileParams[tileIdx].NumOfTileColumnsInFrame ||
610             m_tileParams[tileIdx].NumOfTilesInFrame != numTiles)
611         {
612             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitZigZagToRasterLUTPerTile(tileHeight,
613                 tileWidth,
614                 dwCurrTileStartY64Aligned,
615                 currTileStartX64Aligned));
616         }
617         m_tileParams[tileIdx].NumOfTileColumnsInFrame = numTileColumns;
618         m_tileParams[tileIdx].NumOfTilesInFrame       = numTiles;
619     }
620 
621 
622     uint32_t dwPitch = m_mbSegmentMapSurface.dwPitch;
623     if (m_osInterface->pfnGetResType(&m_mbSegmentMapSurface.OsResource) == MOS_GFXRES_BUFFER)
624     {
625         //application can send 1D or 2D buffer, based on that change the pitch to correctly access the map buffer
626         //driver reads the seg ids from the buffer for each 16x16 block. Reads 4 values for each 32x32 block
627         dwPitch = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) / CODECHAL_MACROBLOCK_WIDTH;
628     }
629     // set seg ID's of streamin states
630     for (uint32_t i = 0 ; i < blockHeight * blockWidth ; ++i)
631     {
632         uint32_t addrOffset = CalculateBufferOffset(
633             m_mapBuffer[i],
634             m_frameWidth,
635             m_vp9PicParams->PicFlags.fields.seg_id_block_size,
636             dwPitch);
637         uint32_t segId  = *(data + addrOffset);
638         streamIn[i].DW7.SegidEnable = 1;
639         streamIn[i].DW7.Segid32X32016X1603Vp9Only = segId | (segId << 4) | (segId << 8) | (segId << 12);
640 
641         // TU functions copied from there.
642         streamIn[i].DW0.Maxtusize = 3;
643 
644         streamIn[i].DW0.Maxcusize = 3;
645         // For InterFrames we change the CUsize to 32x32 if we have sub 32 blocks with different segids in superblock
646         if ((i % 4) == 3 && m_pictureCodingType == P_TYPE)
647         {
648             if (!(streamIn[i - 3].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only &&
649                 streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only &&
650                 streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only == streamIn[i].DW7.Segid32X32016X1603Vp9Only))
651             {
652                 streamIn[i - 3].DW0.Maxcusize = streamIn[i - 2].DW0.Maxcusize = streamIn[i - 1].DW0.Maxcusize = streamIn[i].DW0.Maxcusize = 2;
653             }
654         }
655 
656         streamIn[i].DW0.Numimepredictors = CODECHAL_VDENC_NUMIMEPREDICTORS;
657 
658         switch (m_vp9SeqParams->TargetUsage)
659         {
660         case 1:     // Quality mode
661         case 4:     // Normal mode
662             streamIn[i].DW6.Nummergecandidatecu8X8 = 1;
663             streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
664             streamIn[i].DW6.Nummergecandidatecu32X32 = 3;
665             streamIn[i].DW6.Nummergecandidatecu64X64 = 4;
666             break;
667         case 7:     // Speed mode
668             streamIn[i].DW0.Numimepredictors = 4;
669             streamIn[i].DW6.Nummergecandidatecu8X8 = 0;
670             streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
671             streamIn[i].DW6.Nummergecandidatecu32X32 = 2;
672             streamIn[i].DW6.Nummergecandidatecu64X64 = 2;
673             break;
674         default:
675             MHW_ASSERTMESSAGE("Invalid TU provided!");
676             return MOS_STATUS_INVALID_PARAMETER;
677         }
678     }
679 
680     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
681         m_osInterface,
682         &m_mbSegmentMapSurface.OsResource));
683 
684     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
685         m_osInterface,
686         &m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
687 
688     return eStatus;
689 }
690 
SetMeSurfaceParams(MeSurfaceParams * meSurfaceParams)691 MOS_STATUS CodechalVdencVp9StateG11::SetMeSurfaceParams(MeSurfaceParams *meSurfaceParams)
692 {
693     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
694 
695     CODECHAL_ENCODE_FUNCTION_ENTER;
696 
697     CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
698 
699     meSurfaceParams->bMbaff = false;
700     meSurfaceParams->b4xMeDistortionBufferSupported = true;
701     meSurfaceParams->dwNumRefIdxL0ActiveMinus1      = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
702     meSurfaceParams->dwNumRefIdxL1ActiveMinus1 = 0;
703 
704     MOS_ZeroMemory(&m_refPicList0, sizeof(m_refPicList0));
705 
706     if (m_lastRefPic)
707     {
708         m_refPicList0[0].FrameIdx = m_vp9PicParams->RefFlags.fields.LastRefIdx;
709         m_refPicList0[0].PicFlags = PICTURE_FRAME;
710     }
711     if (m_goldenRefPic)
712     {
713         m_refPicList0[1].FrameIdx = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
714         m_refPicList0[1].PicFlags = PICTURE_FRAME;
715     }
716     if (m_altRefPic)
717     {
718         m_refPicList0[2].FrameIdx = m_vp9PicParams->RefFlags.fields.AltRefIdx;
719         m_refPicList0[2].PicFlags = PICTURE_FRAME;
720     }
721 
722     meSurfaceParams->pL0RefFrameList = &(m_refPicList0[0]);
723     meSurfaceParams->ppRefList                  = &m_refList[0];
724     meSurfaceParams->pPicIdx = &m_picIdx[0];
725     meSurfaceParams->pCurrOriginalPic = &m_currOriginalPic;
726     meSurfaceParams->ps4xMeMvDataBuffer = &m_4xMeMvDataBuffer;
727     meSurfaceParams->ps16xMeMvDataBuffer = &m_16xMeMvDataBuffer;
728     meSurfaceParams->psMeDistortionBuffer = &m_4xMeDistortionBuffer;
729     meSurfaceParams->dwVerticalLineStride = m_verticalLineStride;
730     meSurfaceParams->dwVerticalLineStrideOffset = m_verticalLineStrideOffset;
731     meSurfaceParams->b32xMeEnabled = m_32xMeSupported;
732     meSurfaceParams->b16xMeEnabled = m_16xMeEnabled;
733     meSurfaceParams->pMeBindingTable = &m_meBindingTable;
734     meSurfaceParams->bVdencStreamInEnabled = true;
735     meSurfaceParams->psMeVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
736     meSurfaceParams->dwVDEncStreamInSurfaceSize = MOS_BYTES_TO_DWORDS((MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
737                                                   (MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
738                                                   CODECHAL_CACHELINE_SIZE);
739     return eStatus;
740 }
741 
SetMeCurbeParams(MeCurbeParams * meParams)742 MOS_STATUS CodechalVdencVp9StateG11::SetMeCurbeParams(MeCurbeParams *meParams)
743 {
744     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
745 
746     CODECHAL_ENCODE_FUNCTION_ENTER;
747 
748     CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
749 
750     meParams->b16xMeEnabled = m_16xMeEnabled;
751     meParams->b32xMeEnabled = m_32xMeSupported;
752     meParams->TargetUsage = TU_QUALITY;
753     meParams->MaxMvLen = m_hmeMaxMvLength;
754     meParams->CurrOriginalPic.FrameIdx     = m_vp9PicParams->CurrOriginalPic.FrameIdx;
755     meParams->CurrOriginalPic.PicEntry     = m_vp9PicParams->CurrOriginalPic.PicEntry;
756     meParams->CurrOriginalPic.PicFlags     = m_vp9PicParams->CurrOriginalPic.PicFlags;
757     meParams->pic_init_qp_minus26          = m_vp9PicParams->LumaACQIndex - 26;
758     meParams->num_ref_idx_l0_active_minus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
759     meParams->num_ref_idx_l1_active_minus1 = 0;
760 
761     return eStatus;
762 }
763 
SendMeSurfaces(PMOS_COMMAND_BUFFER cmdBuffer,MeSurfaceParams * params)764 MOS_STATUS CodechalVdencVp9StateG11::SendMeSurfaces(
765     PMOS_COMMAND_BUFFER cmdBuffer,
766     MeSurfaceParams *   params)
767 {
768     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
769 
770     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
771     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
772     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
773     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pCurrOriginalPic);
774     CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps4xMeMvDataBuffer);
775     CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeDistortionBuffer);
776 
777     if (!params->bVdencStreamInEnabled)
778     {
779         CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeBrcDistortionBuffer);
780     }
781     else
782     {
783         CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeVdencStreamInBuffer);
784     }
785 
786     CODECHAL_MEDIA_STATE_TYPE encMediaStateType = (params->b32xMeInUse) ? CODECHAL_MEDIA_STATE_32X_ME : params->b16xMeInUse ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
787 
788     if (params->bVdencStreamInEnabled && encMediaStateType == CODECHAL_MEDIA_STATE_4X_ME)
789     {
790         encMediaStateType = CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
791     }
792 
793     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pMeBindingTable);
794     MeKernelBindingTable *meBindingTable = params->pMeBindingTable;
795 
796     bool    isFieldPicture = CodecHal_PictureIsField(*(params->pCurrOriginalPic)) ? 1 : 0;
797     bool    isBottomField  = CodecHal_PictureIsBottomField(*(params->pCurrOriginalPic)) ? 1 : 0;
798     uint8_t currVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME : ((isBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
799 
800     PMOS_SURFACE currScaledSurface = nullptr, meMvDataBuffer = nullptr;
801     uint32_t     meMvBottomFieldOffset = 0, currScaledBottomFieldOffset = 0;
802     if (params->b32xMeInUse)
803     {
804         CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps32xMeMvDataBuffer);
805         currScaledSurface           = m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
806         meMvDataBuffer              = params->ps32xMeMvDataBuffer;
807         meMvBottomFieldOffset       = params->dw32xMeMvBottomFieldOffset;
808         currScaledBottomFieldOffset = params->dw32xScaledBottomFieldOffset;
809     }
810     else if (params->b16xMeInUse)
811     {
812         CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps16xMeMvDataBuffer);
813         currScaledSurface           = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
814         meMvDataBuffer              = params->ps16xMeMvDataBuffer;
815         meMvBottomFieldOffset       = params->dw16xMeMvBottomFieldOffset;
816         currScaledBottomFieldOffset = params->dw16xScaledBottomFieldOffset;
817     }
818     else
819     {
820         currScaledSurface           = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
821         meMvDataBuffer              = params->ps4xMeMvDataBuffer;
822         meMvBottomFieldOffset       = params->dw4xMeMvBottomFieldOffset;
823         currScaledBottomFieldOffset = params->dw4xScaledBottomFieldOffset;
824     }
825 
826     // Reference height and width information should be taken from the current scaled surface rather
827     // than from the reference scaled surface in the case of PAFF.
828 
829 
830     uint32_t width  = MOS_ALIGN_CEIL(params->dwDownscaledWidthInMb * 32, 64);
831     uint32_t height = params->dwDownscaledHeightInMb * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER;
832 
833     // Force the values
834     meMvDataBuffer->dwWidth  = width;
835     meMvDataBuffer->dwHeight = height;
836     meMvDataBuffer->dwPitch  = width;
837 
838     CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
839     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
840     surfaceParams.bIs2DSurface          = true;
841     surfaceParams.bMediaBlockRW         = true;
842     surfaceParams.psSurface             = meMvDataBuffer;
843     surfaceParams.dwOffset              = meMvBottomFieldOffset;
844     surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
845     surfaceParams.dwBindingTableOffset  = meBindingTable->dwMEMVDataSurface;
846     surfaceParams.bIsWritable           = true;
847     surfaceParams.bRenderTarget         = true;
848     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
849         m_hwInterface,
850         cmdBuffer,
851         &surfaceParams,
852         params->pKernelState));
853 
854     if (params->b16xMeInUse && params->b32xMeEnabled)
855     {
856         // Pass 32x MV to 16x ME operation
857         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
858         surfaceParams.bIs2DSurface  = true;
859         surfaceParams.bMediaBlockRW = true;
860         surfaceParams.psSurface     = params->ps32xMeMvDataBuffer;
861         surfaceParams.dwOffset =
862             isBottomField ? params->dw32xMeMvBottomFieldOffset : 0;
863         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
864         surfaceParams.dwBindingTableOffset  = meBindingTable->dw32xMEMVDataSurface;
865         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
866             m_hwInterface,
867             cmdBuffer,
868             &surfaceParams,
869             params->pKernelState));
870     }
871     else if (!params->b32xMeInUse && params->b16xMeEnabled)
872     {
873         // Pass 16x MV to 4x ME operation
874         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
875         surfaceParams.bIs2DSurface  = true;
876         surfaceParams.bMediaBlockRW = true;
877         surfaceParams.psSurface     = params->ps16xMeMvDataBuffer;
878         surfaceParams.dwOffset =
879             isBottomField ? params->dw16xMeMvBottomFieldOffset : 0;
880         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
881         surfaceParams.dwBindingTableOffset  = meBindingTable->dw16xMEMVDataSurface;
882         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
883             m_hwInterface,
884             cmdBuffer,
885             &surfaceParams,
886             params->pKernelState));
887     }
888 
889     // Insert Distortion buffers only for 4xMe case
890     if (!params->b32xMeInUse && !params->b16xMeInUse)
891     {
892         if (!params->bVdencStreamInEnabled)
893         {
894             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
895             surfaceParams.bIs2DSurface          = true;
896             surfaceParams.bMediaBlockRW         = true;
897             surfaceParams.psSurface             = params->psMeBrcDistortionBuffer;
898             surfaceParams.dwOffset              = params->dwMeBrcDistortionBottomFieldOffset;
899             surfaceParams.dwBindingTableOffset  = meBindingTable->dwMEBRCDist;
900             surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
901             surfaceParams.bIsWritable           = true;
902             surfaceParams.bRenderTarget         = true;
903             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
904                 m_hwInterface,
905                 cmdBuffer,
906                 &surfaceParams,
907                 params->pKernelState));
908         }
909 
910         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
911         surfaceParams.bIs2DSurface          = true;
912         surfaceParams.bMediaBlockRW         = true;
913         surfaceParams.psSurface             = params->psMeDistortionBuffer;
914         surfaceParams.dwOffset              = params->dwMeDistortionBottomFieldOffset;
915         surfaceParams.dwBindingTableOffset  = meBindingTable->dwMEDist;
916         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
917         surfaceParams.bIsWritable           = true;
918         surfaceParams.bRenderTarget         = true;
919         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
920             m_hwInterface,
921             cmdBuffer,
922             &surfaceParams,
923             params->pKernelState));
924     }
925 
926     // Setup references 1...n
927     // LIST 0 references (not optional)
928     CODEC_PICTURE refPic;
929     bool          isRefFieldPicture = false, isRefBottomField = false;
930     uint8_t       refPicIdx = 0;
931     if (params->pL0RefFrameList)
932     {
933         for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL0ActiveMinus1; refIdx++)
934         {
935             refPic = params->pL0RefFrameList[refIdx];
936 
937             if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
938             {
939                 if (refIdx == 0)
940                 {
941                     // Current Picture Y - VME
942                     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
943                     surfaceParams.bUseAdvState          = true;
944                     surfaceParams.psSurface             = currScaledSurface;
945                     surfaceParams.dwOffset              = isBottomField ? currScaledBottomFieldOffset : 0;
946                     surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
947                     surfaceParams.dwBindingTableOffset  = meBindingTable->dwMECurrForFwdRef;
948                     surfaceParams.ucVDirection          = currVDirection;
949                     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
950                         m_hwInterface,
951                         cmdBuffer,
952                         &surfaceParams,
953                         params->pKernelState));
954                 }
955 
956                 isRefFieldPicture                   = CodecHal_PictureIsField(refPic) ? 1 : 0;
957                 isRefBottomField                    = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
958                 refPicIdx                           = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
959                 uint8_t  scaledIdx                  = params->ppRefList[refPicIdx]->ucScalingIdx;
960                 uint32_t refScaledBottomFieldOffset = 0;
961                 MOS_SURFACE *refScaledSurface;
962                 if (params->b32xMeInUse)
963                 {
964                     refScaledSurface                = m_trackedBuf->Get32xDsSurface(scaledIdx);
965                 }
966                 else if (params->b16xMeInUse)
967                 {
968                     refScaledSurface                = m_trackedBuf->Get16xDsSurface(scaledIdx);
969                 }
970                 else
971                 {
972                     refScaledSurface                = m_trackedBuf->Get4xDsSurface(scaledIdx);
973                 }
974                 refScaledBottomFieldOffset          = isRefBottomField ? currScaledBottomFieldOffset : 0;
975 
976 
977                 // L0 Reference Picture Y - VME
978                 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
979                 surfaceParams.bUseAdvState          = true;
980                 surfaceParams.psSurface             = refScaledSurface;
981                 surfaceParams.dwOffset              = isRefBottomField ? refScaledBottomFieldOffset : 0;
982                 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
983                 surfaceParams.dwBindingTableOffset  = meBindingTable->dwMEFwdRefPicIdx[refIdx];
984                 surfaceParams.ucVDirection          = !isFieldPicture ? CODECHAL_VDIRECTION_FRAME : ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
985                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
986                     m_hwInterface,
987                     cmdBuffer,
988                     &surfaceParams,
989                     params->pKernelState));
990             }
991         }
992     }
993     else
994     {
995         return MOS_STATUS_NULL_POINTER;
996     }
997 
998     // Setup references 1...n
999     // LIST 1 references (optional)
1000     if (params->pL1RefFrameList)
1001     {
1002         for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL1ActiveMinus1; refIdx++)
1003         {
1004             refPic = params->pL1RefFrameList[refIdx];
1005 
1006             if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
1007             {
1008                 if (refIdx == 0)
1009                 {
1010                     // Current Picture Y - VME
1011                     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1012                     surfaceParams.bUseAdvState          = true;
1013                     surfaceParams.psSurface             = currScaledSurface;
1014                     surfaceParams.dwOffset              = isBottomField ? currScaledBottomFieldOffset : 0;
1015                     surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1016                     surfaceParams.dwBindingTableOffset  = meBindingTable->dwMECurrForBwdRef;
1017                     surfaceParams.ucVDirection          = currVDirection;
1018                     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1019                         m_hwInterface,
1020                         cmdBuffer,
1021                         &surfaceParams,
1022                         params->pKernelState));
1023                 }
1024 
1025                 isRefFieldPicture                   = CodecHal_PictureIsField(refPic) ? 1 : 0;
1026                 isRefBottomField                    = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
1027                 refPicIdx                           = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
1028                 uint8_t  scaledIdx                  = params->ppRefList[refPicIdx]->ucScalingIdx;
1029                 uint32_t refScaledBottomFieldOffset = 0;
1030                 MOS_SURFACE *refScaledSurface;
1031                 if (params->b32xMeInUse)
1032                 {
1033                     refScaledSurface                = m_trackedBuf->Get32xDsSurface(scaledIdx);
1034                 }
1035                 else if (params->b16xMeInUse)
1036                 {
1037                     refScaledSurface                = m_trackedBuf->Get16xDsSurface(scaledIdx);
1038                 }
1039                 else
1040                 {
1041                     refScaledSurface                = m_trackedBuf->Get4xDsSurface(scaledIdx);
1042                 }
1043                 refScaledBottomFieldOffset          = isRefBottomField ? currScaledBottomFieldOffset : 0;
1044 
1045 
1046                 // L1 Reference Picture Y - VME
1047                 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1048                 surfaceParams.bUseAdvState          = true;
1049                 surfaceParams.psSurface             = refScaledSurface;
1050                 surfaceParams.dwOffset              = isRefBottomField ? refScaledBottomFieldOffset : 0;
1051                 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1052                 surfaceParams.dwBindingTableOffset  = meBindingTable->dwMEBwdRefPicIdx[refIdx];
1053                 surfaceParams.ucVDirection          = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME : ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1054                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1055                     m_hwInterface,
1056                     cmdBuffer,
1057                     &surfaceParams,
1058                     params->pKernelState));
1059             }
1060         }
1061     }
1062     if (encMediaStateType == CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN)
1063     {
1064         // Output buffer
1065         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1066         surfaceParams.dwSize                = params->dwVDEncStreamInSurfaceSize;
1067         surfaceParams.bIs2DSurface          = false;
1068         surfaceParams.presBuffer            = params->psMeVdencStreamInBuffer;
1069         surfaceParams.dwBindingTableOffset  = meBindingTable->dwVdencStreamInSurface;
1070         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1071         surfaceParams.bIsWritable           = true;
1072         surfaceParams.bRenderTarget         = true;
1073         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1074             m_hwInterface,
1075             cmdBuffer,
1076             &surfaceParams,
1077             params->pKernelState));
1078 
1079         // Input buffer (for AVC case we only read the surface and update data)
1080         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1081         surfaceParams.dwSize                = params->dwVDEncStreamInSurfaceSize;
1082         surfaceParams.bIs2DSurface          = false;
1083         surfaceParams.presBuffer            = params->psMeVdencStreamInBuffer;
1084         surfaceParams.dwBindingTableOffset  = meBindingTable->dwVdencStreamInInputSurface;
1085         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1086         surfaceParams.bIsWritable           = true;
1087         surfaceParams.bRenderTarget         = true;
1088         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1089             m_hwInterface,
1090             cmdBuffer,
1091             &surfaceParams,
1092             params->pKernelState));
1093     }
1094 
1095     return eStatus;
1096 }
1097 
1098 
1099 //------------------------------------------------------------------------------
1100 //| Purpose:    Setup curbe for common ME kernels
1101 //| Return:     N/A
1102 //------------------------------------------------------------------------------
SetCurbeMe(MeCurbeParams * params)1103 MOS_STATUS CodechalVdencVp9StateG11::SetCurbeMe(
1104     MeCurbeParams *params)
1105 {
1106     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1107 
1108     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1109     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
1110 
1111     CODECHAL_ENCODE_ASSERT(params->TargetUsage <= NUM_TARGET_USAGE_MODES);
1112 
1113     uint32_t scaleFactor       = 0;
1114     bool     useMvFromPrevStep = false, writeDistortions = false;
1115     uint8_t  mvShiftFactor = 0, prevMvReadPosFactor = 0;
1116     switch (params->hmeLvl)
1117     {
1118     case HME_LEVEL_32x:
1119         useMvFromPrevStep = CODECHAL_ENCODE_HME_FIRST_STEP_G11;
1120         writeDistortions  = false;
1121         scaleFactor       = SCALE_FACTOR_32x;
1122         mvShiftFactor     = CODECHAL_ENCODE_MV_SHIFT_FACTOR_32x_G11;
1123         break;
1124     case HME_LEVEL_16x:
1125         useMvFromPrevStep   = (params->b32xMeEnabled) ? CODECHAL_ENCODE_HME_FOLLOWING_STEP_G11 : CODECHAL_ENCODE_HME_FIRST_STEP_G11;
1126         writeDistortions    = false;
1127         scaleFactor         = SCALE_FACTOR_16x;
1128         mvShiftFactor       = CODECHAL_ENCODE_MV_SHIFT_FACTOR_16x_G11;
1129         prevMvReadPosFactor = CODECHAL_ENCODE_PREV_MV_READ_POSITION_16x_G11;
1130         break;
1131     case HME_LEVEL_4x:
1132         useMvFromPrevStep   = (params->b16xMeEnabled) ? CODECHAL_ENCODE_HME_FOLLOWING_STEP_G11 : CODECHAL_ENCODE_HME_FIRST_STEP_G11;
1133         writeDistortions    = true;
1134         scaleFactor         = SCALE_FACTOR_4x;
1135         mvShiftFactor       = CODECHAL_ENCODE_MV_SHIFT_FACTOR_4x_G11;
1136         prevMvReadPosFactor = CODECHAL_ENCODE_PREV_MV_READ_POSITION_4x_G11;
1137         break;
1138     default:
1139         return MOS_STATUS_INVALID_PARAMETER;
1140     }
1141 
1142     MeCurbe cmd;
1143     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1144         &cmd,
1145         sizeof(MeCurbe),
1146         meCurbeInit,
1147         sizeof(MeCurbe)));
1148 
1149     cmd.DW3.SubPelMode = 3;
1150     if (m_fieldScalingOutputInterleaved)
1151     {
1152         cmd.DW3.SrcAccess =
1153             cmd.DW3.RefAccess    = CodecHal_PictureIsField(params->CurrOriginalPic) ? 1 : 0;
1154         cmd.DW7.SrcFieldPolarity = CodecHal_PictureIsBottomField(params->CurrOriginalPic) ? 1 : 0;
1155     }
1156 
1157     bool framePicture = CodecHal_PictureIsFrame(params->CurrOriginalPic);
1158     char qpPrimeY     = (params->pic_init_qp_minus26 + 26) + params->slice_qp_delta;
1159 
1160     cmd.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
1161     cmd.DW4.PictureWidth        = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
1162     cmd.DW5.QpPrimeY            = qpPrimeY;
1163     cmd.DW6.WriteDistortions    = writeDistortions;
1164     cmd.DW6.UseMvFromPrevStep   = useMvFromPrevStep;
1165 
1166     cmd.DW6.SuperCombineDist = m_superCombineDistGeneric[params->TargetUsage];
1167     cmd.DW6.MaxVmvR          = (framePicture) ? params->MaxMvLen * 4 : (params->MaxMvLen >> 1) * 4;
1168 
1169     if (m_pictureCodingType == B_TYPE)
1170     {
1171         // This field is irrelevant since we are not using the bi-direct search.
1172         // set it to 32 to match
1173         cmd.DW1.BiWeight             = 32;
1174         cmd.DW13.NumRefIdxL1MinusOne = params->num_ref_idx_l1_active_minus1;
1175     }
1176 
1177     if (m_pictureCodingType == P_TYPE ||
1178         m_pictureCodingType == B_TYPE)
1179     {
1180         if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
1181         {
1182             cmd.DW30.ActualMBHeight = m_frameHeight;
1183             cmd.DW30.ActualMBWidth  = m_frameWidth;
1184         }
1185         else if (m_vdencEnabled && m_16xMeSupported)
1186         {
1187             cmd.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
1188             cmd.DW30.ActualMBWidth  = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
1189         }
1190         cmd.DW13.NumRefIdxL0MinusOne =
1191             params->num_ref_idx_l0_active_minus1;
1192     }
1193 
1194     cmd.DW13.RefStreaminCost = 5;
1195     // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
1196     cmd.DW13.ROIEnable = 0;
1197 
1198     if (!framePicture)
1199     {
1200         if (m_pictureCodingType != I_TYPE)
1201         {
1202             cmd.DW14.List0RefID0FieldParity = params->List0RefID0FieldParity;
1203             cmd.DW14.List0RefID1FieldParity = params->List0RefID1FieldParity;
1204             cmd.DW14.List0RefID2FieldParity = params->List0RefID2FieldParity;
1205             cmd.DW14.List0RefID3FieldParity = params->List0RefID3FieldParity;
1206             cmd.DW14.List0RefID4FieldParity = params->List0RefID4FieldParity;
1207             cmd.DW14.List0RefID5FieldParity = params->List0RefID5FieldParity;
1208             cmd.DW14.List0RefID6FieldParity = params->List0RefID6FieldParity;
1209             cmd.DW14.List0RefID7FieldParity = params->List0RefID7FieldParity;
1210         }
1211         if (m_pictureCodingType == B_TYPE)
1212         {
1213             cmd.DW14.List1RefID0FieldParity = params->List1RefID0FieldParity;
1214             cmd.DW14.List1RefID1FieldParity = params->List1RefID1FieldParity;
1215         }
1216     }
1217 
1218     cmd.DW15.MvShiftFactor       = mvShiftFactor;
1219     cmd.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
1220 
1221     // r3 & r4
1222     uint8_t targetUsage = params->TargetUsage;
1223     uint8_t meMethod    = 0;
1224     if (m_pictureCodingType == B_TYPE)
1225     {
1226         meMethod = params->pBMEMethodTable ?  // use the ME table dependent on codec standard
1227                        params->pBMEMethodTable[targetUsage]
1228                                            : m_bMeMethodGeneric[targetUsage];
1229     }
1230     else
1231     {
1232         meMethod = params->pMEMethodTable ?  // use the ME table dependent on codec standard
1233                        params->pMEMethodTable[targetUsage]
1234                                           : m_meMethodGeneric[targetUsage];
1235     }
1236 
1237     uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
1238     eStatus          = MOS_SecureMemcpy(&(cmd.SPDelta), 14 * sizeof(uint32_t), m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t));
1239     if (eStatus != MOS_STATUS_SUCCESS)
1240     {
1241         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
1242         return eStatus;
1243     }
1244 
1245     // Non legacy stream in is for hevc vp9 streamin kernel
1246     if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
1247     {
1248         //StreamIn CURBE
1249         cmd.DW6.LCUSize                    = 1;  //Only LCU64 supported by the VDEnc HW
1250         cmd.DW6.InputStreamInSurfaceEnable = params->segmapProvided;
1251         cmd.DW31.MaxCuSize                 = 3;
1252         cmd.DW31.MaxTuSize                 = 3;
1253         switch (params->TargetUsage)
1254         {
1255         case 1:
1256         case 4:
1257             cmd.DW36.NumMergeCandidateCu64x64 = 4;
1258             cmd.DW36.NumMergeCandidateCu32x32 = 3;
1259             cmd.DW36.NumMergeCandidateCu16x16 = 2;
1260             cmd.DW36.NumMergeCandidateCu8x8   = 1;
1261             cmd.DW31.NumImePredictors         = 8;
1262             break;
1263         case 7:
1264             cmd.DW36.NumMergeCandidateCu64x64 = 2;
1265             cmd.DW36.NumMergeCandidateCu32x32 = 2;
1266             cmd.DW36.NumMergeCandidateCu16x16 = 2;
1267             cmd.DW36.NumMergeCandidateCu8x8   = 0;
1268             cmd.DW31.NumImePredictors         = 4;
1269             break;
1270         }
1271     }
1272 
1273     // r5
1274     cmd.DW40._4xMeMvOutputDataSurfIndex      = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G11;
1275     cmd.DW41._16xOr32xMeMvInputDataSurfIndex = (params->hmeLvl == HME_LEVEL_32x) ? CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G11 : CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G11;
1276     cmd.DW42._4xMeOutputDistSurfIndex        = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G11;
1277     cmd.DW43._4xMeOutputBrcDistSurfIndex     = CODECHAL_ENCODE_ME_BRC_DISTORTION_G11;
1278     cmd.DW44.VMEFwdInterPredictionSurfIndex  = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G11;
1279     cmd.DW45.VMEBwdInterPredictionSurfIndex  = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G11;
1280     cmd.DW46.VDEncStreamInOutputSurfIndex    = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G11;
1281     cmd.DW47.VDEncStreamInInputSurfIndex     = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G11;
1282 
1283     CODECHAL_ENCODE_CHK_STATUS_RETURN(params->pKernelState->m_dshRegion.AddData(
1284         &cmd,
1285         params->pKernelState->dwCurbeOffset,
1286         sizeof(cmd)));
1287 
1288     return eStatus;
1289 }
1290 
ExecuteMeKernel(MeCurbeParams * meParams,MeSurfaceParams * meSurfaceParams,HmeLevel hmeLevel)1291 MOS_STATUS CodechalVdencVp9StateG11::ExecuteMeKernel(
1292     MeCurbeParams *  meParams,
1293     MeSurfaceParams *meSurfaceParams,
1294     HmeLevel         hmeLevel)
1295 {
1296     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1297 
1298     CODECHAL_ENCODE_FUNCTION_ENTER;
1299 
1300     CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
1301     CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
1302 
1303     PerfTagSetting perfTag;
1304     perfTag.Value             = 0;
1305     perfTag.Mode              = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1306     perfTag.CallType          = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL : CODECHAL_ENCODE_PERFTAG_CALL_ME_KERNEL;
1307     perfTag.PictureCodingType = m_pictureCodingType;
1308     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1309     // Each ME kernel buffer counts as a separate perf task
1310     m_osInterface->pfnResetPerfBufferID(m_osInterface);
1311 
1312     CODECHAL_MEDIA_STATE_TYPE encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME : (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
1313 
1314     bool vdencMeInUse = false;
1315     if (m_vdencEnabled && (encFunctionType == CODECHAL_MEDIA_STATE_4X_ME))
1316     {
1317         vdencMeInUse = true;
1318         // Non legacy stream in is for hevc vp9 streamin kernel
1319         encFunctionType = m_useNonLegacyStreamin ? CODECHAL_MEDIA_STATE_4X_ME : CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
1320     }
1321 
1322     uint32_t krnStateIdx = vdencMeInUse ? CODECHAL_ENCODE_ME_IDX_VDENC : ((m_pictureCodingType == P_TYPE) ? CODECHAL_ENCODE_ME_IDX_P : CODECHAL_ENCODE_ME_IDX_B);
1323 
1324     PMHW_KERNEL_STATE kernelState = &m_meKernelStates[krnStateIdx];
1325 
1326     // If Single Task Phase is not enabled, use BT count for the kernel state.
1327     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
1328     {
1329         uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
1330         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
1331             m_stateHeapInterface,
1332             maxBtCount));
1333         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
1334         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
1335     }
1336 
1337     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
1338         m_stateHeapInterface,
1339         kernelState,
1340         false,
1341         0,
1342         false,
1343         m_storeData));
1344     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
1345     MOS_ZeroMemory(&idParams, sizeof(idParams));
1346     idParams.pKernelState = kernelState;
1347     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
1348         m_stateHeapInterface,
1349         1,
1350         &idParams));
1351 
1352     // Setup Additional MeParams (Most of them set up in codec specific function, so don't zero out here)
1353     meParams->hmeLvl       = hmeLevel;
1354     meParams->pKernelState = kernelState;
1355 
1356     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMe(meParams));
1357 
1358     CODECHAL_DEBUG_TOOL(
1359         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1360             encFunctionType,
1361             MHW_DSH_TYPE,
1362             kernelState));
1363         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1364             encFunctionType,
1365             kernelState));
1366         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1367             encFunctionType,
1368             MHW_ISH_TYPE,
1369             kernelState));)
1370     MOS_COMMAND_BUFFER cmdBuffer;
1371     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
1372     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1373     {
1374         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
1375     }
1376     SendKernelCmdsParams sendKernelCmdsParams;
1377     sendKernelCmdsParams                 = SendKernelCmdsParams();
1378     sendKernelCmdsParams.EncFunctionType = encFunctionType;
1379     sendKernelCmdsParams.pKernelState    = kernelState;
1380 
1381     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
1382 
1383     // Add binding table
1384     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
1385         m_stateHeapInterface,
1386         kernelState));
1387 
1388     // Setup Additional ME surface params (Most of them set up in codec specific function, so don't zero out here)
1389     meSurfaceParams->dwDownscaledWidthInMb  = (hmeLevel == HME_LEVEL_32x) ? m_downscaledWidthInMb32x : (hmeLevel == HME_LEVEL_16x) ? m_downscaledWidthInMb16x : m_downscaledWidthInMb4x;
1390     meSurfaceParams->dwDownscaledHeightInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledFrameFieldHeightInMb32x : (hmeLevel == HME_LEVEL_16x) ? m_downscaledFrameFieldHeightInMb16x : m_downscaledFrameFieldHeightInMb4x;
1391     meSurfaceParams->b32xMeInUse            = (hmeLevel == HME_LEVEL_32x) ? true : false;
1392     meSurfaceParams->b16xMeInUse            = (hmeLevel == HME_LEVEL_16x) ? true : false;
1393     meSurfaceParams->pKernelState           = kernelState;
1394 
1395     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(&cmdBuffer, meSurfaceParams));
1396 
1397     // Dump SSH for ME kernel
1398     CODECHAL_DEBUG_TOOL(
1399         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1400             encFunctionType,
1401             MHW_SSH_TYPE,
1402             kernelState)));
1403 
1404     /* zero out the mv data memory and me distortion buffer for the driver ULT
1405     kernel only writes out this data used for current frame, in some cases the the data used for
1406     previous frames would be left in the buffer (for example, the L1 mv for B frame would still show
1407     in the P frame mv data buffer */
1408 
1409     // Zeroing out the buffers has perf impact, so zero it out only when dumps are actually enabled
1410     CODECHAL_DEBUG_TOOL(
1411         CODECHAL_ENCODE_CHK_NULL_RETURN(m_debugInterface);
1412         uint8_t *data                = NULL;
1413         uint32_t size                = 0;
1414         bool     driverMeDumpEnabled = m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrOutput, encFunctionType);
1415 
1416         if (driverMeDumpEnabled) {
1417             MOS_LOCK_PARAMS lockFlags;
1418             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1419             lockFlags.WriteOnly = 1;
1420 
1421             switch (hmeLevel)
1422             {
1423             case HME_LEVEL_32x:
1424                 data = (uint8_t *)m_osInterface->pfnLockResource(
1425                     m_osInterface,
1426                     &meSurfaceParams->ps32xMeMvDataBuffer->OsResource,
1427                     &lockFlags);
1428                 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1429                 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) *
1430                        (m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1431                 MOS_ZeroMemory(data, size);
1432                 m_osInterface->pfnUnlockResource(
1433                     m_osInterface,
1434                     &meSurfaceParams->ps32xMeMvDataBuffer->OsResource);
1435                 break;
1436             case HME_LEVEL_16x:
1437                 data = (uint8_t *)m_osInterface->pfnLockResource(
1438                     m_osInterface,
1439                     &meSurfaceParams->ps16xMeMvDataBuffer->OsResource,
1440                     &lockFlags);
1441                 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1442                 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) *
1443                        (m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1444                 MOS_ZeroMemory(data, size);
1445                 m_osInterface->pfnUnlockResource(
1446                     m_osInterface,
1447                     &meSurfaceParams->ps16xMeMvDataBuffer->OsResource);
1448                 break;
1449             case HME_LEVEL_4x:
1450                 if (!m_vdencEnabled)
1451                 {
1452                     data = (uint8_t *)m_osInterface->pfnLockResource(
1453                         m_osInterface,
1454                         &meSurfaceParams->ps4xMeMvDataBuffer->OsResource,
1455                         &lockFlags);
1456                     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1457                     size = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) *
1458                            (m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1459                     MOS_ZeroMemory(data, size);
1460                     m_osInterface->pfnUnlockResource(
1461                         m_osInterface,
1462                         &meSurfaceParams->ps4xMeMvDataBuffer->OsResource);
1463                 }
1464                 break;
1465             default:
1466                 return MOS_STATUS_INVALID_PARAMETER;
1467             }
1468 
1469             // zeroing out ME dist buffer
1470             if (meSurfaceParams->b4xMeDistortionBufferSupported)
1471             {
1472                 data = (uint8_t *)m_osInterface->pfnLockResource(
1473                     m_osInterface, &meSurfaceParams->psMeDistortionBuffer->OsResource, &lockFlags);
1474                 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1475                 size = meSurfaceParams->psMeDistortionBuffer->dwHeight * meSurfaceParams->psMeDistortionBuffer->dwPitch;
1476                 MOS_ZeroMemory(data, size);
1477                 m_osInterface->pfnUnlockResource(
1478                     m_osInterface,
1479                     &meSurfaceParams->psMeDistortionBuffer->OsResource);
1480             }
1481         });
1482 
1483     uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x : (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
1484 
1485     uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
1486     uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
1487 
1488     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
1489     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
1490     walkerCodecParams.WalkerMode              = m_walkerMode;
1491     walkerCodecParams.dwResolutionX           = resolutionX;
1492     walkerCodecParams.dwResolutionY           = resolutionY;
1493     walkerCodecParams.bNoDependency           = true;
1494     walkerCodecParams.bMbaff                  = meSurfaceParams->bMbaff;
1495     walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
1496     walkerCodecParams.ucGroupId               = m_groupId;
1497 
1498     MHW_WALKER_PARAMS walkerParams;
1499     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
1500         m_hwInterface,
1501         &walkerParams,
1502         &walkerCodecParams));
1503 
1504     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
1505         &cmdBuffer,
1506         &walkerParams));
1507 
1508     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
1509 
1510     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
1511         m_stateHeapInterface,
1512         kernelState));
1513     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1514     {
1515         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
1516             m_stateHeapInterface));
1517         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
1518         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1519     }
1520 
1521     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
1522         &cmdBuffer,
1523         encFunctionType,
1524         nullptr)));
1525 
1526     m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
1527 
1528     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
1529 
1530     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1531     {
1532         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
1533         m_lastTaskInPhase = false;
1534     }
1535 
1536     return eStatus;
1537 }
1538 
ExecuteKernelFunctions()1539 MOS_STATUS CodechalVdencVp9StateG11::ExecuteKernelFunctions()
1540 {
1541     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1542 
1543     CODECHAL_ENCODE_FUNCTION_ENTER;
1544 
1545 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
1546     uint32_t dumpFormat = 0;
1547     CODECHAL_DEBUG_TOOL(
1548      //   CodecHal_DbgMapSurfaceFormatToDumpFormat(m_rawSurfaceToEnc->Format, &dumpFormat);
1549     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1550         m_rawSurfaceToEnc,
1551         CodechalDbgAttr::attrEncodeRawInputSurface,
1552         "SrcSurf"));
1553     if (m_lastRefPic)
1554     {
1555       //  CodecHal_DbgMapSurfaceFormatToDumpFormat(m_lastRefPic->Format, &dumpFormat);
1556         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1557             m_lastRefPic,
1558             CodechalDbgAttr::attrReferenceSurfaces,
1559             "LastRefSurface"));
1560     }
1561 
1562     if (m_goldenRefPic)
1563     {
1564       //  CodecHal_DbgMapSurfaceFormatToDumpFormat(m_goldenRefPic->Format, &dumpFormat);
1565         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1566             m_goldenRefPic,
1567             CodechalDbgAttr::attrReferenceSurfaces,
1568             "GoldenRefSurface"));
1569     }
1570 
1571     if (m_altRefPic)
1572     {
1573       //  CodecHal_DbgMapSurfaceFormatToDumpFormat(m_altRefPic->Format, &dumpFormat);
1574         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1575             m_altRefPic,
1576             CodechalDbgAttr::attrReferenceSurfaces,
1577             "_AltRefSurface"));
1578     }
1579     );
1580 
1581 
1582     m_setRequestedEUSlices = ((m_frameHeight * m_frameWidth) >= m_ssdResolutionThreshold &&
1583         m_targetUsage <= m_ssdTargetUsageThreshold) ? true : false;
1584 
1585     m_hwInterface->m_numRequestedEuSlices = (m_setRequestedEUSlices) ?
1586         m_sliceShutdownRequestState : m_sliceShutdownDefaultState;
1587 
1588     // While this streamin isn't a kernel function, we 0 the surface here which is needed before HME kernel
1589     SetupSegmentationStreamIn();
1590 
1591     // Super HME
1592     if (m_16xMeSupported)
1593     {
1594         //4x Downscaling
1595         CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
1596         MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
1597         cscScalingKernelParams.bLastTaskInPhaseCSC =
1598             cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled);
1599         cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled);
1600         cscScalingKernelParams.bLastTaskInPhase32xDS = !m_hmeEnabled;
1601 
1602         m_firstTaskInPhase = true;
1603         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
1604     }
1605 
1606     if (m_16xMeEnabled)
1607     {
1608         //Initialize the ME struct for HME kernel calls
1609         MeCurbeParams meParams;
1610         MOS_ZeroMemory(&meParams, sizeof(MeCurbeParams));
1611         SetMeCurbeParams(&meParams);
1612 
1613         MeSurfaceParams meSurfaceParams;
1614         MOS_ZeroMemory(&meSurfaceParams, sizeof(MeSurfaceParams));
1615         SetMeSurfaceParams(&meSurfaceParams);
1616 
1617         // P_HME kernel (16x HME)
1618         m_lastTaskInPhase = false;
1619         CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_16x));
1620 
1621         //StreamIn kernel, 4xME
1622         m_lastTaskInPhase = true;
1623         meParams.segmapProvided = m_segmentMapProvided;
1624         CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_4x));
1625     }
1626     //CODECHAL_DEBUG_TOOL
1627     // (
1628         /*CodecHal_DbgDumpVp9VdEncHMEData(
1629             m_encoder,
1630             dumpFormat,
1631             MeSurfaceParams.ps4xMeMvDataBuffer,
1632             MeSurfaceParams.ps16xMeMvDataBuffer,
1633             MeSurfaceParams.psMeDistortionBuffer);
1634         */
1635 
1636     //if (m_scalingEnabled)
1637     //{
1638     //    // Dump 4x scaling and HME buffers
1639     //    m_debugInterface->DumpYUVSurface(
1640     //        m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER),
1641     //        CodechalDbgAttr::attrReferenceSurfaces,
1642     //        "4xScaledSurf");
1643     //    m_debugInterface->DumpBuffer(
1644     //        &meSurfaceParams.ps4xMeMvDataBuffer->OsResource,
1645     //        CodechalDbgAttr::attrOutput,
1646     //        "MvData",
1647     //        meSurfaceParams.ps4xMeMvDataBuffer->dwHeight * meSurfaceParams.ps4xMeMvDataBuffer->dwPitch,
1648     //        CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledFrameFieldHeightInMb4x * 4) : 0,
1649     //        CODECHAL_MEDIA_STATE_4X_ME);
1650     //    m_debugInterface->DumpBuffer(
1651     //        &meSurfaceParams.psMeDistortionBuffer->OsResource,
1652     //        CodechalDbgAttr::attrOutput,
1653     //        "MeDist",
1654     //        meSurfaceParams.psMeDistortionBuffer->dwHeight *meSurfaceParams.psMeDistortionBuffer->dwPitch,
1655     //        CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4 * 10), 8) : 0,
1656     //        CODECHAL_MEDIA_STATE_4X_ME);
1657 
1658     //    if (m_16xMeSupported)
1659     //    {
1660     //        // Dump 16x scaling and HME buffers
1661     //        m_debugInterface->DumpYUVSurface(
1662     //            m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER),
1663     //            CodechalDbgAttr::attrReferenceSurfaces,
1664     //            "16xScaledSurf");
1665     //        if (m_16xMeEnabled)
1666     //        {
1667     //            m_debugInterface->DumpBuffer(
1668     //                &meSurfaceParams.ps16xMeMvDataBuffer->OsResource,
1669     //                CodechalDbgAttr::attrOutput,
1670     //                "MvData",
1671     //                meSurfaceParams.ps16xMeMvDataBuffer->dwHeight *meSurfaceParams.ps16xMeMvDataBuffer->dwPitch,
1672     //                CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledFrameFieldHeightInMb16x * 4) : 0,
1673     //                CODECHAL_MEDIA_STATE_16X_ME);
1674     //        }
1675     //    }
1676     //}
1677     // dump VDEncStreamin
1678     /* m_debugInterface->DumpBuffer(
1679         &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
1680         CodechalDbgAttr::attrOutput,
1681         "Output",
1682         (MOS_ALIGN_CEIL(m_frameHeight, 32) * (MOS_ALIGN_CEIL(m_frameFieldHeight, 32)) / 16),
1683         0,
1684         CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN);*/
1685 
1686     if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
1687     {
1688         MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1689         syncParams.GpuContext = m_renderContext;
1690         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
1691 
1692         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
1693         m_waitForEnc = true;
1694     }
1695 #endif
1696 
1697     return eStatus;
1698 }
1699 
StatusReportCleanup(EncodeStatusReport * encodeStatusReport,HCPPakHWTileSizeRecord_G11 * tileStatusReport,CODECHAL_ENCODE_BUFFER * tileSizeStreamoutBuffer,PMOS_INTERFACE osInterface,uint8_t * tempBsBuffer,uint8_t * bitstream)1700 static void StatusReportCleanup(
1701     EncodeStatusReport* encodeStatusReport,
1702     HCPPakHWTileSizeRecord_G11* tileStatusReport,
1703     CODECHAL_ENCODE_BUFFER* tileSizeStreamoutBuffer,
1704     PMOS_INTERFACE osInterface,
1705     uint8_t* tempBsBuffer,
1706     uint8_t* bitstream)
1707 {
1708 
1709     if (tempBsBuffer)
1710     {
1711         MOS_FreeMemory(tempBsBuffer);
1712     }
1713 
1714     if (bitstream)
1715     {
1716         osInterface->pfnUnlockResource(osInterface, &encodeStatusReport->pCurrRefList->resBitstreamBuffer);
1717     }
1718 
1719     if (tileStatusReport)
1720     {
1721         // clean-up the tile status report buffer
1722         if (encodeStatusReport->CodecStatus == CODECHAL_STATUS_SUCCESSFUL)
1723         {
1724             for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1725             {
1726                 MOS_ZeroMemory(&tileStatusReport[i], sizeof(tileStatusReport[i]));
1727             }
1728         }
1729 
1730         osInterface->pfnUnlockResource(osInterface, &tileSizeStreamoutBuffer->sResource);
1731     }
1732 }
1733 
~CodechalVdencVp9StateG11()1734 CodechalVdencVp9StateG11::~CodechalVdencVp9StateG11()
1735 {
1736     CODECHAL_ENCODE_FUNCTION_ENTER;
1737 
1738     if (m_scalabilityState)
1739     {
1740         MOS_FreeMemAndSetNull(m_scalabilityState);
1741     }
1742     //Note: virtual engine interface destroy is done in MOS layer
1743 
1744     return;
1745 }
1746 
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1747 MOS_STATUS CodechalVdencVp9StateG11::GetStatusReport(
1748     EncodeStatus*       encodeStatus,
1749     EncodeStatusReport* encodeStatusReport)
1750 {
1751     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1752 
1753     CODECHAL_ENCODE_FUNCTION_ENTER;
1754 
1755     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1756     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1757 
1758     if (encodeStatusReport->UsedVdBoxNumber == 1)
1759     {
1760         encodeStatusReport->bitstreamSize = encodeStatus->dwMFCBitstreamByteCountPerFrame + encodeStatus->dwHeaderBytesInserted;
1761         encodeStatusReport->NumberPasses  = (uint8_t)encodeStatus->dwNumberPasses;
1762 
1763         encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1764         return eStatus;
1765     }
1766 
1767     // Tile record always in m_tileRecordBuffer even in scala mode
1768     PCODECHAL_ENCODE_BUFFER presTileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1769 
1770     MOS_LOCK_PARAMS lockFlags;
1771     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1772     HCPPakHWTileSizeRecord_G11* tileStatusReport = (HCPPakHWTileSizeRecord_G11*)m_osInterface->pfnLockResource(
1773         m_osInterface,
1774         &presTileSizeStatusReport->sResource,
1775         &lockFlags);
1776     CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1777 
1778     encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1779     encodeStatusReport->PanicMode = false;
1780     encodeStatusReport->AverageQp = 0;
1781     encodeStatusReport->QpY = 0;
1782     encodeStatusReport->SuggestedQpYDelta = 0;
1783     encodeStatusReport->NumberPasses = 1;
1784     encodeStatusReport->bitstreamSize = 0;
1785     encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1786 
1787     double sum_qp = 0.0;
1788     uint32_t totalCU = 0;
1789     CODECHAL_ENCODE_CHK_COND_RETURN((encodeStatusReport->NumberTilesInFrame == 0), "ERROR - invalid number of tiles in frame");
1790     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1791     {
1792         if (tileStatusReport[i].Length == 0)
1793         {
1794             encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1795             StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
1796             return eStatus;
1797         }
1798 
1799         encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1800         totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
1801         sum_qp += tileStatusReport[i].Hcp_Qp_Status_Count;
1802     }
1803 
1804     encodeStatusReport->QpY = encodeStatusReport->AverageQp =
1805         (uint8_t)((sum_qp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
1806 
1807     if (m_enableTileStitchByHW)
1808     {
1809         StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
1810         return eStatus;
1811     }
1812 
1813     uint8_t* bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
1814     uint8_t* tempBsBuffer = bufPtr;
1815     CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
1816 
1817     CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
1818     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1819     lockFlags.ReadOnly = 1;
1820     uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
1821         m_osInterface,
1822         &currRefList.resBitstreamBuffer,
1823         &lockFlags);
1824     if (bitstream == nullptr)
1825     {
1826         MOS_SafeFreeMemory(tempBsBuffer);
1827         CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
1828     }
1829 
1830     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1831     {
1832         uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
1833         uint32_t len = tileStatusReport[i].Length;
1834 
1835         if (offset + len >= m_bitstreamUpperBound)
1836         {
1837             eStatus = MOS_STATUS_INVALID_FILE_SIZE;
1838             CODECHAL_ENCODE_ASSERTMESSAGE("Error: Tile offset and length add up to more than bitstream upper bound");
1839             encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
1840             encodeStatusReport->bitstreamSize = 0;
1841             StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
1842             return eStatus;
1843         }
1844 
1845         MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
1846         bufPtr += len;
1847     }
1848 
1849     MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
1850     MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
1851         m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
1852 
1853     StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
1854 
1855     return eStatus;
1856 }
1857 
DecideEncodingPipeNumber()1858 MOS_STATUS CodechalVdencVp9StateG11::DecideEncodingPipeNumber()
1859 {
1860     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1861 
1862     CODECHAL_ENCODE_FUNCTION_ENTER;
1863 
1864     m_numPipe = m_numVdbox;
1865 
1866     uint8_t num_tile_columns = (1 << m_vp9PicParams->log2_tile_columns);
1867 
1868     if (num_tile_columns > m_numPipe)
1869     {
1870         m_numPipe = 1;
1871     }
1872 
1873     if (num_tile_columns < m_numPipe)
1874     {
1875         if (num_tile_columns >= 1 && num_tile_columns <= 4)
1876         {
1877             m_numPipe = num_tile_columns;
1878         }
1879         else
1880         {
1881             m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
1882         }
1883     }
1884 
1885     if (m_numPipe == 0 || m_numPipe > CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE)
1886     {
1887         m_numPipe = 1;
1888     }
1889 
1890     if (m_numPipe > 1)
1891     {
1892         m_scalableMode = true; // KMD VE is now enabled by default. Mediasolo can also use the VE interface.
1893     }
1894     else
1895     {
1896         m_scalableMode = false;
1897     }
1898 
1899     if (m_scalabilityState)
1900     {
1901         // Create/ re-use a GPU context with 2 pipes
1902         m_scalabilityState->ucScalablePipeNum = m_numPipe;
1903     }
1904 
1905     return eStatus;
1906 }
1907 
PlatformCapabilityCheck()1908 MOS_STATUS CodechalVdencVp9StateG11::PlatformCapabilityCheck()
1909 {
1910     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1911 
1912     CODECHAL_ENCODE_FUNCTION_ENTER;
1913 
1914     CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
1915 
1916     if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
1917     {
1918         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
1919             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
1920     }
1921 
1922     if (m_numPipe > 1)
1923     {
1924         m_singleTaskPhaseSupported = m_singleTaskPhaseSupportedInPak = false;
1925     }
1926 
1927     //so far only validate Tiling for VDEnc VP9
1928     uint8_t col = (1 << (m_vp9PicParams->log2_tile_columns));
1929     uint8_t row = (1 << (m_vp9PicParams->log2_tile_rows));
1930 
1931     // Single pipe cannot handle N X M or M X N tile row/column cases, either one has to be 1, return error
1932     if ((col > 1) && (row > 1) && (m_numPipe == 1))
1933     {
1934         CODECHAL_ENCODE_ASSERTMESSAGE("Single pipe cannot handle N X M or M X N tile row/column cases, either one has to be 1");
1935         return MOS_STATUS_INVALID_PARAMETER;
1936     }
1937 
1938     // Handling invalid tiling and scalability cases. When NumTilingColumn does not match NumPipe fall back to single pipe mode
1939     if (m_numPipe > 1 && (col != m_numPipe))
1940     {
1941         if ((col == 1) || (row == 1))
1942         {
1943             m_numPipe = 1; // number of tile columns cannot be greater than number of pipes (VDBOX), run in single pipe mode
1944             m_scalableMode = false;
1945         }
1946         else
1947         {
1948             CODECHAL_ENCODE_ASSERTMESSAGE("Number of tile columns cannot be greater than number of pipes (VDBOX) when number of rows > 1");
1949             return MOS_STATUS_INVALID_PARAMETER;
1950         }
1951     }
1952 
1953     //num columns must be either 2 or 4 for scalability mode, H/W limitation
1954     if ((m_numPipe > 1) && (m_numPipe != 2) && (m_numPipe != 4))
1955     {
1956         CODECHAL_ENCODE_ASSERTMESSAGE("Num pipes must be either 2 or 4 for scalability mode, H/W limitation");
1957         return MOS_STATUS_INVALID_PARAMETER;
1958     }
1959 
1960     // Tile width needs to be minimum size 256, error out if less
1961     if ((col != 1) && ((m_vp9PicParams->SrcFrameWidthMinus1 + 1) < col * CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH))
1962     {
1963         CODECHAL_ENCODE_ASSERTMESSAGE("Incorrect number of columns input parameter, Tile width is < 256");
1964         return MOS_STATUS_INVALID_PARAMETER;
1965     }
1966 
1967     if (row > 4)
1968     {
1969         CODECHAL_ENCODE_ASSERTMESSAGE("Max number of rows cannot exceeds 4 by VP9 Spec.");
1970         return MOS_STATUS_INVALID_PARAMETER;
1971     }
1972 
1973     if (m_vdencEnabled && VP9_ENCODED_CHROMA_FORMAT_YUV444 == m_vp9SeqParams->SeqFlags.fields.EncodedFormat && m_vp9SeqParams->TargetUsage == 7)
1974     {
1975         CODECHAL_ENCODE_ASSERTMESSAGE("Speed mode is not supported in VDENC 444, resetting TargetUsage to Normal mode\n");
1976         m_vp9SeqParams->TargetUsage = 4;
1977     }
1978 
1979     // number of tiles for this frame
1980     m_numberTilesInFrame = col * row;
1981     m_numUsedVdbox = m_numPipe;
1982 
1983     if (!m_newSeq)
1984     {
1985         // If there is no new SEQ header, then the number of passes is decided here.
1986         // Otherwise, it is done in SetSequenceStructs. For example, BRC setting may be changed.
1987         m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
1988     }
1989     // Last place where scalable mode is decided
1990     if (m_frameNum == 0)
1991     {
1992         m_lastFrameScalableMode = m_scalableMode;
1993     }
1994     return eStatus;
1995 }
1996 
SetGpuCtxCreatOption()1997 MOS_STATUS CodechalVdencVp9StateG11::SetGpuCtxCreatOption()
1998 {
1999     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2000 
2001     CODECHAL_ENCODE_FUNCTION_ENTER;
2002 
2003     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2004     {
2005         CodechalEncoderState::SetGpuCtxCreatOption();
2006     }
2007     else
2008     {
2009         m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
2010         CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
2011 
2012         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
2013             m_scalabilityState,
2014             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2015     }
2016 
2017     return eStatus;
2018 }
2019 
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)2020 MOS_STATUS CodechalVdencVp9StateG11::SetAndPopulateVEHintParams(
2021     PMOS_COMMAND_BUFFER  cmdBuffer)
2022 {
2023     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
2024 
2025     CODECHAL_ENCODE_FUNCTION_ENTER;
2026 
2027     if (!MOS_VE_SUPPORTED(m_osInterface))
2028     {
2029         return eStatus;
2030     }
2031 
2032     CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
2033     MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
2034 
2035     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2036     {
2037         scalSetParms.bNeedSyncWithPrevious = true;
2038     }
2039 
2040     int32_t currentPass = GetCurrentPass();
2041     uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2042     // Scalable mode only
2043     if (m_scalableMode)
2044     {
2045         for (auto i = 0; i < m_numPipe; i++)
2046         {
2047             scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex].OsResource;
2048         }
2049     }
2050 
2051     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
2052     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2053     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
2054 
2055     return eStatus;
2056 }
2057 
SetTileData()2058 MOS_STATUS CodechalVdencVp9StateG11::SetTileData()
2059 {
2060     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2061 
2062     CODECHAL_ENCODE_FUNCTION_ENTER;
2063 
2064     MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 *tileCodingParams = m_tileParams;
2065 
2066     tileCodingParams->Mode = CODECHAL_ENCODE_MODE_VP9;
2067 
2068     uint32_t numTileRows    = (1 << m_vp9PicParams->log2_tile_rows);
2069     uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
2070     uint32_t numTiles = numTileRows * numTileColumns;
2071 
2072     uint32_t bitstreamSizePerTile = m_bitstreamUpperBound / (numTiles * CODECHAL_CACHELINE_SIZE);
2073     uint32_t numLcusInTiles = 0, numCuRecord = 64;
2074     uint32_t cuLevelStreamoutOffset = 0, sliceSizeStreamoutOffset = 0, bitstreamByteOffset = 0, sseRowstoreOffset = 0;
2075 
2076     for (uint32_t tileCntr = 0; tileCntr < numTiles; tileCntr++)
2077     {
2078         uint32_t tileX, tileY, tileStartSbX, tileStartSbY, tileWidthInSb, tileHeightInSb, lastTileColWidth, lastTileRowHeight, numLcuInTile;
2079         bool isLastTileCol, isLastTileRow;
2080 
2081         tileX = tileCntr % numTileColumns;
2082         tileY = tileCntr / numTileColumns;
2083 
2084         isLastTileCol = ((numTileColumns - 1) == tileX);
2085         isLastTileRow = ((numTileRows - 1) == tileY);
2086 
2087         tileStartSbX = (tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns;
2088         tileStartSbY = (tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows;
2089 
2090         tileWidthInSb  = (isLastTileCol ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) - tileStartSbX;
2091         tileHeightInSb = (isLastTileRow ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) - tileStartSbY;
2092 
2093         lastTileColWidth  = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameWidthMinus1 + 1 - tileStartSbX * CODEC_VP9_SUPER_BLOCK_WIDTH), CODEC_VP9_MIN_BLOCK_WIDTH) / CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
2094         lastTileRowHeight = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameHeightMinus1 + 1 - tileStartSbY * CODEC_VP9_SUPER_BLOCK_HEIGHT), CODEC_VP9_MIN_BLOCK_HEIGHT) / CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
2095 
2096         numLcuInTile = tileWidthInSb * tileHeightInSb;
2097         tileCodingParams[tileCntr].NumberOfActiveBePipes     = m_numPipe;
2098         tileCodingParams[tileCntr].NumOfTilesInFrame         = numTiles;
2099         tileCodingParams[tileCntr].NumOfTileColumnsInFrame   = numTileColumns;
2100         tileCodingParams[tileCntr].TileStartLCUX             = tileStartSbX;
2101         tileCodingParams[tileCntr].TileStartLCUY             = tileStartSbY;
2102         tileCodingParams[tileCntr].IsLastTileofColumn        = isLastTileRow;
2103         tileCodingParams[tileCntr].IsLastTileofRow           = isLastTileCol;
2104 
2105         tileCodingParams[tileCntr].TileWidthInMinCbMinus1    = isLastTileCol ? lastTileColWidth : (tileWidthInSb * CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
2106         tileCodingParams[tileCntr].TileHeightInMinCbMinus1   = isLastTileRow ? lastTileRowHeight : (tileHeightInSb * CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
2107 
2108         if (m_scalableMode)
2109         {
2110             sseRowstoreOffset = (tileStartSbX + (3 * tileX)) << 5;
2111 
2112             tileCodingParams[tileCntr].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * 64),
2113                 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
2114             tileCodingParams[tileCntr].presHcpSyncBuffer        = &m_hcpScalabilitySyncBuffer.sResource;
2115             tileCodingParams[tileCntr].SliceSizeStreamoutOffset = sliceSizeStreamoutOffset;
2116             tileCodingParams[tileCntr].SseRowstoreOffset        = sseRowstoreOffset;
2117             tileCodingParams[tileCntr].BitstreamByteOffset      = bitstreamByteOffset;
2118             tileCodingParams[tileCntr].CuLevelStreamoutOffset   = cuLevelStreamoutOffset;
2119 
2120             cuLevelStreamoutOffset   += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
2121             sliceSizeStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
2122             sseRowstoreOffset        += (numLcuInTile * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
2123             bitstreamByteOffset      += bitstreamSizePerTile;
2124             numLcusInTiles           += numLcuInTile;
2125 
2126             tileCodingParams[tileCntr].TileSizeStreamoutOffset = (tileCntr*m_hcpInterface->GetPakHWTileSizeRecordSize() + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
2127 
2128             //DW5
2129             const uint32_t frameStatsStreamoutSize = m_brcPakStatsBufSize;
2130             tileCodingParams[tileCntr].PakTileStatisticsOffset = (tileCntr*frameStatsStreamoutSize + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
2131 
2132             //DW12
2133             tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = ((tileCntr * m_probabilityCounterBufferSize) + (CODECHAL_CACHELINE_SIZE - 1)) / CODECHAL_CACHELINE_SIZE;
2134         }
2135         else
2136         {
2137             tileCodingParams[tileCntr].CuRecordOffset = 0;
2138             tileCodingParams[tileCntr].presHcpSyncBuffer = nullptr;
2139             tileCodingParams[tileCntr].SliceSizeStreamoutOffset = 0;
2140             tileCodingParams[tileCntr].SseRowstoreOffset = 0;
2141             tileCodingParams[tileCntr].BitstreamByteOffset = 0;
2142             tileCodingParams[tileCntr].CuLevelStreamoutOffset = 0;
2143             tileCodingParams[tileCntr].TileSizeStreamoutOffset = 0;
2144 
2145             //DW5
2146             tileCodingParams[tileCntr].PakTileStatisticsOffset = 0;
2147 
2148             //DW12
2149             tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = 0;
2150         }
2151     }
2152 
2153     return eStatus;
2154 }
2155 
SetTileCommands(PMOS_COMMAND_BUFFER cmdBuffer)2156 MOS_STATUS CodechalVdencVp9StateG11::SetTileCommands(
2157     PMOS_COMMAND_BUFFER cmdBuffer)
2158 {
2159     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2160 
2161     CODECHAL_ENCODE_FUNCTION_ENTER;
2162 
2163     MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G11 vdencWalkerStateParams;
2164     vdencWalkerStateParams.Mode             = CODECHAL_ENCODE_MODE_VP9;
2165     vdencWalkerStateParams.pVp9EncPicParams = m_vp9PicParams;
2166     vdencWalkerStateParams.dwNumberOfPipes  = VDENC_PIPE_SINGLE_PIPE;
2167 
2168     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2169     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2170     // MFXPipeDone should not be set for tail insertion
2171     vdPipelineFlushParams.Flags.bWaitDoneMFX =
2172         (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
2173     vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2174     vdPipelineFlushParams.Flags.bFlushVDENC    = 1;
2175     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2176 
2177     if (IsFirstPipe() && IsFirstPass())
2178     {
2179         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData());
2180     }
2181 
2182     MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
2183     uint32_t                             numTileColumns  = (1 << m_vp9PicParams->log2_tile_columns);
2184     uint32_t                             numTileRows     = (1 << m_vp9PicParams->log2_tile_rows);
2185     int currentPipe     = GetCurrentPipe();
2186     for (uint32_t tileRow = 0, tileIdx = 0; tileRow < numTileRows; tileRow++)
2187     {
2188         for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++, tileIdx++)
2189         {
2190             if (m_numPipe > 1)
2191             {
2192                 if (tileCol != currentPipe)
2193                 {
2194                     continue;
2195                 }
2196             }
2197 
2198             // HCP_TILE_CODING commmand
2199             CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG11 *>(m_hcpInterface)->AddHcpTileCodingCmd(cmdBuffer, &m_tileParams[tileIdx]));
2200 
2201             MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
2202             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(cmdBuffer, nullptr, &vdencWeightOffsetParams));
2203 
2204             vdencWalkerStateParams.pTileCodingParams = &m_tileParams[tileIdx];
2205             vdencWalkerStateParams.dwTileId          = tileIdx;
2206             switch (m_numPipe)
2207             {
2208             case 0:
2209             case 1:
2210                 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
2211                 break;
2212             case 2:
2213                 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
2214                 break;
2215             case 4:
2216                 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
2217                 break;
2218             default:
2219                 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
2220                 CODECHAL_ENCODE_ASSERTMESSAGE("Num Pipes invalid");
2221                 return eStatus;
2222                 break;
2223             }
2224             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
2225 
2226             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipelineFlushParams));
2227         }
2228     }
2229 
2230     return eStatus;
2231 }
2232 
ExecuteTileLevel()2233 MOS_STATUS CodechalVdencVp9StateG11::ExecuteTileLevel()
2234 {
2235     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2236 
2237     CODECHAL_ENCODE_FUNCTION_ENTER;
2238 
2239     int currentPipe = GetCurrentPipe();
2240     int currentPass  = GetCurrentPass();
2241 
2242     if(currentPipe < 0 || currentPass < 0)
2243     {
2244         return MOS_STATUS_INVALID_PARAMETER;
2245     }
2246 
2247     MOS_COMMAND_BUFFER cmdBuffer;
2248     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2249 
2250     if (IsFirstPipe())
2251     {
2252         MHW_BATCH_BUFFER secondLevelBatchBuffer;
2253         MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
2254         secondLevelBatchBuffer.dwOffset = 0;
2255         secondLevelBatchBuffer.bSecondLevel = true;
2256 
2257         if (!m_hucEnabled)
2258         {
2259             secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
2260         }
2261         else
2262         {
2263             secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
2264         }
2265         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
2266             &cmdBuffer,
2267             &secondLevelBatchBuffer));
2268     }
2269 
2270     // Setup Tile level PAK commands
2271     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileCommands(&cmdBuffer));
2272 
2273     // Send MI_FLUSH command
2274     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2275     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2276     flushDwParams.bVideoPipelineCacheInvalidate = true;
2277     if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[currentPipe].sResource))
2278     {
2279         flushDwParams.pOsResource = &m_stitchWaitSemaphoreMem[currentPipe].sResource;
2280         flushDwParams.dwDataDW1 = currentPass + 1;
2281     }
2282     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2283 
2284     if (IsFirstPipe())
2285     {
2286         if (m_numPipe > 1 && m_enableTileStitchByHW)
2287         {
2288             for (auto i = 1; i < m_numPipe; i++)
2289             {
2290                 if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource) && m_hucEnabled)
2291                 {
2292                     // This semaphore waits for all pipes except pipe 1 vdenc+pak to finish processing before stitching bitstream
2293                     SendHWWaitCommand(&m_stitchWaitSemaphoreMem[i].sResource, &cmdBuffer, (currentPass + 1));
2294                 }
2295             }
2296 
2297             uint32_t index = m_virtualEngineBBIndex;
2298             HucCopyParams copyParams;
2299 
2300             copyParams.size = m_tileRecordBuffer[index].sResource.iSize;
2301             copyParams.presSrc = &m_tileRecordBuffer[index].sResource;
2302 
2303             copyParams.presDst = &m_resBitstreamBuffer;
2304             copyParams.lengthOfTable = (uint8_t)(m_numberTilesInFrame);
2305 
2306             auto hucCmdInitializer = static_cast<CodechalCmdInitializerG11*>(m_hucCmdInitializer);
2307             CODECHAL_ENCODE_CHK_STATUS_RETURN(hucCmdInitializer->AddCopyCmds(&cmdBuffer, &copyParams));
2308         }
2309         // PAK integration kernel to integrate stats for next HUC pass
2310         if (m_scalableMode && m_hucEnabled && m_isTilingSupported && IsFirstPipe())
2311         {
2312             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9PakInt(&cmdBuffer));
2313         }
2314 
2315         if (m_scalableMode && IsLastPass())
2316         {
2317             // In scalablemode, singletaskPhaseSupported is off. On last pass we have to make sure to signal
2318             // m_lastTaskInPhase flag to true so that end status report can send a sync tag
2319             m_lastTaskInPhase = true;
2320         }
2321 
2322         CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2323 
2324         if (!m_scalableMode) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2325         {
2326             CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2327         }
2328     }
2329 
2330     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2331     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2332     if (m_singleTaskPhaseSupported && m_hucEnabled && IsLastPass())
2333     {
2334         m_lastTaskInPhase = true; //HPU singletask phase mode only
2335     }
2336 
2337     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2338     {
2339         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2340     }
2341 
2342     std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
2343     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
2344         &cmdBuffer,
2345         CODECHAL_NUM_MEDIA_STATES,
2346         currPassName.data())));
2347 
2348     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2349 
2350     if (IsFirstPipe() &&
2351         m_waitForEnc &&
2352         IsFirstPass() &&
2353         !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2354     {
2355         MOS_SYNC_PARAMS syncParams;
2356         syncParams = g_cInitSyncParams;
2357         syncParams.GpuContext = m_videoContext;
2358         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
2359 
2360         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
2361         m_waitForEnc = false;
2362     }
2363 
2364     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2365     {
2366         bool renderFlags = m_videoContextUsesNullHw;
2367 
2368         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
2369         m_lastTaskInPhase = false;
2370 
2371         CODECHAL_DEBUG_TOOL(
2372             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2373                 m_resVdencPakObjCmdStreamOutBuffer,
2374                 CodechalDbgAttr::attrPakObjStreamout,
2375                 currPassName.data(),
2376                 m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE,
2377                 0,
2378                 CODECHAL_NUM_MEDIA_STATES));
2379 
2380             if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
2381                 //CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
2382                 //m_debugInterface->DumpBuffer(
2383                 //    (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
2384                 //    CodechalDbgAttr::attrOutput,
2385                 //    "SegMap_Out",
2386                 //    CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
2387                 //    0,
2388                 //    CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
2389             }
2390 
2391             if (m_mmcState) {
2392                 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2393             });
2394     }
2395 
2396     if (IsFirstPipe() && IsLastPass())
2397     {
2398         if (m_vp9PicParams->PicFlags.fields.super_frame && m_tsEnabled)
2399         {
2400             CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructSuperFrame());
2401         }
2402     }
2403 
2404     // Increment the second level batch buffer index at the end of every pass
2405     if (IsLastPipe())
2406     {
2407         if (m_hucEnabled)
2408         {
2409             // We save the index of the 2nd level batch buffer in case there is a pass that needs the last SLBB
2410             m_lastVdencPictureState2ndLevelBBIndex = m_vdencPictureState2ndLevelBBIndex;
2411         }
2412         m_vdencPictureState2ndLevelBBIndex = (m_vdencPictureState2ndLevelBBIndex + 1) % CODECHAL_VP9_ENCODE_RECYCLED_BUFFER_NUM;
2413     }
2414 
2415     // Reset parameters for next PAK execution
2416     if (IsLastPipe() && IsLastPass())
2417     {
2418 
2419         if ((currentPipe == 0) &&
2420             m_signalEnc &&
2421             !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
2422         {
2423             // signal semaphore
2424             MOS_SYNC_PARAMS syncParams;
2425             syncParams = g_cInitSyncParams;
2426             syncParams.GpuContext = m_videoContext;
2427             syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;
2428 
2429             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2430             m_semaphoreObjCount++;
2431         }
2432 
2433         m_prevFrameInfo.KeyFrame    = !m_vp9PicParams->PicFlags.fields.frame_type;
2434         m_prevFrameInfo.IntraOnly   = (m_vp9PicParams->PicFlags.fields.frame_type == CODEC_VP9_KEY_FRAME) || m_vp9PicParams->PicFlags.fields.intra_only;
2435         m_prevFrameInfo.ShowFrame   = m_vp9PicParams->PicFlags.fields.show_frame;
2436         m_prevFrameInfo.FrameWidth  = m_oriFrameWidth;
2437         m_prevFrameInfo.FrameHeight = m_oriFrameHeight;
2438         m_currMvTemporalBufferIndex ^= 0x01;
2439         m_contextFrameTypes[m_vp9PicParams->PicFlags.fields.frame_context_idx] = m_vp9PicParams->PicFlags.fields.frame_type;
2440         m_prevFrameSegEnabled                                                  = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
2441 
2442         // Reset parameters for next PAK execution
2443         if (!m_singleTaskPhaseSupported)
2444         {
2445             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2446         }
2447 
2448         m_newPpsHeader = 0;
2449         m_newSeqHeader = 0;
2450         m_frameNum++;
2451         // Save the last frame's scalable mode flag to prevent switching buffers when doing next pass
2452         m_lastFrameScalableMode = m_scalableMode;
2453 
2454     }
2455 
2456     return eStatus;
2457 }
2458 
ExecuteSliceLevel()2459 MOS_STATUS CodechalVdencVp9StateG11::ExecuteSliceLevel()
2460 {
2461     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2462 
2463     CODECHAL_ENCODE_FUNCTION_ENTER;
2464 
2465     return ExecuteTileLevel();
2466 }
2467 
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & pipeModeSelectParams)2468 void CodechalVdencVp9StateG11::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& pipeModeSelectParams)
2469 {
2470     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2471 
2472     CODECHAL_ENCODE_FUNCTION_ENTER;
2473 
2474     CodechalVdencVp9State::SetHcpPipeModeSelectParams(pipeModeSelectParams);
2475 
2476     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11& pipeModeSelectParamsG11 = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11&>(pipeModeSelectParams);
2477 
2478     pipeModeSelectParamsG11.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
2479     pipeModeSelectParamsG11.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
2480     if (m_scalableMode)
2481     {
2482         // Running in the multiple VDBOX mode
2483         if (IsFirstPipe())
2484         {
2485             pipeModeSelectParamsG11.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
2486         }
2487         else
2488         {
2489             if (IsLastPipe())
2490             {
2491                 pipeModeSelectParamsG11.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
2492             }
2493             else
2494             {
2495                 pipeModeSelectParamsG11.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
2496             }
2497         }
2498 
2499         pipeModeSelectParamsG11.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
2500     }
2501 
2502     return;
2503 }
2504 
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)2505 void CodechalVdencVp9StateG11::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
2506 {
2507     CodechalVdencVp9State::SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2508 
2509     PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBBIndex];
2510     bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
2511 
2512     if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
2513     {
2514         // overwrite presProbabilityCounterBuffer and it's params for scalable mode
2515         indObjBaseAddrParams.presProbabilityCounterBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
2516         indObjBaseAddrParams.dwProbabilityCounterOffset = m_tileStatsOffset.counterBuffer;
2517         indObjBaseAddrParams.dwProbabilityCounterSize = m_statsSize.counterBuffer;
2518     }
2519 
2520     indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer? &tileRecordBuffer->sResource : nullptr;
2521     indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer? ((m_statsSize.tileSizeRecord) * GetNumTilesInFrame()) : 0;
2522     indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer? m_tileStatsOffset.tileSizeRecord: 0;
2523 
2524 }
2525 
VerifyCommandBufferSize()2526 MOS_STATUS CodechalVdencVp9StateG11::VerifyCommandBufferSize()
2527 {
2528     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2529 
2530     CODECHAL_ENCODE_FUNCTION_ENTER;
2531 
2532     if (UseLegacyCommandBuffer())   // legacy mode & resize CommandBuffer Size for every BRC pass
2533     {
2534         if (!m_singleTaskPhaseSupported)
2535         {
2536             CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
2537         }
2538     }
2539     else    // virtual engine
2540     {
2541         uint32_t requestedSize =
2542             m_pictureStatesSize +
2543             m_picturePatchListSize +
2544             m_extraPictureStatesSize +
2545             (m_sliceStatesSize * m_numSlices);
2546         requestedSize += requestedSize*m_numPassesInOnePipe;
2547         if (m_hucEnabled && m_brcEnabled)
2548         {
2549             requestedSize += m_brcMaxNumPasses*(m_defaultHucCmdsSize+m_defaultHucPatchListSize);
2550         }
2551         // Running in the multiple VDBOX mode
2552         int currentPipe = GetCurrentPipe();
2553         int currentPass = GetCurrentPass();
2554         if (currentPipe < 0 || currentPipe >= m_numPipe)
2555         {
2556             return MOS_STATUS_INVALID_PARAMETER;
2557         }
2558         if (currentPass < 0 || currentPass >= m_brcMaxNumPasses)
2559         {
2560             return MOS_STATUS_INVALID_PARAMETER;
2561         }
2562 
2563         if (IsFirstPipe() && m_osInterface->bUsesPatchList)
2564         {
2565             CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
2566         }
2567         uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2568         PMOS_COMMAND_BUFFER cmdBuffer;
2569         if (m_osInterface->phasedSubmission)
2570         {
2571             cmdBuffer = &m_realCmdBuffer;
2572         }
2573         else
2574         {
2575             cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][(uint32_t)currentPipe][passIndex];
2576         }
2577 
2578         if (Mos_ResourceIsNull(&cmdBuffer->OsResource) ||
2579             m_sizeOfVEBatchBuffer < requestedSize)
2580         {
2581             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
2582 
2583             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
2584             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
2585             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
2586             allocParamsForBufferLinear.Format = Format_Buffer;
2587             allocParamsForBufferLinear.dwBytes = requestedSize;
2588             allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
2589 
2590             if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
2591             {
2592                 if (cmdBuffer->pCmdBase)
2593                 {
2594                     m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
2595                 }
2596                 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
2597             }
2598 
2599             eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
2600                 m_osInterface,
2601                 &allocParamsForBufferLinear,
2602                 &cmdBuffer->OsResource);
2603             CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
2604 
2605             m_sizeOfVEBatchBuffer = requestedSize;
2606         }
2607 
2608         if (cmdBuffer->pCmdBase == 0)
2609         {
2610             MOS_LOCK_PARAMS lockParams;
2611             MOS_ZeroMemory(&lockParams, sizeof(lockParams));
2612             lockParams.WriteOnly = true;
2613             cmdBuffer->pCmdPtr = cmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &cmdBuffer->OsResource, &lockParams);
2614             cmdBuffer->iRemaining = m_sizeOfVEBatchBuffer;
2615             cmdBuffer->iOffset = 0;
2616 
2617             if (cmdBuffer->pCmdBase == nullptr)
2618             {
2619                 return MOS_STATUS_NULL_POINTER;
2620             }
2621         }
2622     }
2623 
2624     return eStatus;
2625 }
2626 
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)2627 MOS_STATUS CodechalVdencVp9StateG11::GetCommandBuffer(
2628     PMOS_COMMAND_BUFFER cmdBuffer)
2629 {
2630     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2631 
2632     CODECHAL_ENCODE_FUNCTION_ENTER;
2633 
2634     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2635 
2636     if (UseLegacyCommandBuffer())        // legacy mode
2637     {
2638         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
2639     }
2640     else    // virtual engine
2641     {
2642         if (m_osInterface->phasedSubmission)
2643         {
2644             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
2645             *cmdBuffer = m_realCmdBuffer;
2646         }
2647         else
2648         {
2649             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
2650 
2651             int currentPipe = GetCurrentPipe();
2652             int currentPass = GetCurrentPass();
2653             if (currentPipe < 0 || currentPipe >= m_numPipe)
2654             {
2655                 return MOS_STATUS_INVALID_PARAMETER;
2656             }
2657             uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2658             *cmdBuffer = m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex];
2659         }
2660     }
2661 
2662     return eStatus;
2663 }
2664 
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)2665 MOS_STATUS CodechalVdencVp9StateG11::ReturnCommandBuffer(
2666     PMOS_COMMAND_BUFFER cmdBuffer)
2667 {
2668     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2669 
2670     CODECHAL_ENCODE_FUNCTION_ENTER;
2671 
2672     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2673 
2674     if (UseLegacyCommandBuffer())        // legacy mode
2675     {
2676         m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
2677     }
2678     else    // virtual engine
2679     {
2680         if (m_osInterface->phasedSubmission)
2681         {
2682             m_realCmdBuffer = *cmdBuffer;
2683             m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
2684         }
2685         else
2686         {
2687             int currentPipe = GetCurrentPipe();
2688             int currentPass = GetCurrentPass();
2689             if (currentPipe < 0 || currentPipe >= m_numPipe)
2690             {
2691                 return MOS_STATUS_INVALID_PARAMETER;
2692             }
2693 
2694             if (eStatus == MOS_STATUS_SUCCESS)
2695             {
2696                 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2697                 m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex] = *cmdBuffer;
2698                 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
2699             }
2700         }
2701     }
2702 
2703     return eStatus;
2704 }
2705 
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)2706 MOS_STATUS CodechalVdencVp9StateG11::SubmitCommandBuffer(
2707     PMOS_COMMAND_BUFFER cmdBuffer,
2708     bool bNullRendering)
2709 {
2710     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2711 
2712     CODECHAL_ENCODE_FUNCTION_ENTER;
2713 
2714     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2715 
2716     if (UseLegacyCommandBuffer())        // legacy mode
2717     {
2718         if (!IsRenderContext()) // Set VE Hints for video contexts only
2719         {
2720             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
2721         }
2722         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
2723     }
2724     else // virtual engine
2725     {
2726         if (m_osInterface->phasedSubmission)
2727         {
2728             CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(),&m_realCmdBuffer);
2729             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
2730         }
2731         else
2732         {
2733             if (!IsLastPipe())
2734             {
2735                 return eStatus;
2736             }
2737             int currentPass = GetCurrentPass();
2738             for (auto i = 0; i < m_numPipe; i++)
2739             {
2740                 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2741                 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex];
2742 
2743                 if (cmdBuffer->pCmdBase)
2744                 {
2745                     m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
2746                 }
2747 
2748                 cmdBuffer->pCmdBase = 0;
2749                 cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
2750             }
2751 
2752             if (eStatus == MOS_STATUS_SUCCESS)
2753             {
2754                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
2755                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
2756             }
2757         }
2758     }
2759 
2760     return eStatus;
2761 }
2762 
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)2763 MOS_STATUS CodechalVdencVp9StateG11::SendPrologWithFrameTracking(
2764     PMOS_COMMAND_BUFFER cmdBuffer,
2765     bool frameTrackingRequested,
2766     MHW_MI_MMIOREGISTERS *mmioRegister)
2767 {
2768     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2769 
2770     CODECHAL_ENCODE_FUNCTION_ENTER;
2771 
2772     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2773 
2774     if (IsRenderContext()) //Render context only
2775     {
2776         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested));
2777         return eStatus;
2778     }
2779     else // Legacy mode or virtual engine mode
2780     {
2781         if (!IsLastPipe())
2782         {
2783             return eStatus;
2784         }
2785         PMOS_COMMAND_BUFFER commandBufferInUse;
2786         if (m_realCmdBuffer.pCmdBase && m_scalableMode)
2787         {
2788             commandBufferInUse = &m_realCmdBuffer; //virtual engine mode
2789         }
2790         else
2791         {
2792             if (cmdBuffer && cmdBuffer->pCmdBase)
2793             {
2794                 commandBufferInUse = cmdBuffer; //legacy mode
2795             }
2796             else
2797             {
2798                 eStatus = MOS_STATUS_INVALID_PARAMETER;
2799                 return eStatus;
2800             }
2801         }
2802         commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
2803         commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
2804         commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
2805         commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
2806         commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
2807         if (frameTrackingRequested && m_frameTrackingEnabled)
2808         {
2809             commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
2810             commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
2811                 &m_encodeStatusBuf.resStatusBuffer;
2812             commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
2813             // Set media frame tracking address offset(the offset from the encoder status buffer page)
2814             commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
2815         }
2816 
2817         MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
2818         MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
2819         genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
2820         genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
2821         genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
2822         genericPrologParams.dwStoreDataValue = m_storeData - 1;
2823         CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
2824     }
2825 
2826     return eStatus;
2827 }
2828 
SetSemaphoreMem(PMOS_RESOURCE semaphoreMem,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t value)2829 MOS_STATUS CodechalVdencVp9StateG11::SetSemaphoreMem(
2830     PMOS_RESOURCE               semaphoreMem,
2831     PMOS_COMMAND_BUFFER         cmdBuffer,
2832     uint32_t                    value)
2833 {
2834     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2835 
2836     CODECHAL_ENCODE_FUNCTION_ENTER;
2837 
2838     CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
2839     MHW_MI_STORE_DATA_PARAMS storeDataParams;
2840     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2841     storeDataParams.pOsResource = semaphoreMem;
2842     storeDataParams.dwResourceOffset = 0;
2843     storeDataParams.dwValue = value;
2844 
2845     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2846         cmdBuffer,
2847         &storeDataParams));
2848 
2849     return eStatus;
2850 }
2851 
SendHWWaitCommand(PMOS_RESOURCE semaphoreMem,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t value)2852 MOS_STATUS CodechalVdencVp9StateG11::SendHWWaitCommand(
2853     PMOS_RESOURCE               semaphoreMem,
2854     PMOS_COMMAND_BUFFER         cmdBuffer,
2855     uint32_t                    value)
2856 {
2857     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2858 
2859     CODECHAL_ENCODE_FUNCTION_ENTER;
2860     CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
2861 
2862     MHW_MI_SEMAPHORE_WAIT_PARAMS semaphoreWaitParams;
2863     MOS_ZeroMemory(&semaphoreWaitParams, sizeof(semaphoreWaitParams));
2864     semaphoreWaitParams.presSemaphoreMem = semaphoreMem;
2865     semaphoreWaitParams.bPollingWaitMode = true;
2866     semaphoreWaitParams.dwSemaphoreData = value;
2867     semaphoreWaitParams.CompareOperation = MHW_MI_SAD_EQUAL_SDD;
2868     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiSemaphoreWaitCmd(cmdBuffer, &semaphoreWaitParams));
2869 
2870     return eStatus;
2871 }
2872 
SetDmemHuCPakInt()2873 MOS_STATUS CodechalVdencVp9StateG11::SetDmemHuCPakInt()
2874 {
2875     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2876 
2877     CODECHAL_ENCODE_FUNCTION_ENTER;
2878 
2879     uint8_t currPass = (uint8_t)GetCurrentPass();
2880 
2881     MOS_LOCK_PARAMS lockFlags;
2882     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2883     lockFlags.WriteOnly = 1;
2884     // All bytes in below dmem for fields not used by VP9 to be set to 0xFF.
2885     HucPakIntDmem* dmem = (HucPakIntDmem*)m_osInterface->pfnLockResource(
2886         m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass], &lockFlags);
2887     CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
2888 
2889     MOS_ZeroMemory(dmem, sizeof(HucPakIntDmem));
2890     // CODECHAL_VDENC_VP9_PAK_INT_DMEM_OFFSETS_SIZE size of offsets in the CODECHAL_VDENC_VP9_HUC_PAK_INT_DMEM struct.
2891     // Reset offsets to 0xFFFFFFFF as unavailable
2892     memset(dmem, 0xFF, m_pakIntDmemOffsetsSize);
2893 
2894     dmem->totalSizeInCommandBuffer = 0;
2895     dmem->offsetInCommandBuffer = 0xFFFF; // Not used for VP9, all bytes in dmem for fields not used are 0xFF
2896     dmem->picWidthInPixel = (uint16_t)m_frameWidth;
2897     dmem->picHeightInPixel = (uint16_t)m_frameHeight;
2898     dmem->totalNumberOfPaks = m_numPipe;
2899     dmem->codec = m_pakIntVp9CodecId;
2900     dmem->maxPass = m_brcMaxNumPasses; // Only VDEnc CQP and BRC
2901     dmem->currentPass = currPass + 1;
2902     dmem->lastTileBSStartInBytes = 0xFFFF;
2903     dmem->picStateStartInBytes = 0xFFFF;
2904 
2905     // Offset 0 is for region 1 - output of integrated frame stats from PAK integration kernel
2906 
2907     dmem->tileSizeRecordOffset[0] = m_frameStatsOffset.tileSizeRecord;
2908     dmem->vdencStatOffset[0] = m_frameStatsOffset.vdencStats;
2909     dmem->vp9PakStatOffset[0] = m_frameStatsOffset.pakStats;
2910     dmem->vp9CounterBufferOffset[0] = m_frameStatsOffset.counterBuffer;
2911 
2912     //Offset 1 - 4 is for region 0 - Input to PAK integration kernel for all tile statistics per pipe
2913     for (auto i = 1; i <= m_numPipe; i++)
2914     {
2915         dmem->numTiles[i - 1] = (GetNumTilesInFrame()) / m_numPipe;
2916         dmem->tileSizeRecordOffset[i] = m_tileStatsOffset.tileSizeRecord + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.tileSizeRecord);
2917         dmem->vdencStatOffset[i] = m_tileStatsOffset.vdencStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.vdencStats);
2918         dmem->vp9PakStatOffset[i] = m_tileStatsOffset.pakStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.pakStats);
2919         dmem->vp9CounterBufferOffset[i] = m_tileStatsOffset.counterBuffer + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.counterBuffer);
2920     }
2921     m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass]);
2922 
2923     return eStatus;
2924 }
2925 
HuCVp9PakInt(PMOS_COMMAND_BUFFER cmdBuffer)2926 MOS_STATUS CodechalVdencVp9StateG11::HuCVp9PakInt(
2927     PMOS_COMMAND_BUFFER cmdBuffer)
2928 {
2929     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2930 
2931     CODECHAL_ENCODE_FUNCTION_ENTER;
2932 
2933     if (!IsFirstPipe())
2934     {
2935         return eStatus;
2936     }
2937 
2938     CODECHAL_DEBUG_TOOL(
2939         uint32_t    hucRegionSize[16]   = { 0 };
2940         const char* hucRegionName[16] = { "\0" };
2941 
2942         hucRegionName[0] = "_MultiPakStreamout_input";
2943         hucRegionSize[0] = m_tileStatsPakIntegrationBufferSize;
2944         hucRegionName[1] = "_IntegratedStreamout_output";
2945         hucRegionSize[1] = m_frameStatsPakIntegrationBufferSize;
2946         hucRegionName[4] = "_BitStream_input";
2947         hucRegionSize[4] = MOS_ALIGN_CEIL(m_bitstreamUpperBound, CODECHAL_PAGE_SIZE);
2948         hucRegionName[5] = "_BitStream_output";
2949         hucRegionSize[5] = MOS_ALIGN_CEIL(m_bitstreamUpperBound, CODECHAL_PAGE_SIZE);
2950         hucRegionName[6] = "_HistoryBufferOutput";
2951         hucRegionSize[6] = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
2952         hucRegionName[7] = "_HCPPICSTATEInputDummy";
2953         hucRegionSize[7] = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
2954         hucRegionName[8] = "_HCPPICSTATEOutputDummy";
2955         hucRegionSize[8] = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
2956         hucRegionName[9] = "_BrcDataOutputBuffer"; // This is the pak MMIO region 7 , not 4, of BRC update
2957         hucRegionSize[9] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
2958         hucRegionName[15] = "_TileRecordBuffer"; // This is the pak MMIO region 7 , not 4, of BRC update
2959         hucRegionSize[15] = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
2960     )
2961 
2962     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
2963     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
2964     imemParams.dwKernelDescriptor =  m_vdboxHucPakIntegrationKernelDescriptor;
2965     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(cmdBuffer, &imemParams));
2966 
2967     // pipe mode select
2968     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
2969     pipeModeSelectParams.Mode = m_mode;
2970     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
2971 
2972     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakInt());
2973 
2974     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
2975     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
2976     dmemParams.presHucDataSource = &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()];
2977     dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
2978     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
2979     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
2980 
2981     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
2982     MOS_ZeroMemory(&virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
2983     virtualAddrParams.regionParams[0].presRegion = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
2984     virtualAddrParams.regionParams[0].dwOffset = 0;
2985     virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource; // Region 1 - HuC Frame statistics output
2986     virtualAddrParams.regionParams[1].isWritable = true;
2987     virtualAddrParams.regionParams[4].presRegion = &m_hucPakIntDummyBuffer;             // Region 4 - Not used for VP9
2988     virtualAddrParams.regionParams[5].presRegion = &m_hucPakIntDummyBuffer;             // Region 5 - Not used for VP9
2989     virtualAddrParams.regionParams[5].isWritable = true;
2990     virtualAddrParams.regionParams[6].presRegion = &m_hucPakIntDummyBuffer;             // Region 6 - Not used for VP9
2991     virtualAddrParams.regionParams[6].isWritable = true;
2992     virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntDummyBuffer;             // Region 7 - Not used for VP9
2993     virtualAddrParams.regionParams[8].presRegion = &m_hucPakIntDummyBuffer;             // Region 8 - Not used for VP9
2994     virtualAddrParams.regionParams[8].isWritable = true;
2995     virtualAddrParams.regionParams[9].presRegion = &m_hucPakIntBrcDataBuffer;           // Region 9 - HuC outputs BRC data
2996     virtualAddrParams.regionParams[9].isWritable = true;
2997     virtualAddrParams.regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource;          // Region 15 [In/Out] - Tile Record Buffer
2998     virtualAddrParams.regionParams[15].dwOffset   = 0;
2999 
3000     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
3001 
3002     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
3003 
3004     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(cmdBuffer, true));
3005 
3006     // wait Huc completion (use HEVC bit for now)
3007     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3008     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3009     vdPipeFlushParams.Flags.bFlushHEVC = 1;
3010     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3011     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
3012 
3013     // Flush the engine to ensure memory written out
3014     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3015     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3016     flushDwParams.bVideoPipelineCacheInvalidate = true;
3017     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
3018 
3019     return eStatus;
3020 }
3021 
ConstructPicStateBatchBuf(PMOS_RESOURCE picStateBuffer)3022 MOS_STATUS CodechalVdencVp9StateG11::ConstructPicStateBatchBuf(
3023     PMOS_RESOURCE picStateBuffer)
3024 {
3025     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3026 
3027     CODECHAL_ENCODE_FUNCTION_ENTER;
3028 
3029     CODECHAL_ENCODE_CHK_NULL_RETURN(picStateBuffer);
3030 
3031     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hucCmdInitializer);
3032 
3033     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CommandInitializerSetVp9Params(this));
3034 
3035     MOS_COMMAND_BUFFER cmdBuffer;
3036     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3037 
3038     if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
3039     {
3040         // Send command buffer header at the beginning (OS dependent)
3041         bool requestFrameTracking = false;
3042         //For Superframes, there is an extra submission at the end, so submit with frame tracking there
3043         if (!m_vp9PicParams->PicFlags.fields.super_frame) {
3044             requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
3045         }
3046         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3047         m_firstTaskInPhase = false;
3048     }
3049 
3050     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerVp9Execute(&cmdBuffer, picStateBuffer));
3051 
3052     if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
3053     {
3054         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3055     }
3056 
3057     ReturnCommandBuffer(&cmdBuffer);
3058 
3059     if (!m_singleTaskPhaseSupported)
3060     {
3061         bool renderFlags = m_videoContextUsesNullHw;
3062         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
3063     }
3064 
3065     MOS_LOCK_PARAMS lockFlagsWriteOnly;
3066     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3067     lockFlagsWriteOnly.WriteOnly = 1;
3068     uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, picStateBuffer, &lockFlagsWriteOnly);
3069     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3070 
3071     MOS_COMMAND_BUFFER constructedCmdBuf;
3072     MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
3073     constructedCmdBuf.pCmdBase  = (uint32_t *)data;
3074     constructedCmdBuf.pCmdPtr   = (uint32_t *)data;
3075     constructedCmdBuf.iOffset   = 0;
3076     constructedCmdBuf.iRemaining = m_vdencPicStateSecondLevelBatchBufferSize;
3077 
3078     // HCP_VP9_PIC_STATE
3079     MHW_VDBOX_VP9_ENCODE_PIC_STATE picState;
3080     MOS_ZeroMemory(&picState, sizeof(picState));
3081     picState.pVp9PicParams                    = m_vp9PicParams;
3082     picState.pVp9SeqParams                    = m_vp9SeqParams;
3083     picState.ppVp9RefList                     = &(m_refList[0]);
3084     picState.PrevFrameParams.fields.KeyFrame  = m_prevFrameInfo.KeyFrame;
3085     picState.PrevFrameParams.fields.IntraOnly = m_prevFrameInfo.IntraOnly;
3086     picState.PrevFrameParams.fields.Display   = m_prevFrameInfo.ShowFrame;
3087     picState.dwPrevFrmWidth                   = m_prevFrameInfo.FrameWidth;
3088     picState.dwPrevFrmHeight                  = m_prevFrameInfo.FrameHeight;
3089     picState.ucTxMode                         = m_txMode;
3090     picState.bSSEEnable                       = m_vdencBrcEnabled;
3091     picState.bUseDysRefSurface                = (m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled;
3092     picState.bVdencPakOnlyPassFlag            = m_vdencPakonlyMultipassEnabled;
3093     picState.uiMaxBitRate                     = m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
3094     picState.uiMinBitRate                     = m_vp9SeqParams->MinBitRate * CODECHAL_ENCODE_BRC_KBPS;
3095     constructedCmdBuf.iOffset += m_cmd1Size;
3096     m_hucPicStateOffset = (uint16_t)constructedCmdBuf.iOffset;
3097     constructedCmdBuf.pCmdPtr += constructedCmdBuf.iOffset/sizeof(uint32_t);
3098     eStatus = m_hcpInterface->AddHcpVp9PicStateEncCmd(&constructedCmdBuf, nullptr, &picState);
3099     if (eStatus != MOS_STATUS_SUCCESS)
3100     {
3101         m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
3102         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add HCP_VP9_PIC_STATE command.");
3103         return eStatus;
3104     }
3105 
3106     // HCP_VP9_SEGMENT_STATE
3107     MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
3108     MOS_ZeroMemory(&segmentState, sizeof(segmentState));
3109     segmentState.Mode                    = m_mode;
3110     segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
3111     uint8_t segmentCount                 = (m_vp9PicParams->PicFlags.fields.segmentation_enabled) ? CODEC_VP9_MAX_SEGMENTS : 1;
3112 
3113     for (uint8_t i = 0; i < segmentCount; i++)
3114     {
3115         segmentState.ucCurrentSegmentId = i;
3116         eStatus = m_hcpInterface->AddHcpVp9SegmentStateCmd(&constructedCmdBuf, nullptr, &segmentState);
3117         if (eStatus != MOS_STATUS_SUCCESS)
3118         {
3119             m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
3120             CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MHW_VDBOX_VP9_SEGMENT_STATE command.");
3121             return eStatus;
3122         }
3123     }
3124 
3125     // Adjust cmd buffer offset to have 8 segment state blocks
3126     if (segmentCount < CODEC_VP9_MAX_SEGMENTS)
3127     {
3128         // Max 7 segments, 32 bytes each
3129         uint8_t zeroBlock[m_segmentStateBlockSize * (CODEC_VP9_MAX_SEGMENTS - 1)];
3130         MOS_ZeroMemory(zeroBlock, sizeof(zeroBlock));
3131         Mhw_AddCommandCmdOrBB(m_osInterface, &constructedCmdBuf, nullptr, zeroBlock, (CODEC_VP9_MAX_SEGMENTS - segmentCount) * m_segmentStateBlockSize);
3132     }
3133     m_slbbImgStateOffset = (uint16_t)constructedCmdBuf.iOffset;
3134     constructedCmdBuf.iOffset += m_cmd2Size;
3135     constructedCmdBuf.pCmdPtr += m_cmd2Size/ sizeof(uint32_t);
3136 
3137     // BB_END
3138     eStatus = m_miInterface->AddMiBatchBufferEnd(&constructedCmdBuf, nullptr);
3139     if (eStatus != MOS_STATUS_SUCCESS)
3140     {
3141         m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
3142         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MI Batch Buffer End command.");
3143         return eStatus;
3144     }
3145     m_hucSlbbSize = (uint16_t)constructedCmdBuf.iOffset;
3146 
3147     m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
3148 
3149     return eStatus;
3150 }
3151 
HuCVp9Prob()3152 MOS_STATUS CodechalVdencVp9StateG11::HuCVp9Prob()
3153 {
3154     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3155 
3156     CODECHAL_ENCODE_FUNCTION_ENTER;
3157     if (!IsFirstPipe())
3158     {
3159         return eStatus;
3160     }
3161 
3162     CODECHAL_DEBUG_TOOL(
3163         uint32_t hucRegionSize[16] = { 0 };
3164         const char* hucRegionName[16] = { "\0" };
3165 
3166         hucRegionName[0] = "_UpdatedProbBuffer";   // hucRegionName[0] is used to dump region 0 after HuC is run, which has updated probabilities. Input Region 0 is dumped separetely before HuC.
3167         hucRegionSize[0] = 32 * CODECHAL_CACHELINE_SIZE;
3168         hucRegionName[1] = "_CountersBuffer";
3169         hucRegionSize[1] = 193 * CODECHAL_CACHELINE_SIZE;
3170         hucRegionName[2] = "_ProbBuffer";
3171         hucRegionSize[2] = 32 * CODECHAL_CACHELINE_SIZE;
3172         hucRegionName[3] = "_ProbDeltaBuffer";
3173         hucRegionSize[3] = 29 * CODECHAL_CACHELINE_SIZE;
3174         hucRegionName[4] = "_UncompressedHdr";
3175         hucRegionSize[4] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
3176         hucRegionName[5] = "_CompressedHdr";
3177         hucRegionSize[5] = 32 * CODECHAL_CACHELINE_SIZE;
3178         hucRegionName[6] = "_SecondLevelBatchBuffer";
3179         hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
3180         hucRegionName[7] = "_SecondLevelBatchBuffer";
3181         hucRegionSize[7] = m_vdencPicStateSecondLevelBatchBufferSize;
3182         hucRegionName[8] = "_UncompressedHdr";
3183         hucRegionSize[8] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
3184         hucRegionName[9] = "_DefaultProbs";
3185         hucRegionSize[9] = sizeof(Keyframe_Default_Probs) + sizeof(Inter_Default_Probs);
3186         hucRegionName[10] = "_SuperFrameBuffer";
3187         hucRegionSize[10] = CODECHAL_ENCODE_VP9_BRC_SUPER_FRAME_BUFFER_SIZE;
3188         hucRegionName[11] = "_DataExtension";
3189         hucRegionSize[11] = CODECHAL_ENCODE_VP9_VDENC_DATA_EXTENSION_SIZE;
3190     )
3191 
3192     MOS_COMMAND_BUFFER cmdBuffer;
3193     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3194 
3195     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
3196     {
3197         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3198         // Send command buffer header at the beginning (OS dependent)
3199         // frame tracking tag is only added in the last command buffer header
3200         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
3201         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3202         m_firstTaskInPhase = false;
3203     }
3204     int currPass = GetCurrentPass();
3205 
3206     // load kernel from WOPCM into L2 storage RAM
3207     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
3208     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
3209     imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencProbKernelDescriptor;
3210     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
3211 
3212     // pipe mode select
3213     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
3214     pipeModeSelectParams.Mode = m_mode;
3215     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
3216 
3217     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCVp9Prob());
3218 
3219     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
3220     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
3221     dmemParams.presHucDataSource = &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx];
3222     dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucProbDmem), CODECHAL_CACHELINE_SIZE);
3223     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
3224     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
3225 
3226     // Add Virtual addr
3227     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
3228     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3229     // Input regions
3230     virtualAddrParams.regionParams[0].presRegion = &m_resProbBuffer[m_vp9PicParams->PicFlags.fields.frame_context_idx];
3231     virtualAddrParams.regionParams[0].isWritable = true;        // Region 0 is both read and write for HuC. Has input probabilities before running HuC and updated probabilities after running HuC, which will then be input to next pass
3232     if (m_scalableMode)
3233     {
3234         virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3235         virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.counterBuffer;
3236     }
3237     else
3238     {
3239         virtualAddrParams.regionParams[1].presRegion = &m_resProbabilityCounterBuffer;
3240         virtualAddrParams.regionParams[1].dwOffset = 0;
3241     }
3242     // If BRC enabled, BRC Pass 2 output SLBB -> input SLBB for HPU on pass 3 (HPU pass 1 and 3. BRC Update pass 1 and 2)
3243     //                 BRC Pass 1 output SLBB -> input SLBB for HPU on pass 1
3244     // If BRC not on , Driver prepared SLBB   -> input to HPU on both passes
3245 
3246     if (m_vdencBrcEnabled)
3247     {
3248         virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
3249     }
3250     else
3251     {
3252         virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
3253     }
3254 
3255     virtualAddrParams.regionParams[8].presRegion = &m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
3256     virtualAddrParams.regionParams[9].presRegion = &m_resHucDefaultProbBuffer;
3257 
3258     // Output regions
3259     virtualAddrParams.regionParams[2].presRegion  = &m_resHucProbOutputBuffer;  // Final probability output from HuC after each pass
3260     virtualAddrParams.regionParams[2].isWritable = true;
3261     virtualAddrParams.regionParams[3].presRegion  = &m_resProbabilityDeltaBuffer;
3262     virtualAddrParams.regionParams[3].isWritable = true;
3263     virtualAddrParams.regionParams[4].presRegion  = &m_resHucPakInsertUncompressedHeaderWriteBuffer;
3264     virtualAddrParams.regionParams[4].isWritable = true;
3265     virtualAddrParams.regionParams[5].presRegion  = &m_resCompressedHeaderBuffer;
3266     virtualAddrParams.regionParams[5].isWritable = true;
3267     virtualAddrParams.regionParams[6].presRegion  = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
3268     virtualAddrParams.regionParams[6].isWritable = true;
3269     virtualAddrParams.regionParams[10].presRegion = &m_resBitstreamBuffer;
3270     virtualAddrParams.regionParams[10].isWritable = true;
3271     virtualAddrParams.regionParams[11].presRegion = &m_resVdencDataExtensionBuffer;
3272     virtualAddrParams.regionParams[11].isWritable = true;
3273 
3274     m_hpuVirtualAddrParams = virtualAddrParams;
3275     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
3276     // Store HUC_STATUS2 register bit 6 before HUC_Start command
3277     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
3278     // (HUC_Start command with last start bit set).
3279     CODECHAL_DEBUG_TOOL(
3280         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
3281     )
3282 
3283     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
3284 
3285     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
3286 
3287     // wait Huc completion (use HEVC bit for now)
3288     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3289     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3290     vdPipeFlushParams.Flags.bFlushHEVC = 1;
3291     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3292     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
3293 
3294     // Flush the engine to ensure memory written out
3295     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3296     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3297     flushDwParams.bVideoPipelineCacheInvalidate = true;
3298     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3299 
3300     // Write HUC_STATUS mask: DW1 (mask value)
3301     MHW_MI_STORE_DATA_PARAMS storeDataParams;
3302     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3303     storeDataParams.pOsResource = &m_resHucPakMmioBuffer;
3304     storeDataParams.dwResourceOffset = sizeof(uint32_t);
3305     storeDataParams.dwValue = 1 << 31; //Repak bit for HUC is bit 31
3306     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
3307 
3308     // store HUC_STATUS register
3309     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
3310     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
3311     storeRegParams.presStoreBuffer = &m_resHucPakMmioBuffer;
3312     storeRegParams.dwOffset = 0;
3313     storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1)->hucStatusRegOffset;
3314     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
3315 
3316     // For superframe pass, after HuC executes, write the updated size (combined frame size) to status report
3317     // So app knows total size instead of just the showframe size
3318     if (m_superFrameHucPass)
3319     {
3320         EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
3321         uint32_t baseOffset =
3322             (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) +
3323             sizeof(uint32_t) * 2;  // encodeStatus is offset by 2 DWs in the resource
3324 
3325         MHW_MI_COPY_MEM_MEM_PARAMS copyMemMemParams;
3326         MOS_ZeroMemory(&copyMemMemParams, sizeof(copyMemMemParams));
3327 
3328         copyMemMemParams.presSrc = virtualAddrParams.regionParams[11].presRegion;
3329         copyMemMemParams.dwSrcOffset = 0; // Updated framesize is 1st DW in buffer
3330         copyMemMemParams.presDst = &encodeStatusBuf->resStatusBuffer;
3331         copyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf->dwBSByteCountOffset;
3332 
3333         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(
3334             &cmdBuffer,
3335             &copyMemMemParams));
3336     }
3337 
3338     if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
3339     {
3340         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3341         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3342     }
3343 
    // Dump input probabilities before running HuC
3345     CODECHAL_DEBUG_TOOL(
3346        m_debugInterface->DumpHucRegion(
3347             virtualAddrParams.regionParams[0].presRegion,
3348             0,
3349             hucRegionSize[0],
3350             0,
3351             "_ProbBuffer",
3352             (virtualAddrParams.regionParams[0].isWritable ? true : false),
3353             currPass,
3354             CodechalHucRegionDumpType::hucRegionDumpHpu);
3355     )
3356 
3357     ReturnCommandBuffer(&cmdBuffer);
3358 
3359     // For Temporal scaling, super frame pass is initiated after the command buffer submission in ExecuteSliceLevel.
3360     // So if Single Task Phase is enabled, then we need to explicitly submit the command buffer here to call HuC
3361     if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
3362     {
3363         bool renderFlags = m_videoContextUsesNullHw;
3364 
3365         CODECHAL_DEBUG_TOOL(
3366             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
3367             &cmdBuffer,
3368             CODECHAL_NUM_MEDIA_STATES,
3369             ((currPass == 0)? "HPU_Pass0":"HPU_Pass1"))));
3370 
3371         if (m_superFrameHucPass) {
3372             CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3373             //For superframe submission, this is the last submission so add frame tracking header
3374             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, m_vp9PicParams->PicFlags.fields.super_frame));
3375             ReturnCommandBuffer(&cmdBuffer);
3376         }
3377 
3378         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
3379 
3380         CODECHAL_DEBUG_TOOL(
3381         if(m_superFrameHucPass)
3382         {
3383             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3384                 &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx],
3385                 sizeof(HucProbDmem),
3386                 currPass,
3387                 CodechalHucRegionDumpType::hucRegionDumpHpuSuperFrame));
3388         }
3389         else
3390         {
3391             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3392                 &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx],
3393                 sizeof(HucProbDmem),
3394                 currPass,
3395                 CodechalHucRegionDumpType::hucRegionDumpHpu));
3396         }
3397 
3398             for (auto i = 0; i < 16; i++) {
3399                 if (virtualAddrParams.regionParams[i].presRegion)
3400                 {
3401                     if (m_scalableMode && m_isTilingSupported && virtualAddrParams.regionParams[i].isWritable && i != 11)
3402                     {
3403                         continue;
3404                     }
3405                     m_debugInterface->DumpHucRegion(
3406                         virtualAddrParams.regionParams[i].presRegion,
3407                         virtualAddrParams.regionParams[i].dwOffset,
3408                         hucRegionSize[i],
3409                         i,
3410                         hucRegionName[i],
3411                         !virtualAddrParams.regionParams[i].isWritable,
3412                         currPass,
3413                         CodechalHucRegionDumpType::hucRegionDumpHpu);
3414                 }
3415             })
3416     }
3417 
3418     return eStatus;
3419 }
3420 
3421 /*----------------------------------------------------------------------------
3422 | Name      : HuCBrcUpdate
3423 | Purpose   : Start/Submit VP9 HuC BrcUpdate kernel to HW
3424 |
3425 | Returns   : MOS_STATUS
3426 \---------------------------------------------------------------------------*/
HuCBrcUpdate()3427 MOS_STATUS CodechalVdencVp9StateG11::HuCBrcUpdate()
3428 {
3429     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3430 
3431     CODECHAL_ENCODE_FUNCTION_ENTER;
3432 
3433     int currPass = GetCurrentPass();
3434 
3435     CODECHAL_DEBUG_TOOL(
3436         uint32_t hucRegionSize[16];
3437         const char* hucRegionName[16];
3438 
3439         hucRegionName[0] = "_BrcHistory";
3440         hucRegionSize[0] = m_brcHistoryBufferSize;
3441         hucRegionName[1] = "_VDEncStats";
3442         hucRegionSize[1] = m_vdencBrcStatsBufferSize;
3443         hucRegionName[2] = "_PAKStats";
3444         hucRegionSize[2] = m_vdencBrcPakStatsBufferSize;
3445         hucRegionName[3] = "_InputSLBB";
3446         hucRegionSize[3] = m_vdencPicStateSecondLevelBatchBufferSize;
3447         hucRegionName[4] = "_BRCData";
3448         hucRegionSize[4] = CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE;
3449         hucRegionName[5] = "_ConstData";
3450         hucRegionSize[5] = m_brcConstantSurfaceSize;
3451         hucRegionName[6] = "_OutputSLBB";
3452         hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
3453         hucRegionName[7] = "_PAKMMIO";
3454         hucRegionSize[7] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
3455     )
3456 
3457     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
3458 #if (_DEBUG || _RELEASE_INTERNAL)
3459     if (m_swBrcMode)
3460     {
3461         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
3462         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType));
3463         // Set region params for dumping only
3464         MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3465         virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
3466         virtualAddrParams.regionParams[0].isWritable = true;
3467         virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
3468         virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
3469         virtualAddrParams.regionParams[3].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
3470         virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
3471         virtualAddrParams.regionParams[4].isWritable = true;
3472         virtualAddrParams.regionParams[5].presRegion = GetBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType);
3473         virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
3474         virtualAddrParams.regionParams[6].isWritable = true;
3475         virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
3476         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(// Dump history IN since it's both IN/OUT, OUT will dump at end of function, rest of buffers are IN XOR OUT (not both)
3477             virtualAddrParams.regionParams[0].presRegion,
3478             virtualAddrParams.regionParams[0].dwOffset,
3479             hucRegionSize[0],
3480             0,
3481             hucRegionName[0],
3482             true,
3483             currPass,
3484             CodechalHucRegionDumpType::hucRegionDumpUpdate));
3485         CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(true));
3486 
3487         CODECHAL_DEBUG_TOOL(
3488             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3489                 &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx],
3490                 sizeof(HucBrcUpdateDmem),  // Change buffer and size to update dmem
3491                 currPass,
3492                 CodechalHucRegionDumpType::hucRegionDumpUpdate));
3493 
3494             for (auto i = 0; i < 16; i++) {
3495                 if (virtualAddrParams.regionParams[i].presRegion)
3496                 {
3497                     m_debugInterface->DumpHucRegion(
3498                         virtualAddrParams.regionParams[i].presRegion,
3499                         virtualAddrParams.regionParams[i].dwOffset,
3500                         hucRegionSize[i],
3501                         i,
3502                         hucRegionName[i],
3503                         !virtualAddrParams.regionParams[i].isWritable,
3504                         currPass,
3505                         CodechalHucRegionDumpType::hucRegionDumpUpdate);
3506                 }
3507             });
3508         // We increment by the average frame value once for each frame
3509         if (IsFirstPass())
3510         {
3511             m_curTargetFullness += m_inputBitsPerFrame;
3512         }
3513 
3514         return eStatus;
3515     }
3516 #endif
3517 
3518     MOS_COMMAND_BUFFER cmdBuffer;
3519     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3520 
3521     if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit)) && !m_scalableMode)
3522     {
3523         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3524         // Send command buffer header at the beginning (OS dependent)
3525         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
3526         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3527 
3528         m_firstTaskInPhase = false;
3529     }
3530 
3531     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType));
3532 
3533     // load kernel from WOPCM into L2 storage RAM
3534     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
3535     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
3536     imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcUpdateKernelDescriptor;
3537     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
3538 
3539     // pipe mode select
3540     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
3541     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
3542 
3543     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
3544 
3545     // set HuC DMEM param
3546     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
3547     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
3548     dmemParams.presHucDataSource = &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx];
3549     dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcUpdateDmem), CODECHAL_CACHELINE_SIZE);
3550     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; // how to set?
3551     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
3552 
3553     // Set surfaces to HuC regions
3554     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3555 
3556     // History Buffer - IN/OUT
3557     virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
3558     virtualAddrParams.regionParams[0].isWritable = true;
3559     if (IsFirstPass()) // First BRC pass needs stats from last frame
3560     {
3561         if (m_lastFrameScalableMode) // Frame (n-1) Scalable mode stats output -> input for frame n, BRC Pass 0
3562         {
3563             // VDEnc Stats Buffer - IN
3564             virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3565             virtualAddrParams.regionParams[1].dwOffset = m_tileStatsOffset.vdencStats;
3566             // Frame (not PAK) Stats Buffer - IN
3567             virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3568             virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
3569             // PAK MMIO - IN
3570             virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
3571         }
3572         else
3573         {
3574             virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
3575             virtualAddrParams.regionParams[1].dwOffset = 0;
3576             virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
3577             virtualAddrParams.regionParams[2].dwOffset = 0;
3578             virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
3579         }
3580     }
3581     else // Second BRC Update Pass
3582     {
3583         if (m_scalableMode)
3584         {
3585             // VDEnc Stats Buffer - IN
3586             virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3587             virtualAddrParams.regionParams[1].dwOffset = m_tileStatsOffset.vdencStats;
3588             // Frame (not PAK) Stats Buffer - IN
3589             virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
3590             virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
3591             // PAK MMIO - IN
3592             virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
3593         }
3594         else
3595         {
3596             virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
3597             virtualAddrParams.regionParams[1].dwOffset = 0;
3598             virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
3599             virtualAddrParams.regionParams[2].dwOffset = 0;
3600             virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
3601         }
3602     }
3603 
3604     //For Dys + BRC Pass 0, use the resVdencDysPictureState2ndLevelBatchBuffer as input buffer
3605     virtualAddrParams.regionParams[3].presRegion = (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled) ? &m_resVdencDysPictureState2NdLevelBatchBuffer : &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
3606     // BRC Data - OUT
3607     virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
3608     virtualAddrParams.regionParams[4].isWritable = true;
3609 
3610     // Const Data - IN
3611     virtualAddrParams.regionParams[5].presRegion = GetBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType);
3612 
3613     // Output SLBB - OUT
3614     virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
3615     virtualAddrParams.regionParams[6].isWritable = true;
3616 
3617     // Load HuC Regions into Cmd Buf
3618     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
3619 
3620     // Store HUC_STATUS2 register bit 6 before HUC_Start command
3621     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
3622     // (HUC_Start command with last start bit set).
3623     CODECHAL_DEBUG_TOOL(
3624         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
3625     )
3626 
3627     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
3628 
3629     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
3630 
3631     // wait Huc completion (use HEVC bit for now)
3632     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3633     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3634     vdPipeFlushParams.Flags.bFlushHEVC = 1;
3635     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3636     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
3637 
3638     // Flush the engine to ensure memory written out
3639     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3640     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3641     flushDwParams.bVideoPipelineCacheInvalidate = true;
3642     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3643 
3644     if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
3645     {
3646         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3647         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3648     }
3649 
3650     ReturnCommandBuffer(&cmdBuffer);
3651 
3652     if (!m_singleTaskPhaseSupported)
3653     {
3654         bool renderingFlags = m_videoContextUsesNullHw;
3655 
3656         // Dump history input before HuC runs
3657         CODECHAL_DEBUG_TOOL(
3658             m_debugInterface->DumpHucRegion(
3659                 virtualAddrParams.regionParams[0].presRegion,
3660                 0,
3661                 hucRegionSize[0],
3662                 0,
3663                 hucRegionName[0],
3664                 true,
3665                 currPass,
3666                 CodechalHucRegionDumpType::hucRegionDumpUpdate);
3667         );
3668 
3669         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
3670 
3671         CODECHAL_DEBUG_TOOL(
3672             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3673                 &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx],
3674                 sizeof(HucBrcUpdateDmem),  // Change buffer and size to update dmem
3675                 currPass,
3676                 CodechalHucRegionDumpType::hucRegionDumpUpdate));
3677 
3678             for (auto i = 0; i < 16; i++) {
3679                 if (virtualAddrParams.regionParams[i].presRegion)
3680                 {
3681                     m_debugInterface->DumpHucRegion(
3682                         virtualAddrParams.regionParams[i].presRegion,
3683                         virtualAddrParams.regionParams[i].dwOffset,
3684                         hucRegionSize[i],
3685                         i,
3686                         hucRegionName[i],
3687                         !virtualAddrParams.regionParams[i].isWritable,
3688                         currPass,
3689                         CodechalHucRegionDumpType::hucRegionDumpUpdate);
3690                 }
3691             })
3692     }
3693 
3694     // We increment by the average frame value once for each frame
3695     if (IsFirstPass())
3696     {
3697         m_curTargetFullness += m_inputBitsPerFrame;
3698     }
3699 
3700     return eStatus;
3701 }
3702 
3703 /*----------------------------------------------------------------------------
3704 | Name      : HuCBrcInitReset
3705 | Purpose   : Start/Submit VP9 HuC BrcInit kernel to HW
3706 |
3707 | Returns   : MOS_STATUS
3708 \---------------------------------------------------------------------------*/
HuCBrcInitReset()3709 MOS_STATUS CodechalVdencVp9StateG11::HuCBrcInitReset()
3710 {
3711     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3712 
3713     CODECHAL_ENCODE_FUNCTION_ENTER;
3714 
3715     int currPass = GetCurrentPass();
3716 
3717     CODECHAL_DEBUG_TOOL(
3718         uint32_t hucRegionSize[16];
3719         const char* hucRegionName[16];
3720 
3721         hucRegionName[0] = "_BrcHistoryBuffer";
3722         hucRegionSize[0] = m_brcHistoryBufferSize;
3723     )
3724 
3725     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
3726 #if (_DEBUG || _RELEASE_INTERNAL)
3727     if (m_swBrcMode)
3728     {
3729         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
3730         CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(false));
3731         // Set region params for dumping only
3732         MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3733         virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
3734         virtualAddrParams.regionParams[0].isWritable = true;
3735         m_inputBitsPerFrame                          = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
3736         m_curTargetFullness                          = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
3737 
3738         CODECHAL_DEBUG_TOOL(
3739             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3740                 &m_resVdencBrcInitDmemBuffer,
3741                 sizeof(HucBrcInitDmem),
3742                 0,
3743                 CodechalHucRegionDumpType::hucRegionDumpInit));
3744 
3745             for (auto i = 0; i < 16; i++) {
3746                 if (virtualAddrParams.regionParams[i].presRegion)
3747                 {
3748                     m_debugInterface->DumpHucRegion(
3749                         virtualAddrParams.regionParams[i].presRegion,
3750                         virtualAddrParams.regionParams[i].dwOffset,
3751                         hucRegionSize[i],
3752                         i,
3753                         hucRegionName[i],
3754                         !virtualAddrParams.regionParams[i].isWritable,
3755                         currPass,
3756                         CodechalHucRegionDumpType::hucRegionDumpInit);
3757                 }
3758             })
3759         return eStatus;
3760     }
3761 #endif
3762     MOS_COMMAND_BUFFER cmdBuffer;
3763     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3764 
3765     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
3766     {
3767         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3768         // Send command buffer header at the beginning (OS dependent)
3769         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
3770         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3771 
3772         m_firstTaskInPhase = false;
3773     }
3774 
3775     // load kernel from WOPCM into L2 storage RAM
3776     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
3777     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
3778     imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcInitKernelDescriptor;
3779     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
3780 
3781     // pipe mode select
3782     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
3783     pipeModeSelectParams.Mode = m_mode;
3784     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
3785 
3786     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
3787 
3788     m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
3789     m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
3790 
3791     // set HuC DMEM param
3792     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
3793     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
3794     dmemParams.presHucDataSource = &m_resVdencBrcInitDmemBuffer;
3795     dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcInitDmem), CODECHAL_CACHELINE_SIZE);
3796     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
3797     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
3798 
3799     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
3800     virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
3801     virtualAddrParams.regionParams[0].isWritable = true;
3802     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
3803 
3804     // Store HUC_STATUS2 register bit 6 before HUC_Start command
3805     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
3806     // (HUC_Start command with last start bit set).
3807     CODECHAL_DEBUG_TOOL(
3808         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
3809     )
3810 
3811     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
3812 
3813     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
3814 
3815     // wait Huc completion (use HEVC bit for now)
3816     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3817     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3818     vdPipeFlushParams.Flags.bFlushHEVC = 1;
3819     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3820     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
3821 
3822     // Flush the engine to ensure memory written out
3823     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3824     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3825     flushDwParams.bVideoPipelineCacheInvalidate = true;
3826     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3827 
3828     if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
3829     {
3830         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
3831         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3832     }
3833 
3834     ReturnCommandBuffer(&cmdBuffer);
3835 
3836     if (!m_singleTaskPhaseSupported)
3837     {
3838         bool renderingFlags = m_videoContextUsesNullHw;
3839 
3840         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
3841 
3842         CODECHAL_DEBUG_TOOL(
3843             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
3844                 &m_resVdencBrcInitDmemBuffer,
3845                 sizeof(HucBrcInitDmem),
3846                 0,
3847                 CodechalHucRegionDumpType::hucRegionDumpInit));
3848 
3849             for (auto i = 0; i < 16; i++) {
3850                 if (virtualAddrParams.regionParams[i].presRegion)
3851                 {
3852                     m_debugInterface->DumpHucRegion(
3853                         virtualAddrParams.regionParams[i].presRegion,
3854                         virtualAddrParams.regionParams[i].dwOffset,
3855                         hucRegionSize[i],
3856                         i,
3857                         hucRegionName[i],
3858                         !virtualAddrParams.regionParams[i].isWritable,
3859                         0,
3860                         CodechalHucRegionDumpType::hucRegionDumpInit);
3861                 }
3862             })
3863     }
3864 
3865     return eStatus;
3866 }
3867 
SetSequenceStructs()3868 MOS_STATUS CodechalVdencVp9StateG11::SetSequenceStructs()
3869 {
3870     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3871 
3872     CODECHAL_ENCODE_FUNCTION_ENTER;
3873 
3874     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetSequenceStructs());
3875 
3876     // All pipe need to go through the picture-level and slice-level commands
3877     m_numPassesInOnePipe = m_numPasses;
3878     m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
3879 
3880     return eStatus;
3881 }
3882 
SetPictureStructs()3883 MOS_STATUS CodechalVdencVp9StateG11::SetPictureStructs()
3884 {
3885     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3886 
3887     CODECHAL_ENCODE_FUNCTION_ENTER;
3888 
3889     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetPictureStructs());
3890 
3891     m_virtualEngineBBIndex = m_currOriginalPic.FrameIdx;
3892 
3893     if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
3894     {
3895         if (!m_hucEnabled)
3896         {
3897             m_numPassesInOnePipe = (m_dysRefFrameFlags != DYS_REF_NONE);
3898         }
3899         if (m_vdencBrcEnabled)
3900         {
3901             //Reduce per pipe passes by 1, as m_numPassesInOnePipe == 1 becomes m_numPassesInOnePipe = 0 for Huc to run
3902             m_dysBrc = true;
3903             m_numPassesInOnePipe = (m_numPassesInOnePipe > 0 ) ? m_numPassesInOnePipe - 1 : m_numPassesInOnePipe;
3904         }
3905         else
3906         {
3907             m_dysCqp = true;
3908         }
3909         m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
3910     }
3911     return eStatus;
3912 }
3913 
ExecutePictureLevel()3914 MOS_STATUS CodechalVdencVp9StateG11::ExecutePictureLevel()
3915 {
3916     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3917 
3918     CODECHAL_ENCODE_FUNCTION_ENTER;
3919 
3920     CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
3921 
3922     PerfTagSetting perfTag;
3923     perfTag.Value = 0;
3924     perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
3925     perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
3926     perfTag.PictureCodingType = m_pictureCodingType;
3927     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
3928 
3929     // Scalable Mode header
3930     if (m_scalableMode)
3931     {
3932         MOS_COMMAND_BUFFER cmdBuffer;
3933         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3934 
3935         bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
3936         // In scalable mode, command buffer header is sent on last pipe only
3937         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3938         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
3939     }
3940 
3941     if (IsFirstPass() && IsFirstPipe())
3942     {
3943         CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPakInsertObjBatchBuf(&m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx]));
3944     }
3945 
3946     // For VDENC dynamic scaling, here are the steps we need to process
3947     //   Pass 0. VDENC + PAK Pass
3948     //     a. If this is Dys + BRC case, then run BRC Pass 0
3949     //     b. Ref frame scaling
3950     //    c. VDENC + PAK pass to stream out PakObjCmd
3951     //   Pass 1 -> Reset to Pass 0 so as to run HPU Pass 0
3952     //     a. If this is Dys + BRC case, then run BRC Pass 1
3953     //     b. Run HPU Pass 0
3954     //     c. Lite Pass (Pak only multi pass enabled) to stream in
3955     //        PakObjCmd from previous pass
3956     //   Pass 1 -> Only run HPU Pass 1 to update the probabilities for
3957     //   next frame. Repak is disabled for performance reasons
3958     if (m_dysRefFrameFlags != DYS_REF_NONE)
3959     {
3960         if (m_currPass == 0)
3961         {
3962             if (m_dysVdencMultiPassEnabled)
3963             {
3964                 if (Mos_ResourceIsNull(&m_resVdencDysPictureState2NdLevelBatchBuffer))
3965                 {
3966                     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3967 
3968                     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3969                     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3970                     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3971                     allocParamsForBufferLinear.Format = Format_Buffer;
3972                     allocParamsForBufferLinear.dwBytes = m_vdencPicStateSecondLevelBatchBufferSize;
3973                     allocParamsForBufferLinear.pBufName = "VDEnc DYS Picture Second Level Batch Buffer";
3974 
3975                     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
3976                         m_osInterface,
3977                         &allocParamsForBufferLinear,
3978                         &m_resVdencDysPictureState2NdLevelBatchBuffer);
3979 
3980                     if (eStatus != MOS_STATUS_SUCCESS)
3981                     {
3982                         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate VDEnc DYS Picture Second Level Batch Buffer.");
3983                         return eStatus;
3984                     }
3985                 }
3986 
3987                 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
3988             }
3989         }
3990         else if (m_currPass == 1)
3991         {
3992             m_hucEnabled = m_dysHucEnabled; // recover huc state
3993             m_vdencPakonlyMultipassEnabled = true;
3994             m_dysRefFrameFlags = DYS_REF_NONE;
3995             m_currPass = 0; // reset ucCurrPass = 0 to run the Huc
3996             m_lastTaskInPhase = false;
3997         }
3998     }
3999     else
4000     {
4001         if (IsFirstPass() && m_vdencBrcEnabled)
4002         {
4003             m_vdencPakObjCmdStreamOutEnabled = true;
4004             m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
4005         }
4006         else
4007         {
4008             m_vdencPakObjCmdStreamOutEnabled = false;
4009         }
4010     }
4011     if (m_isTilingSupported)
4012     {
4013         MOS_LOCK_PARAMS lockFlagsWriteOnly;
4014         uint8_t* tileStatsData = nullptr;
4015         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4016         lockFlagsWriteOnly.WriteOnly = 1;
4017         if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBBIndex].sResource))
4018         {
4019             // Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
4020             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4021             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4022             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4023             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4024             allocParamsForBufferLinear.Format = Format_Buffer;
4025             auto size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
4026             allocParamsForBufferLinear.dwBytes = size;
4027             allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
4028 
4029             CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4030                 m_osInterface,
4031                 &allocParamsForBufferLinear,
4032                 &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource));
4033             m_tileRecordBuffer[m_virtualEngineBBIndex].dwSize = size;
4034 
4035             auto tileRecordData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
4036 
4037             MOS_ZeroMemory(tileRecordData, allocParamsForBufferLinear.dwBytes);
4038             m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource);
4039         }
4040     }
4041     // Running in the multiple VDBOX mode, Allocate Required Buffers for Tile based operation. Do this only once per frame.
4042     if (m_isTilingSupported && m_scalableMode && m_hucEnabled && IsFirstPipe() && IsFirstPass())
4043     {
4044         // Max row is 4 by VP9 Spec
4045         uint32_t m_maxScalableModeRows = 4;
4046         uint32_t m_maxScalableModeTiles = m_numVdbox * m_maxScalableModeRows;
4047 
4048         // Fill Pak integration kernel input tile stats structure
4049         MOS_ZeroMemory(&m_tileStatsOffset, sizeof(StatsInfo));
4050         // TileSizeRecord has to be 4k aligned
4051         m_tileStatsOffset.tileSizeRecord = 0; // TileReord is in a separated resource
4052         // VdencStats has to be 4k aligned
4053         m_tileStatsOffset.vdencStats = 0; // vdencStats is head of m_tileStatsPakIntegrationBuffer
4054         // VP9PAKStats has to be 64 byte aligned
4055         m_tileStatsOffset.pakStats = MOS_ALIGN_CEIL((m_tileStatsOffset.vdencStats + (m_maxScalableModeTiles * m_statsSize.vdencStats)), CODECHAL_PAGE_SIZE);
4056         // VP9CounterBuffer has to be 4k aligned
4057         m_tileStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_tileStatsOffset.pakStats + (m_maxScalableModeTiles * m_statsSize.pakStats)), CODECHAL_PAGE_SIZE);
4058 
4059         MOS_LOCK_PARAMS lockFlagsWriteOnly;
4060         uint8_t* tileStatsData = nullptr;
4061         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4062         lockFlagsWriteOnly.WriteOnly = 1;
4063 
4064         if (Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource))
4065         {
4066             // Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
4067             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4068             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4069             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4070             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4071             allocParamsForBufferLinear.Format = Format_Buffer;
4072             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL((m_tileStatsOffset.counterBuffer + (m_maxScalableModeTiles * m_statsSize.counterBuffer)), CODECHAL_PAGE_SIZE);
4073             allocParamsForBufferLinear.pBufName = "GEN11 Tile Level Statistics Buffer";
4074 
4075             m_tileStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
4076 
4077             CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4078                 m_osInterface,
4079                 &allocParamsForBufferLinear,
4080                 &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource));
4081             m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].dwSize = allocParamsForBufferLinear.dwBytes;
4082 
4083             tileStatsData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
4084 
4085             MOS_ZeroMemory(tileStatsData, allocParamsForBufferLinear.dwBytes);
4086             m_osInterface->pfnUnlockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource);
4087         }
4088     }
4089 
4090     int currPass = GetCurrentPass();
4091     if ((m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled)
4092     {
4093         CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencDysPictureState2NdLevelBatchBuffer));
4094     }
4095     else
4096     {
4097         if (IsFirstPipe())
4098         {
4099             CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex]));
4100         }
4101     }
4102 
4103     if (IsFirstPipe() && m_vdencBrcEnabled)
4104     {
4105         // Invoke BRC init/reset FW
4106         if (m_brcInit || m_brcReset)
4107         {
4108             if (!m_singleTaskPhaseSupported)
4109             {
4110                 //Reset earlier set PAK perf tag
4111                 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4112                 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET);
4113             }
4114             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
4115             m_brcInit = m_brcReset = false;
4116         }
4117         // For multipass and singlepass+RePAK we call BRC update for all passes except last pass (RePAK)
4118         // For single pass w/o RePAK (1 total pass) we call BRC update on one and only pass
4119         if (!IsLastPass() || (m_currPass == 0 && m_numPasses == 0))
4120         {
4121             bool origSingleTaskPhase = m_singleTaskPhaseSupported;
4122             bool origFrameTrackingHeader = false;
4123 
4124             // If this is the case of Dynamic Scaling + BRC Pass 0'  VDENC + Pak  pass
4125             // Disable SingleTaskPhase before running 1st BRC update
4126             // To run HPU0 on the next pass i.e Pak only pass, we make Pass 1 as Pass 0 in which case the
4127             // BRC dmem buffer( resVdencBrcUpdateDmemBuffer[0] ) will get overridden if we do not submit BRC command now.
4128             if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
4129             {
4130                 m_singleTaskPhaseSupported = false;
4131 
4132                 //Reset Frame Tracking Header for this submission
4133                 MOS_COMMAND_BUFFER cmdBuffer;
4134                 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
4135                 origFrameTrackingHeader = cmdBuffer.Attributes.bEnableMediaFrameTracking;
4136                 cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
4137                 ReturnCommandBuffer(&cmdBuffer);
4138             }
4139 
4140             if (!m_singleTaskPhaseSupported)
4141             {
4142                 //Reset performance buffer used for BRC init
4143                 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4144                 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
4145             }
4146             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
4147             //Restore the original state of SingleTaskPhaseSupported flag
4148             m_singleTaskPhaseSupported = origSingleTaskPhase;
4149 
4150             //Restore Original Frame Tracking Header
4151             if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
4152             {
4153                 MOS_COMMAND_BUFFER cmdBuffer;
4154                 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
4155                 cmdBuffer.Attributes.bEnableMediaFrameTracking = origFrameTrackingHeader;
4156                 ReturnCommandBuffer(&cmdBuffer);
4157             }
4158         }
4159     }
4160 
4161     // run HuC_VP9Prob first pass (it runs in parallel with ENC)
4162     if (m_hucEnabled)
4163     {
4164         if (IsFirstPipe() && (IsFirstPass() || IsLastPass() || (m_vdencBrcEnabled)))  // Before the first PAK pass, for RePak pass and for BRC case, HuC_VP9Prob needs to be called on Pass 1 as well
4165         {
4166             if (!m_singleTaskPhaseSupported)
4167             {
4168                 //Reset earlier set PAK perf tag
4169                 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4170                 // Add Hpu tag here after updated
4171                 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
4172             }
4173             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9Prob());
4174             if (!m_singleTaskPhaseSupported)
4175             {
4176                 //reset performance buffer used for HPU update
4177                 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4178             }
4179         }
4180     }
4181     else
4182     {
4183         CODECHAL_ENCODE_CHK_STATUS_RETURN(RefreshFrameInternalBuffers());
4184     }
4185 
4186     if (m_dysRefFrameFlags != DYS_REF_NONE && IsFirstPass())
4187     {
4188         // Turn off scalability and Tiling for Dynamic scaling pass 0 for reference scaling
4189         uint8_t logTileRows = m_vp9PicParams->log2_tile_rows;
4190         uint8_t logTileColumns = m_vp9PicParams->log2_tile_columns;
4191         bool scalableMode = m_scalableMode;
4192         uint8_t numPipe = m_numPipe;
4193         m_vp9PicParams->log2_tile_rows = 0;
4194         m_vp9PicParams->log2_tile_columns = 0;
4195         m_scalableMode = false;
4196         m_numPipe = 1;
4197         // Execute Reference scaling pass
4198         CODECHAL_ENCODE_CHK_STATUS_RETURN(DysRefFrames());
4199 
4200         // Restore scalability and Tiling status for subsequent passes
4201         m_vp9PicParams->log2_tile_rows = logTileRows;
4202         m_vp9PicParams->log2_tile_columns = logTileColumns;
4203         m_scalableMode = scalableMode;
4204         m_numPipe = numPipe;
4205 
4206         if (m_dysVdencMultiPassEnabled)
4207         {
4208             m_singleTaskPhaseSupported = true;
4209             m_firstTaskInPhase = true;
4210             m_vdencPakObjCmdStreamOutEnabled = true;
4211             m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
4212         }
4213         else
4214         {
4215             m_hucEnabled = m_dysHucEnabled; //recover huc state
4216         }
4217     }
4218 
4219     // set HCP_SURFACE_STATE values
4220     MHW_VDBOX_SURFACE_PARAMS surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID + 1];
4221     for (uint8_t i = 0; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
4222     {
4223         MOS_ZeroMemory(&surfaceParams[i], sizeof(surfaceParams[i]));
4224         surfaceParams[i].Mode = m_mode;
4225         surfaceParams[i].ucSurfaceStateId = i;
4226         surfaceParams[i].ChromaType = m_outputChromaFormat;
4227         surfaceParams[i].bSrc8Pak10Mode   = (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth) && (!m_vp9SeqParams->SeqFlags.fields.SourceBitDepth);
4228 
4229         switch (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth)
4230         {
4231             case VP9_ENCODED_BIT_DEPTH_10: //10 bit encoding
4232             {
4233                 surfaceParams[i].ucBitDepthChromaMinus8 = 2;
4234                 surfaceParams[i].ucBitDepthLumaMinus8 = 2;
4235                 break;
4236             }
4237             default:
4238             {
4239                 surfaceParams[i].ucBitDepthChromaMinus8 = 0;
4240                 surfaceParams[i].ucBitDepthLumaMinus8 = 0;
4241                 break;
4242             }
4243         }
4244     }
4245 
4246     // For PAK engine, we do NOT use scaled reference images even if dynamic scaling is enabled
4247     PMOS_SURFACE refSurface[3], refSurfaceNonScaled[3], dsRefSurface4x[3], dsRefSurface8x[3];
4248     for (auto i = 0; i < 3; i++)
4249     {
4250         refSurface[i] = refSurfaceNonScaled[i] = dsRefSurface4x[i] = dsRefSurface8x[i] = nullptr;
4251     }
4252     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetHcpSrcSurfaceParams(surfaceParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x));
4253 
4254     MOS_COMMAND_BUFFER cmdBuffer;
4255     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
4256 
4257     if (!m_singleTaskPhaseSupported)
4258     {
4259         CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
4260     }
4261 
4262     // Non scalable mode header
4263     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
4264     {
4265         // Send command buffer header at the beginning (OS dependent)
4266         // frame tracking tag is only added in the last command buffer header
4267         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
4268         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
4269     }
4270 
4271     // Place hw semaphore on all other pipe to wait for first pipe HUC to finish. Apply for all passes after extend the Dmen HPU buffer size
4272     int currPipe = GetCurrentPipe();
4273     if (m_scalableMode)
4274     {
4275         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
4276 
4277         //HW Semaphore cmd to make sure all pipes start encode at the same time
4278         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSync, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
4279         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
4280             &m_resPipeStartSync,
4281             &cmdBuffer,
4282             m_numPipe));
4283 
4284         // Program some placeholder cmds to resolve the hazard between pipe sync
4285         MHW_MI_STORE_DATA_PARAMS dataParams;
4286         dataParams.pOsResource = &m_resDelayMinus;
4287         dataParams.dwResourceOffset = 0;
4288         dataParams.dwValue = 0xDE1A;
4289         for (uint32_t i = 0; i < m_numDelay; i++)
4290         {
4291             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
4292                 &cmdBuffer,
4293                 &dataParams));
4294         }
4295 
4296         //clean HW semaphore memory
4297         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSync, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
4298 
4299         //Start Watchdog Timer
4300         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
4301     }
4302 
4303     // clean-up per VDBOX semaphore memory, only in the first BRC pass. Same semaphore is re-used across BRC passes for stitch command
4304     if (IsFirstPass())
4305     {
4306         if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[currPipe].sResource))
4307         {
4308             CODECHAL_ENCODE_CHK_STATUS_RETURN(
4309                 SetSemaphoreMem(
4310                     &m_stitchWaitSemaphoreMem[currPipe].sResource,
4311                     &cmdBuffer,
4312                     false));
4313         }
4314     }
4315 
4316     // Repak conditional batch buffer end based on repak flag written by Huc to HUC_STATUS regster
4317     if (m_hucEnabled && (m_numPasses > 0) && IsLastPass())
4318     {
4319         // Insert conditional batch buffer end
4320         // Bit 30 has been added as a success condition, therefore this needs to be masked to only check 31 for RePAK
4321         // or else if HuC decides not to do RePAK for conditional RePAK yet terminates successfully RePAK will still happen.
4322         // Success = bit 30 set to 1, Do RePAK = bit 31 set to 1, value is always 0; if 0 < memory, continue
4323         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
4324         MOS_ZeroMemory(
4325             &miConditionalBatchBufferEndParams,
4326             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
4327 
4328         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
4329             &m_resHucPakMmioBuffer;
4330         // Make the DisableCompareMask 0, so that the HW will do AND operation on DW0 with Mask DW1, refer to HuCVp9Prob() for the settings
4331         // and compare the result against the Semaphore data which in our case dwValue = 0.
4332         // If result > dwValue then continue execution otherwise terminate the batch buffer
4333         miConditionalBatchBufferEndParams.bDisableCompareMask = false;
4334 
4335         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
4336             &cmdBuffer,
4337             &miConditionalBatchBufferEndParams));
4338     }
4339 
4340     if (IsFirstPipe())
4341     {
4342         CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
4343     }
4344 
4345     // set HCP_PIPE_BUF_ADDR_STATE values
4346     PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams = nullptr;
4347     pipeBufAddrParams = CreateHcpPipeBufAddrParams(pipeBufAddrParams);
4348     CODECHAL_ENCODE_CHK_NULL_RETURN(pipeBufAddrParams);
4349     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetHcpPipeBufAddrParams(*pipeBufAddrParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x));
4350     pipeBufAddrParams->pRawSurfParam = &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID];
4351     pipeBufAddrParams->pDecodedReconParam = &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID];
4352 #ifdef _MMC_SUPPORTED
4353     // In case of dynamic scaling refSurface is scaled for pass 0 and nonscaled for last pass
4354     // This ensures correct references are passed in for MMC
4355     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetPipeBufAddr(pipeBufAddrParams, refSurface, &cmdBuffer));
4356 #endif
4357 
4358     // set HCP_PIPE_MODE_SELECT values
4359     PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams = nullptr;
4360     pipeModeSelectParams = m_vdencInterface->CreateMhwVdboxPipeModeSelectParams();
4361     CODECHAL_ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
4362     SetHcpPipeModeSelectParams(*pipeModeSelectParams);
4363 
4364     auto delete_func = [&]()
4365     {
4366         if (pipeModeSelectParams)
4367         {
4368             MOS_Delete(pipeModeSelectParams);
4369             pipeModeSelectParams = nullptr;
4370         }
4371     };
4372 
4373     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), delete_func);
4374 
4375     // This wait cmd is needed to make sure copy is done as suggested by HW folk
4376     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, false), delete_func);
4377 
4378     // Decoded picture
4379     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]), delete_func);
4380 
4381     // Source input
4382     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]), delete_func);
4383 
4384     // Last reference picture
4385     if (refSurface[0])
4386     {
4387         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]), delete_func);
4388     }
4389 
4390     if (MEDIA_IS_WA(m_waTable, Wa_Vp9UnalignedHeight))
4391     {
4392         uint32_t real_height = m_oriFrameHeight;
4393         uint32_t aligned_height = MOS_ALIGN_CEIL(real_height, CODEC_VP9_MIN_BLOCK_HEIGHT);
4394 
4395         fill_pad_with_value(m_rawSurfaceToPak, real_height, aligned_height);
4396     }
4397 
4398     // Golden reference picture
4399     if (refSurface[1])
4400     {
4401         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]), delete_func);
4402     }
4403 
4404     // Alt reference picture
4405     if (refSurface[2])
4406     {
4407         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]), delete_func);
4408     }
4409 
4410     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams), delete_func);
4411 
4412     // set HCP_IND_OBJ_BASE_ADDR_STATE values
4413     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
4414     SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
4415     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams), delete_func);
4416 
4417     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), delete_func);
4418     if (pipeModeSelectParams)
4419     {
4420         MOS_Delete(pipeModeSelectParams);
4421         pipeModeSelectParams = nullptr;
4422     }
4423 
4424     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
4425     if (m_pictureCodingType == I_TYPE)
4426     {
4427         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
4428     }
4429     else
4430     {
4431         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]));
4432         if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4433         {
4434             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]));
4435             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]));
4436         }
4437     }
4438 
4439     MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2];     // 8x and 4x DS surfaces
4440     SetHcpDsSurfaceParams(&dsSurfaceParams[0]);
4441     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2));
4442 
4443     if (pipeBufAddrParams)
4444     {
4445         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams));
4446         MOS_Delete(pipeBufAddrParams);
4447         pipeBufAddrParams = nullptr;
4448     }
4449 
4450     MHW_BATCH_BUFFER secondLevelBatchBuffer;
4451     MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
4452     secondLevelBatchBuffer.dwOffset = 0;
4453     secondLevelBatchBuffer.bSecondLevel = true;
4454     if (m_hucEnabled)
4455     {
4456         secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferWrite[0];
4457     }
4458     else
4459     {
4460         if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
4461         {
4462             // For DyS + BRC case, we run BRC on Pass 0, so although we dont run HPU on Pass 0
4463             // (VDENC + PAK pass) we will still use the write buffer here
4464             if (m_dysBrc)
4465             {
4466                 secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferWrite[0];
4467             }
4468             else //CQP case for Pass 0 , HPU has not run yet.. so use this buffer
4469             {
4470                 secondLevelBatchBuffer.OsResource = m_resVdencDysPictureState2NdLevelBatchBuffer;
4471             }
4472         }
4473         else
4474         {
4475             secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
4476         }
4477     }
4478 
4479     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
4480         &cmdBuffer,
4481         &secondLevelBatchBuffer));
4482 
4483     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
4484 
4485     return eStatus;
4486 }
4487 
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams,PMOS_SURFACE * refSurface,PMOS_SURFACE * refSurfaceNonScaled,PMOS_SURFACE * dsRefSurface4x,PMOS_SURFACE * dsRefSurface8x)4488 MOS_STATUS CodechalVdencVp9StateG11::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams,
4489     PMOS_SURFACE* refSurface,
4490     PMOS_SURFACE* refSurfaceNonScaled,
4491     PMOS_SURFACE* dsRefSurface4x,
4492     PMOS_SURFACE* dsRefSurface8x)
4493 {
4494     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4495 
4496     CODECHAL_ENCODE_FUNCTION_ENTER;
4497 
4498     pipeBufAddrParams = {};
4499     pipeBufAddrParams.Mode = m_mode;
4500     pipeBufAddrParams.psPreDeblockSurface = &m_reconSurface;
4501     pipeBufAddrParams.psPostDeblockSurface = &m_reconSurface;
4502     pipeBufAddrParams.psRawSurface = m_rawSurfaceToPak;
4503 
4504     pipeBufAddrParams.presMfdDeblockingFilterRowStoreScratchBuffer =
4505         &m_resDeblockingFilterLineBuffer;
4506 
4507     pipeBufAddrParams.presDeblockingFilterTileRowStoreScratchBuffer =
4508         &m_resDeblockingFilterTileLineBuffer;
4509 
4510     pipeBufAddrParams.presDeblockingFilterColumnRowStoreScratchBuffer =
4511         &m_resDeblockingFilterTileColumnBuffer;
4512 
4513     pipeBufAddrParams.presMetadataLineBuffer       = &m_resMetadataLineBuffer;
4514     pipeBufAddrParams.presMetadataTileLineBuffer   = &m_resMetadataTileLineBuffer;
4515     pipeBufAddrParams.presMetadataTileColumnBuffer = &m_resMetadataTileColumnBuffer;
4516     pipeBufAddrParams.presCurMvTempBuffer = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex);
4517 
4518     // Huc first pass doesn't write probabilities to output prob region but only updates to the input region. HuC run before repak writes to the ouput region.
4519     uint8_t frameCtxIdx = 0;
4520     if (m_hucEnabled && IsLastPass())
4521     {
4522         pipeBufAddrParams.presVp9ProbBuffer = &m_resHucProbOutputBuffer;
4523     }
4524     else
4525     {
4526         frameCtxIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
4527         CODECHAL_ENCODE_ASSERT(frameCtxIdx < CODEC_VP9_NUM_CONTEXTS);
4528         pipeBufAddrParams.presVp9ProbBuffer = &m_resProbBuffer[frameCtxIdx];
4529     }
4530 
4531     pipeBufAddrParams.presVp9SegmentIdBuffer              = &m_resSegmentIdBuffer;
4532     pipeBufAddrParams.presHvdTileRowStoreBuffer           = &m_resHvcTileRowstoreBuffer;
4533     pipeBufAddrParams.ps4xDsSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
4534     pipeBufAddrParams.ps8xDsSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
4535     pipeBufAddrParams.presVdencIntraRowStoreScratchBuffer = &m_resVdencIntraRowStoreScratchBuffer;
4536     pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1           = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
4537     if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
4538     {
4539         pipeBufAddrParams.presVdencStreamOutBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
4540         pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_tileStatsOffset.vdencStats;
4541     }
4542     else
4543     {
4544         pipeBufAddrParams.presVdencStreamOutBuffer    = &m_resVdencBrcStatsBuffer;
4545         pipeBufAddrParams.dwVdencStatsStreamOutOffset = 0;
4546     }
4547     pipeBufAddrParams.presStreamOutBuffer = nullptr;
4548     pipeBufAddrParams.presFrameStatStreamOutBuffer  = &m_resFrameStatStreamOutBuffer;
4549     pipeBufAddrParams.presSseSrcPixelRowStoreBuffer = &m_resSseSrcPixelRowStoreBuffer;
4550     pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
4551     pipeBufAddrParams.presSegmentMapStreamOut       = &m_resVdencSegmentMapStreamOut;
4552     pipeBufAddrParams.presPakCuLevelStreamoutBuffer =
4553         Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ? nullptr : &m_resPakcuLevelStreamoutData.sResource;
4554     if (m_dysRefFrameFlags != DYS_REF_NONE)
4555     {
4556         pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer =
4557             (m_vdencPakObjCmdStreamOutEnabled) ? m_resVdencPakObjCmdStreamOutBuffer : nullptr;
4558     }
4559     else
4560     {
4561         pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer = m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
4562     }
4563 
4564     if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
4565     {
4566         PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex];
4567         bool useTileStatisticsBuffer = tileStatisticsBuffer && !Mos_ResourceIsNull(&tileStatisticsBuffer->sResource);
4568         // the new framestats streamout will now be the tile level stats buffer because each pak is spewing out tile level stats
4569         pipeBufAddrParams.presFrameStatStreamOutBuffer = useTileStatisticsBuffer ? &tileStatisticsBuffer->sResource : nullptr;
4570         pipeBufAddrParams.dwFrameStatStreamOutOffset = useTileStatisticsBuffer ? m_tileStatsOffset.pakStats : 0;
4571         //Main Frame Stats are integrated by PAK integration kernel
4572     }
4573     else
4574     {
4575         pipeBufAddrParams.presFrameStatStreamOutBuffer = &m_resFrameStatStreamOutBuffer;
4576         pipeBufAddrParams.dwFrameStatStreamOutOffset = 0;
4577     }
4578 
4579     if (m_pictureCodingType != I_TYPE)
4580     {
4581         for (auto i = 0; i < 3; i++)
4582         {
4583             CODECHAL_ENCODE_CHK_NULL_RETURN(refSurface[i]);
4584             CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface4x[i]);
4585             CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface8x[i]);
4586 
4587             pipeBufAddrParams.presReferences[i] = &refSurface[i]->OsResource;
4588             pipeBufAddrParams.presVdencReferences[i] = &refSurface[i]->OsResource;
4589             pipeBufAddrParams.presVdenc4xDsSurface[i] = &dsRefSurface4x[i]->OsResource;
4590             pipeBufAddrParams.presVdenc8xDsSurface[i] = &dsRefSurface8x[i]->OsResource;
4591 
4592             if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4593             {
4594                 pipeBufAddrParams.presReferences[i + 4] = &refSurfaceNonScaled[i]->OsResource;
4595             }
4596         }
4597 
4598         pipeBufAddrParams.presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex ^ 0x01);
4599     }
4600 
4601     return eStatus;
4602 }
4603 
GetNumTilesInFrame()4604 uint16_t CodechalVdencVp9StateG11::GetNumTilesInFrame()
4605 {
4606     return ((1 << m_vp9PicParams->log2_tile_rows) * (1 << m_vp9PicParams->log2_tile_columns));
4607 }
4608 
AllocateResources()4609 MOS_STATUS CodechalVdencVp9StateG11::AllocateResources()
4610 {
4611     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4612 
4613     CODECHAL_ENCODE_FUNCTION_ENTER;
4614 
4615     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::AllocateResources());
4616 
4617     // create the tile coding state parameters
4618     CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams =
4619                                         (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)MOS_AllocAndZeroMemory(sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11) * m_maxTileNumber));
4620 
4621     if (m_isTilingSupported)
4622     {
4623         uint32_t maxPicWidthInSb = MOS_ROUNDUP_DIVIDE(m_maxPicWidth, CODEC_VP9_SUPER_BLOCK_WIDTH);
4624         uint32_t maxPicHeightInSb = MOS_ROUNDUP_DIVIDE(m_maxPicHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT);
4625 
4626         //PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
4627         uint32_t size = maxPicWidthInSb * maxPicHeightInSb * 64 * CODECHAL_CACHELINE_SIZE; // One CU has 16-byte, and there are 64 CU in one SB. But, each tile needs to be aliged to the cache line
4628         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4629         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4630         allocParamsForBufferLinear.dwBytes  = size;
4631         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4632         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4633         allocParamsForBufferLinear.Format = Format_Buffer;
4634         allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
4635 
4636         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4637             m_osInterface,
4638             &allocParamsForBufferLinear,
4639             &m_resPakcuLevelStreamoutData.sResource);
4640         CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4641 
4642         //PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
4643         // one LCU has one cache line. Use CU as LCU during creation
4644         allocParamsForBufferLinear.dwBytes  = size;
4645         allocParamsForBufferLinear.pBufName = "PAK Slice Level Streamout Data";
4646 
4647         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4648             m_osInterface,
4649             &allocParamsForBufferLinear,
4650             &m_resPakSliceLevelStreamutData.sResource);
4651         CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4652 
4653         //HCP scalability Sync buffer
4654         size = CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
4655         allocParamsForBufferLinear.dwBytes  = size;
4656         allocParamsForBufferLinear.pBufName = "Hcp scalability Sync buffer ";
4657 
4658         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4659             m_osInterface,
4660             &allocParamsForBufferLinear,
4661             &m_hcpScalabilitySyncBuffer.sResource);
4662         CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4663         m_hcpScalabilitySyncBuffer.dwSize = size;
4664 
4665         // PAK integration related
4666         if (m_isTilingSupported && m_scalableMode && m_hucEnabled)
4667         {
4668             // HUC Pak Int DMEM buffer
4669             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
4670             allocParamsForBufferLinear.pBufName = "Huc Pak Int Dmem Buffer";
4671             for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4672             {
4673                 for (auto j = 0; j < m_brcMaxNumPasses; j++)
4674                 {
4675                     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4676                         m_osInterface,
4677                         &allocParamsForBufferLinear,
4678                         &m_hucPakIntDmemBuffer[i][j]);
4679                     CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4680                 }
4681             }
4682 
4683             // HuC PAK Int region 7, 8
4684             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
4685             allocParamsForBufferLinear.pBufName = "HUC PAK Int Dummy Buffer";
4686 
4687             eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4688                     m_osInterface,
4689                     &allocParamsForBufferLinear,
4690                     &m_hucPakIntDummyBuffer);
4691             CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4692 
4693             MOS_LOCK_PARAMS lockFlags;
4694             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4695             lockFlags.WriteOnly = 1;
4696             uint8_t* data = nullptr;
4697 
4698             data = (uint8_t*)m_osInterface->pfnLockResource(
4699                 m_osInterface,
4700                 &m_hucPakIntDummyBuffer,
4701                 &lockFlags);
4702 
4703             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4704 
4705             MOS_ZeroMemory(
4706                 data,
4707                 allocParamsForBufferLinear.dwBytes);
4708 
4709             m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDummyBuffer);
4710 
4711             // HuC PAK Int Region 1 programming related stats
4712             MOS_ZeroMemory(&m_frameStatsOffset, sizeof(StatsInfo));
4713             MOS_ZeroMemory(&m_statsSize, sizeof(StatsInfo));
4714 
4715             //Sizes of each buffer to be loaded into the region 0 as input and 1 loaded out as output.
4716             m_statsSize.tileSizeRecord = m_hcpInterface->GetPakHWTileSizeRecordSize();
4717             m_statsSize.vdencStats = m_brcStatsBufSize;
4718             m_statsSize.pakStats = m_brcPakStatsBufSize;
4719             m_statsSize.counterBuffer = m_probabilityCounterBufferSize;
4720 
4721             //Offsets for output of all integrated frame statistics (region 1) from PAK integration kernel
4722             m_frameStatsOffset.tileSizeRecord = 0;
4723             // Vdenc stats has to be 4K aligned
4724             m_frameStatsOffset.vdencStats = 0;
4725             // VP9 PAK stats/ BRC pak stats / Frame Stats have to be 4K aligned
4726             m_frameStatsOffset.pakStats = MOS_ALIGN_CEIL((m_frameStatsOffset.vdencStats + m_statsSize.vdencStats), CODECHAL_PAGE_SIZE);
4727             // VP9 CounterBuffer goes as input to HUC region so it has to be 4k aligned
4728             m_frameStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_frameStatsOffset.pakStats + m_statsSize.pakStats), CODECHAL_PAGE_SIZE);
4729 
4730             // HuC PAK Int DMEM region 1 buffer allocation
4731             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_frameStatsOffset.counterBuffer + m_statsSize.counterBuffer, CODECHAL_PAGE_SIZE);
4732             allocParamsForBufferLinear.pBufName = "PAK HUC Integrated Frame Stats Buffer";
4733             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4734             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4735             allocParamsForBufferLinear.Format = Format_Buffer;
4736 
4737             m_frameStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
4738 
4739             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4740                 m_osInterface,
4741                 &allocParamsForBufferLinear,
4742                 &m_frameStatsPakIntegrationBuffer.sResource));
4743             m_frameStatsPakIntegrationBuffer.dwSize = allocParamsForBufferLinear.dwBytes;
4744 
4745             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4746             lockFlags.WriteOnly = 1;
4747             data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource, &lockFlags);
4748             MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
4749             m_osInterface->pfnUnlockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource);
4750 
4751             // Allocate region 9 of pak integration to be fed as input to HUC BRC region 7
4752             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4753             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4754             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4755             allocParamsForBufferLinear.Format = Format_Buffer;
4756             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
4757             allocParamsForBufferLinear.pBufName = "GEN11 PAK Integration FrameByteCount output";
4758             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4759                 m_osInterface,
4760                 &allocParamsForBufferLinear,
4761                 &m_hucPakIntBrcDataBuffer));
4762 
4763             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4764             lockFlags.WriteOnly = 1;
4765             data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_hucPakIntBrcDataBuffer, &lockFlags);
4766             MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
4767             m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntBrcDataBuffer);
4768 
4769             // Allocate Semaphore memory for VDEnc/PAK on all pipes to signal stitch command to stop waiting
4770             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4771             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4772             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4773             allocParamsForBufferLinear.Format = Format_Buffer;
4774             allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4775             allocParamsForBufferLinear.pBufName = "GEN11 VDEnc PAK done Semaphore Memory";
4776 
4777             for (auto i = 0; i < m_numPipe; i++)
4778             {
4779                 uint32_t* data = nullptr;
4780 
4781                 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4782                     m_osInterface,
4783                     &allocParamsForBufferLinear,
4784                     &m_stitchWaitSemaphoreMem[i].sResource));
4785 
4786                 m_stitchWaitSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
4787 
4788                 data = (uint32_t*)m_osInterface->pfnLockResource(
4789                     m_osInterface,
4790                     &m_stitchWaitSemaphoreMem[i].sResource,
4791                     &lockFlags);
4792 
4793                 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4794 
4795                 *data = 1;
4796 
4797                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
4798                     m_osInterface,
4799                     &m_stitchWaitSemaphoreMem[i].sResource));
4800             }
4801 
4802         }
4803         uint32_t* data = nullptr;
4804         MOS_LOCK_PARAMS lockFlagsWriteOnly;
4805         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4806         lockFlagsWriteOnly.WriteOnly = 1;
4807 
4808         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4809         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4810         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4811         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4812         allocParamsForBufferLinear.Format = Format_Buffer;
4813         allocParamsForBufferLinear.pBufName = "Pipe Start Sync memory";
4814 
4815         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4816             m_osInterface,
4817             &allocParamsForBufferLinear,
4818             &m_resPipeStartSync));
4819 
4820         data = (uint32_t *)m_osInterface->pfnLockResource(
4821             m_osInterface,
4822             &m_resPipeStartSync,
4823             &lockFlagsWriteOnly);
4824 
4825         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4826 
4827         MOS_ZeroMemory(data, sizeof(uint32_t));
4828 
4829         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
4830             m_osInterface,
4831             &m_resPipeStartSync));
4832 
4833         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4834         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4835         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4836         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4837         allocParamsForBufferLinear.Format = Format_Buffer;
4838         allocParamsForBufferLinear.pBufName = "Frame Start Sync memory";
4839 
4840         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4841             m_osInterface,
4842             &allocParamsForBufferLinear,
4843             &m_resFrameStartSync));
4844 
4845         data = (uint32_t *)m_osInterface->pfnLockResource(
4846             m_osInterface,
4847             &m_resFrameStartSync,
4848             &lockFlagsWriteOnly);
4849 
4850         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4851 
4852         MOS_ZeroMemory(data, sizeof(uint32_t));
4853 
4854         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
4855             m_osInterface,
4856             &m_resFrameStartSync));
4857 
4858         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4859         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4860         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4861         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4862         allocParamsForBufferLinear.Format = Format_Buffer;
4863         allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
4864 
4865         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4866             m_osInterface,
4867             &allocParamsForBufferLinear,
4868             &m_resDelayMinus));
4869 
4870         data = (uint32_t*)m_osInterface->pfnLockResource(
4871             m_osInterface,
4872             &m_resDelayMinus,
4873             &lockFlagsWriteOnly);
4874 
4875         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4876 
4877         MOS_ZeroMemory(data, sizeof(uint32_t));
4878 
4879         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
4880             m_osInterface,
4881             &m_resDelayMinus));
4882     }
4883 
4884     return eStatus;
4885 }
4886 
FreeResources()4887 void CodechalVdencVp9StateG11::FreeResources()
4888 {
4889     CodechalVdencVp9State::FreeResources();
4890 
4891     MOS_FreeMemory(m_tileParams);
4892     if (m_isTilingSupported)
4893     {
4894         if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
4895         {
4896             m_osInterface->pfnFreeResource(
4897                 m_osInterface,
4898                 &m_resPakcuLevelStreamoutData.sResource);
4899         }
4900 
4901         if (!Mos_ResourceIsNull(&m_resPakSliceLevelStreamutData.sResource))
4902         {
4903             m_osInterface->pfnFreeResource(
4904                 m_osInterface,
4905                 &m_resPakSliceLevelStreamutData.sResource);
4906         }
4907 
4908         // Release Hcp scalability Sync buffer
4909         if (!Mos_ResourceIsNull(&m_hcpScalabilitySyncBuffer.sResource))
4910         {
4911             m_osInterface->pfnFreeResource(
4912                 m_osInterface,
4913                 &m_hcpScalabilitySyncBuffer.sResource);
4914         }
4915 
4916         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
4917         {
4918             if (!Mos_ResourceIsNull(&m_tileRecordBuffer[i].sResource))
4919             {
4920                 m_osInterface->pfnFreeResource(
4921                     m_osInterface,
4922                     &m_tileRecordBuffer[i].sResource);
4923             }
4924         }
4925 
4926         for (auto i = 0; i < m_numUncompressedSurface; i++)
4927         {
4928             for (auto j = 0; j < CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE; j++)
4929             {
4930                 for (auto k = 0; k < 3; k++)
4931                 {
4932                     PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
4933 
4934                     if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
4935                     {
4936                         if (cmdBuffer->pCmdBase)
4937                         {
4938                             m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
4939                         }
4940                         m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
4941                     }
4942                 }
4943             }
4944         }
4945         for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4946         {
4947             for (auto j = 0; j < m_brcMaxNumPasses; j++)
4948             {
4949                 if (!Mos_ResourceIsNull(&m_hucPakIntDmemBuffer[i][j]))
4950                 {
4951                     m_osInterface->pfnFreeResource(
4952                         m_osInterface,
4953                         &m_hucPakIntDmemBuffer[i][j]);
4954                 }
4955             }
4956         }
4957 
4958         if (!Mos_ResourceIsNull(&m_hucPakIntDummyBuffer))
4959         {
4960             m_osInterface->pfnFreeResource(
4961                 m_osInterface,
4962                 &m_hucPakIntDummyBuffer);
4963         }
4964 
4965         if (!Mos_ResourceIsNull(&m_frameStatsPakIntegrationBuffer.sResource))
4966         {
4967             m_osInterface->pfnFreeResource(
4968                 m_osInterface,
4969                 &m_frameStatsPakIntegrationBuffer.sResource);
4970         }
4971 
4972         if (!Mos_ResourceIsNull(&m_hucPakIntBrcDataBuffer))
4973         {
4974             m_osInterface->pfnFreeResource(
4975                 m_osInterface,
4976                 &m_hucPakIntBrcDataBuffer);
4977         }
4978 
4979         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileStatsPakIntegrationBuffer); i++)
4980         {
4981             if (!Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[i].sResource))
4982             {
4983                 m_osInterface->pfnFreeResource(
4984                     m_osInterface,
4985                     &m_tileStatsPakIntegrationBuffer[i].sResource);
4986             }
4987         }
4988 
4989         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_stitchWaitSemaphoreMem); i++)
4990         {
4991             if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource))
4992             {
4993                 m_osInterface->pfnFreeResource(
4994                     m_osInterface,
4995                     &m_stitchWaitSemaphoreMem[i].sResource);
4996             }
4997         }
4998 
4999         if (!Mos_ResourceIsNull(&m_resPipeStartSync))
5000         {
5001             m_osInterface->pfnFreeResource(
5002                 m_osInterface,
5003                 &m_resPipeStartSync);
5004         }
5005 
5006         if (!Mos_ResourceIsNull(&m_resFrameStartSync))
5007         {
5008             m_osInterface->pfnFreeResource(
5009                 m_osInterface,
5010                 &m_resFrameStartSync);
5011         }
5012 
5013         if (!Mos_ResourceIsNull(&m_resDelayMinus))
5014         {
5015             m_osInterface->pfnFreeResource(
5016                 m_osInterface,
5017                 &m_resDelayMinus);
5018         }
5019     }
5020 
5021     return;
5022 }
5023 
SendMIAtomicCmd(PMOS_RESOURCE semaMem,uint32_t immData,MHW_COMMON_MI_ATOMIC_OPCODE opCode,PMOS_COMMAND_BUFFER cmdBuffer)5024 MOS_STATUS CodechalVdencVp9StateG11::SendMIAtomicCmd(
5025     PMOS_RESOURCE               semaMem,
5026     uint32_t                    immData,
5027     MHW_COMMON_MI_ATOMIC_OPCODE opCode,
5028     PMOS_COMMAND_BUFFER         cmdBuffer
5029 )
5030 {
5031     MHW_MI_ATOMIC_PARAMS       atomicParams;
5032     MOS_STATUS                 eStatus = MOS_STATUS_SUCCESS;
5033 
5034     CODECHAL_ENCODE_FUNCTION_ENTER;
5035 
5036     MOS_ZeroMemory((&atomicParams), sizeof(atomicParams));
5037     atomicParams.pOsResource = semaMem;
5038     atomicParams.dwDataSize = sizeof(uint32_t);
5039     atomicParams.Operation = opCode;
5040     atomicParams.bInlineData = true;
5041     atomicParams.dwOperand1Data[0] = immData;
5042     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(cmdBuffer, &atomicParams));
5043 
5044     return eStatus;
5045 }
5046 
Initialize(CodechalSetting * settings)5047 MOS_STATUS CodechalVdencVp9StateG11::Initialize(CodechalSetting * settings)
5048 {
5049     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5050     uint32_t   maxRows = 1;
5051 
5052     CODECHAL_ENCODE_FUNCTION_ENTER;
5053 
5054     //Create and register huc Cmd Initializer
5055     m_hucCmdInitializer = MOS_New(CodechalCmdInitializerG11, this);
5056 
5057     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::Initialize(settings));
5058 
5059     GetSystemPipeNumberCommon();
5060 
5061     if (MOS_VE_SUPPORTED(m_osInterface))
5062     {
5063         m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
5064         CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
5065         //scalability initialize
5066         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
5067     }
5068 
5069     maxRows = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT;
5070     //Max num of rows = 4 by VP9 Spec
5071     maxRows = MOS_MIN(maxRows, 4);
5072     //Max tile numbers = max of number tiles for single pipe or max muber of tiles for scalable pipes
5073     m_maxTileNumber = MOS_MAX((MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH), m_numVdbox * maxRows);
5074 
5075     m_dysVdencMultiPassEnabled = true;
5076 
5077     m_numPipe = m_numVdbox;
5078 
5079     m_scalableMode = (m_numPipe > 1);
5080 
5081     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
5082 
5083     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5084     MOS_STATUS eStatusKey = MOS_UserFeature_ReadValue_ID(
5085         nullptr,
5086         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH,
5087         &userFeatureData,
5088         m_osInterface->pOsContext);
5089     m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
5090 
5091     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5092     userFeatureData.i32Data = 1;
5093     userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
5094     MOS_UserFeature_ReadValue_ID(
5095         nullptr,
5096         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_HUC_ENABLE_ID,
5097         &userFeatureData,
5098         m_osInterface->pOsContext);
5099     m_hucEnabled = (userFeatureData.i32Data) ? true : false;
5100 
5101     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5102     userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
5103     userFeatureData.i32Data = 1;
5104     MOS_UserFeature_ReadValue_ID(
5105         nullptr,
5106         __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
5107         &userFeatureData,
5108         m_osInterface->pOsContext);
5109     m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
5110     m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported;
5111     // For dynamic scaling, the SingleTaskPhaseSupported is set to true and it does not get restored
5112     // to the original value after encoding of the frame. So need to restore to the original state
5113     m_storeSingleTaskPhaseSupported = m_singleTaskPhaseSupported; //Save the SingleTaskPhase state here
5114 
5115     // Multi-Pass BRC: currently disabled by default, plan to enable by default
5116     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5117     MOS_UserFeature_ReadValue_ID(
5118         nullptr,
5119         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_MULTIPASS_BRC_ENABLE_ID,
5120         &userFeatureData,
5121         m_osInterface->pOsContext);
5122     m_multipassBrcSupported = (userFeatureData.i32Data) ? true : false;
5123     m_vdencBrcStatsBufferSize     = m_brcStatsBufSize;
5124     m_vdencBrcPakStatsBufferSize  = m_brcPakStatsBufSize;
5125     m_brcHistoryBufferSize        = m_brcHistoryBufSize;
5126 
5127     // HME enabled by default for VP9
5128     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5129     MOS_UserFeature_ReadValue_ID(
5130         NULL,
5131         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ME_ENABLE_ID,
5132         &userFeatureData,
5133         m_osInterface->pOsContext);
5134     m_hmeSupported = (userFeatureData.i32Data) ? true : false;
5135 
5136     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
5137     MOS_UserFeature_ReadValue_ID(
5138         NULL,
5139         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_16xME_ENABLE_ID,
5140         &userFeatureData,
5141         m_osInterface->pOsContext);
5142     m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
5143 
5144     // disable superHME when HME is disabled
5145     if (m_hmeSupported == false)
5146     {
5147         m_16xMeSupported = false;
5148     }
5149 
5150     // UHME disabled
5151     m_32xMeSupported = false;
5152     // VP9 uses a different streamin kernel
5153     m_useNonLegacyStreamin = true;
5154 
5155     // Initialize kernel State
5156     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStates());
5157 
5158     // Get max binding table count
5159     m_maxBtCount = GetMaxBtCount();    // Need to add the correct BTcount when HME is enabled
5160 
5161     return eStatus;
5162 }
5163 
/*----------------------------------------------------------------------------
| Name      : GetSegmentBlockIndexInFrame
| Purpose   : Returns the index of a 32x32 block within the frame, given the block's x,y position (in 32x32 units) inside the current tile
|
| Returns   : uint32_t index of the 32x32 block in the frame (row-major order)
\---------------------------------------------------------------------------*/
GetSegmentBlockIndexInFrame(uint32_t frameWidth,uint32_t curr32XInTile,uint32_t curr32YInTile,uint32_t currTileStartY64aligned,uint32_t currTileStartX64aligned)5170 uint32_t CodechalVdencVp9StateG11::GetSegmentBlockIndexInFrame(
5171     uint32_t frameWidth,
5172     uint32_t curr32XInTile,
5173     uint32_t curr32YInTile,
5174     uint32_t currTileStartY64aligned,
5175     uint32_t currTileStartX64aligned)
5176 {
5177     uint32_t frameWidthIn32 = MOS_ALIGN_CEIL(frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
5178     uint32_t curr32XInFrame = currTileStartX64aligned / 32 + curr32XInTile;
5179     uint32_t curr32YInFrame = currTileStartY64aligned / 32 + curr32YInTile;
5180     uint32_t curr32BlockInFrame = curr32YInFrame * frameWidthIn32 + curr32XInFrame;
5181     return curr32BlockInFrame;
5182 }
5183 
5184 /*----------------------------------------------------------------------------
5185 | Name      : InitZigZagToRasterLUTPerTile
5186 | Purpose   : Rasterize a tile's 32 blocks' segmap indices, add to frame mapbuffer created for these indices
5187 |
5188 | Returns   : MOS_STATUS
5189 \---------------------------------------------------------------------------*/
InitZigZagToRasterLUTPerTile(uint32_t tileHeight,uint32_t tileWidth,uint32_t currTileStartYInFrame,uint32_t currTileStartXInFrame)5190 MOS_STATUS CodechalVdencVp9StateG11::InitZigZagToRasterLUTPerTile(
5191     uint32_t tileHeight,
5192     uint32_t tileWidth,
5193     uint32_t currTileStartYInFrame,
5194     uint32_t currTileStartXInFrame)
5195 {
5196     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
5197 
5198     // Allocate space for zig-zag to raster LUT used for vdenc streamin (1 int32_t for every 32x32 block (pic 64 aligned))
5199     // We only do this when the 1st tile of new frame is being processed and keep it the same unless tile resolutions changed.
5200     // We keep this map around until sequence is finished, it's deleted at device destruction.
5201     if (currTileStartXInFrame == 0 && currTileStartYInFrame == 0)
5202     {
5203         if (m_mapBuffer) // free previous if it exists - it may exist if this isn't first seg streamin frame, but it's a new tile with different res
5204         {
5205             MOS_FreeMemory(m_mapBuffer);
5206         }
5207         // Allocate one integer space for each 32*32 block in the whole frame to hold the segmentation index.
5208         m_mapBuffer = (uint32_t*)MOS_AllocAndZeroMemory(
5209             (MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
5210             (MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
5211             sizeof(int32_t)); //Framewidth and height are 64 aligned already
5212     }
5213     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mapBuffer);
5214 
5215     uint32_t align64Width32 = MOS_ALIGN_CEIL(tileWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
5216     uint32_t align64Height32 = MOS_ALIGN_CEIL(tileHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
5217     uint32_t* mapBufferZigZagPerTile = (uint32_t*)MOS_AllocAndZeroMemory(align64Width32*align64Height32 * sizeof(uint32_t));
5218     CODECHAL_ENCODE_CHK_NULL_RETURN(mapBufferZigZagPerTile);
5219 
5220     m_segStreamInHeight = m_frameHeight;
5221     m_segStreamInWidth = m_frameWidth;
5222 
5223     uint32_t count32 = 0; //Number of 32 by 32 blocks that will be processed here
5224     for (uint32_t curr32YInTile = 0; curr32YInTile< align64Height32; curr32YInTile++)
5225     {
5226         for (uint32_t curr32XInTile = 0; curr32XInTile < align64Width32; curr32XInTile++)
5227         {
5228             mapBufferZigZagPerTile[count32++] = GetSegmentBlockIndexInFrame(
5229                 m_frameWidth,
5230                 curr32XInTile,
5231                 curr32YInTile,
5232                 currTileStartYInFrame,
5233                 currTileStartXInFrame);
5234         }
5235     }
5236 
5237     //    mapBufferZigZagPerTile --->   m_mapBuffer
5238     //  | a b c d ...               ---> | a b W X c d Y Z ....
5239     //  | W X Y Z ...
5240     uint32_t num32blocks = align64Width32 * align64Height32;
5241     uint32_t tileOffsetIndex = m_32BlocksRasterized;
5242     for (uint32_t i = 0, dwRasterCount = 0; i < num32blocks; i += (align64Width32 * 2))
5243     {
5244         for (uint32_t j = i; j < i + (align64Width32 * 2); j += 4)
5245         {
5246             m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
5247             m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
5248         }
5249         for (uint32_t j = i + 2; j < i + (align64Width32 * 2); j += 4)
5250         {
5251             m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
5252             m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
5253         }
5254     }
5255     if (mapBufferZigZagPerTile) // free per tile map buffer as it has been rasterized and copied into the mapbuffer
5256     {
5257         MOS_FreeMemory(mapBufferZigZagPerTile);
5258     }
5259 
5260     // ^ Zig-zag pattern filled to SB aligned (CEIL), if unaligned then we base seg ID address on previous row/column (data replication)
5261     uint32_t width32 = CODECHAL_GET_WIDTH_IN_BLOCKS(tileWidth, 32);
5262     if (width32 != align64Width32) // replicate last column
5263     {
5264         for (auto i = (align64Width32 * 2) - 1 - 2; i < num32blocks; i += (align64Width32 * 2))
5265         {
5266             m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 1];
5267             m_mapBuffer[i + tileOffsetIndex + 2] = m_mapBuffer[i + tileOffsetIndex + 1];
5268         }
5269     }
5270 
5271     uint32_t height32 = CODECHAL_GET_HEIGHT_IN_BLOCKS(tileHeight, 32);
5272     if (height32 != align64Height32) // replicate last row
5273     {
5274         for (auto i = num32blocks - (align64Width32 * 2) + 2; i < num32blocks; i += 4)
5275         {
5276             m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 2];
5277             m_mapBuffer[i + tileOffsetIndex + 1] = m_mapBuffer[i + tileOffsetIndex + 1 - 2];
5278         }
5279     }
5280     //Index offset to be added to the buffer for the next tile depending on how many blocks were rasterized already in this tile
5281     m_32BlocksRasterized += count32;
5282 
5283     return eStatus;
5284 }
5285 
CalculateVdencPictureStateCommandSize()5286 MOS_STATUS CodechalVdencVp9StateG11::CalculateVdencPictureStateCommandSize()
5287 {
5288     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5289 
5290     CODECHAL_ENCODE_FUNCTION_ENTER;
5291 
5292     MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11 stateCmdSizeParams;
5293     uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
5294     stateCmdSizeParams.bHucDummyStream = true;
5295     m_hwInterface->GetHxxStateCommandSize(
5296         CODECHAL_ENCODE_MODE_VP9,
5297         &vdencPictureStatesSize,
5298         &vdencPicturePatchListSize,
5299         &stateCmdSizeParams);
5300 
5301     m_defaultPictureStatesSize += vdencPictureStatesSize;
5302     m_defaultPicturePatchListSize += vdencPicturePatchListSize;
5303 
5304     m_hwInterface->GetVdencStateCommandsDataSize(
5305         CODECHAL_ENCODE_MODE_VP9,
5306         &vdencPictureStatesSize,
5307         &vdencPicturePatchListSize);
5308 
5309     m_defaultPictureStatesSize += vdencPictureStatesSize;
5310     m_defaultPicturePatchListSize += vdencPicturePatchListSize;
5311 
5312     return eStatus;
5313 }
5314 
CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)5315 PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS CodechalVdencVp9StateG11::CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)
5316 {
5317     pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G11);
5318 
5319     return pipeBufAddrParams;
5320 }
5321 
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)5322 MOS_STATUS CodechalVdencVp9StateG11::UpdateCmdBufAttribute(
5323     PMOS_COMMAND_BUFFER cmdBuffer,
5324     bool                renderEngineInUse)
5325 {
5326     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5327 
5328     // should not be there. Will remove it in the next change
5329     CODECHAL_ENCODE_FUNCTION_ENTER;
5330     if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
5331     {
5332         PMOS_CMD_BUF_ATTRI_VE attriExt =
5333             (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
5334 
5335         memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
5336         attriExt->bUseVirtualEngineHint =
5337             attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
5338     }
5339 
5340     return eStatus;
5341 }