1 /*
2 * Copyright (c) 2020-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     encode_vp9_pak_integrate_packet.cpp
24 //! \brief    Defines the interface for vp9 pak integrate packet
25 //!
26 #include "encode_vp9_pak_integrate_packet.h"
27 #include "mos_os_cp_interface_specific.h"
28 
29 namespace encode
30 {
~Vp9PakIntegratePkt()31 Vp9PakIntegratePkt::~Vp9PakIntegratePkt()
32 {
33     FreeResources();
34 }
35 
Init()36 MOS_STATUS Vp9PakIntegratePkt::Init()
37 {
38     ENCODE_FUNC_CALL();
39 
40     m_basicFeature = dynamic_cast<Vp9BasicFeature *>(m_featureManager->GetFeature(Vp9FeatureIDs::basicFeature));
41     ENCODE_CHK_NULL_RETURN(m_basicFeature);
42 
43     m_hcpInterfaceNew = std::static_pointer_cast<mhw::vdbox::hcp::Itf>(m_hwInterface->GetHcpInterfaceNext());
44     ENCODE_CHK_NULL_RETURN(m_hcpInterfaceNew);
45 
46     ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Init());
47 
48     ENCODE_CHK_NULL_RETURN(m_pipeline);
49 #ifdef _MMC_SUPPORTED
50     m_mmcState = m_pipeline->GetMmcState();
51     ENCODE_CHK_NULL_RETURN(m_mmcState);
52 #endif  // __MMC_SUPPORTED
53 
54     return MOS_STATUS_SUCCESS;
55 }
56 
Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)57 MOS_STATUS Vp9PakIntegratePkt::Submit(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase)
58 {
59     ENCODE_FUNC_CALL();
60 
61     bool firstTaskPhase = packetPhase & firstPacket;
62     bool requestProlog  = false;
63 
64     if (m_basicFeature->m_hucEnabled)
65     {
66         // Huc Basic
67         ENCODE_CHK_STATUS_RETURN(Execute(commandBuffer, true, requestProlog));
68 
69         // Add huc status update to status buffer
70         PMOS_RESOURCE osResource = nullptr;
71         uint32_t      offset     = 0;
72         ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusRegMask, osResource, offset));
73         ENCODE_CHK_NULL_RETURN(osResource);
74 
75         // Write HUC_STATUS mask
76         auto &storeDataParams            = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
77         storeDataParams                  = {};
78         storeDataParams.pOsResource      = osResource;
79         storeDataParams.dwResourceOffset = offset;
80         storeDataParams.dwValue          = m_hwInterface->GetHucInterfaceNext()->GetHucStatusReEncodeMask();
81         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(commandBuffer));
82 
83         // store HUC_STATUS register
84         osResource = nullptr;
85         offset     = 0;
86         ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusReg, osResource, offset));
87         ENCODE_CHK_NULL_RETURN(osResource);
88 
89         ENCODE_CHK_NULL_RETURN(m_hwInterface->GetHucInterfaceNext());
90         auto  mmioRegisters                 = m_hwInterface->GetHucInterfaceNext()->GetMmioRegisters(m_vdboxIndex);
91         auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
92         miStoreRegMemParams                 = {};
93         miStoreRegMemParams.presStoreBuffer = osResource;
94         miStoreRegMemParams.dwOffset        = offset;
95         miStoreRegMemParams.dwRegister      = mmioRegisters->hucStatusRegOffset;
96         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(commandBuffer));
97 
98         if (m_basicFeature->m_enableTileStitchByHW)
99         {
100             // 2nd level BB buffer for stitching cmd
101             // Current location to add cmds in 2nd level batch buffer
102             m_HucStitchCmdBatchBuffer.iCurrent = 0;
103             // Reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
104             m_HucStitchCmdBatchBuffer.dwOffset = 0;
105             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(commandBuffer, &m_HucStitchCmdBatchBuffer));
106             // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
107             auto &mfxWaitParams               = m_miItf->MHW_GETPAR_F(MFX_WAIT)();
108             mfxWaitParams                     = {};
109             mfxWaitParams.iStallVdboxPipeline = m_osInterface->osCpInterface->IsCpEnabled() ? true : false;
110             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(commandBuffer));
111         }
112     }
113 
114     ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, commandBuffer));
115     if (false == m_pipeline->IsFrameTrackingEnabled())
116     {
117         ENCODE_CHK_STATUS_RETURN(UpdateStatusReportNext(statusReportGlobalCount, commandBuffer));
118     }
119 
120     CODECHAL_DEBUG_TOOL(
121         if (m_mmcState) {
122             m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface));
123         }
124     )
125 
126     // Reset parameters for next PAK execution
127     if (false == m_pipeline->IsFrameTrackingEnabled())
128     {
129         UpdateParameters();
130     }
131 
132     CODECHAL_DEBUG_TOOL(
133         ENCODE_CHK_STATUS_RETURN(DumpInput());
134     )
135 
136     return MOS_STATUS_SUCCESS;
137 }
138 
CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)139 MOS_STATUS Vp9PakIntegratePkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize)
140 {
141     ENCODE_FUNC_CALL();
142 
143     uint32_t                       hucCommandsSize  = 0;
144     uint32_t                       hucPatchListSize = 0;
145     MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams;
146 
147     ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize(
148         m_basicFeature->m_mode, (uint32_t *)&hucCommandsSize, (uint32_t *)&hucPatchListSize, &stateCmdSizeParams));
149 
150     commandBufferSize      = hucCommandsSize;
151     requestedPatchListSize = m_osInterface->bUsesPatchList ? hucPatchListSize : 0;
152 
153     // Reserve cmd size for hw stitch
154     commandBufferSize += m_hwStitchCmdSize;
155 
156     // 4K align since allocation is in chunks of 4K bytes.
157     commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE);
158 
159     return MOS_STATUS_SUCCESS;
160 }
161 
DumpOutput()162 MOS_STATUS Vp9PakIntegratePkt::DumpOutput()
163 {
164     ENCODE_FUNC_CALL();
165 
166 #if USE_CODECHAL_DEBUG_TOOL
167 
168     // Region 1 - HuC Frame statistics output
169     ENCODE_CHK_STATUS_RETURN(DumpRegion(1, "_IntegratedStreamout_output", false, hucRegionDumpPakIntegrate, m_hwInterface->m_pakIntAggregatedFrameStatsSize));
170     // Region 8 - data buffer read by HUC for stitching cmd generation
171     ENCODE_CHK_STATUS_RETURN(DumpRegion(8, "_HucStitchDataBuffer", false, hucRegionDumpPakIntegrate, MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE)));
172     // Region 9 - HuC outputs BRC data
173     ENCODE_CHK_STATUS_RETURN(DumpRegion(9, "_BrcDataOutputBuffer", false, hucRegionDumpPakIntegrate));
174     // Region 10 - SLB for stitching cmd output from Huc
175     ENCODE_CHK_STATUS_RETURN(DumpRegion(10, "_SLBHucStitchCmdBuffer", false, hucRegionDumpPakIntegrate, m_hwInterface->m_HucStitchCmdBatchBufferSize));
176     // Region 15 [In/Out] - Tile Record Buffer
177     ENCODE_CHK_STATUS_RETURN(DumpRegion(15, "_TileRecordBuffer", false, hucRegionDumpPakIntegrate, m_basicFeature->m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterfaceNew->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE)));
178 
179 #endif  // USE_CODECHAL_DEBUG_TOOL
180 
181     return MOS_STATUS_SUCCESS;
182 }
183 
Completed(void * mfxStatus,void * rcsStatus,void * statusReport)184 MOS_STATUS Vp9PakIntegratePkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport)
185 {
186     ENCODE_FUNC_CALL();
187 
188     ENCODE_CHK_NULL_RETURN(mfxStatus);
189     ENCODE_CHK_NULL_RETURN(statusReport);
190     ENCODE_CHK_NULL_RETURN(m_basicFeature);
191 
192     EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
193 
194     if (statusReportData->numberTilesInFrame == 1 || !m_basicFeature->m_scalableMode)
195     {
196         // When Tile feature is not enabled or not in scalable mode, not need following complete options
197         return MOS_STATUS_SUCCESS;
198     }
199 
200     ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Completed(mfxStatus, rcsStatus, statusReport));
201 
202     // Tile status data is only update and performed in multi-pipe mode
203     ENCODE_CHK_STATUS_RETURN(SetupTilesStatusData(mfxStatus, statusReport));
204 
205     m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList);
206 
207     return MOS_STATUS_SUCCESS;
208 }
209 
AllocateResources()210 MOS_STATUS Vp9PakIntegratePkt::AllocateResources()
211 {
212     ENCODE_FUNC_CALL();
213 
214     ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::AllocateResources());
215 
216     if (m_basicFeature->m_hucPakIntBrcDataBuffer == nullptr)
217     {
218         MOS_RESOURCE *          allocatedBuffer = nullptr;
219         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
220         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
221         allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
222         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
223         allocParamsForBufferLinear.Format   = Format_Buffer;
224 
225         // HUC PAK Int DMEM buffer
226         allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(m_hucPakIntDmemBufferSize, CODECHAL_CACHELINE_SIZE);
227         allocParamsForBufferLinear.pBufName = "Huc Pak Int Dmem Buffer";
228         allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
229         for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; ++i)
230         {
231             for (auto j = 0; j < Vp9EncodeBrc::m_brcMaxNumPasses; ++j)
232             {
233                 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
234                 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
235                 m_hucPakIntDmemBuffer[i][j] = *allocatedBuffer;
236             }
237         }
238 
239         // HuC PAK Int region 7, 8
240         allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
241         allocParamsForBufferLinear.pBufName = "HUC PAK Int Dummy Buffer";
242         allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
243         allocatedBuffer                     = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
244         ENCODE_CHK_NULL_RETURN(allocatedBuffer);
245         m_hucPakIntDummyBuffer = *allocatedBuffer;
246 
247         // Allocate region 9 of PAK integration to be fed as input to HuC BRC region 7
248         allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
249         allocParamsForBufferLinear.pBufName = "HUC PAK Integration FrameByteCount output";
250         allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_WRITE;
251         allocatedBuffer                     = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
252         ENCODE_CHK_NULL_RETURN(allocatedBuffer);
253         m_basicFeature->m_hucPakIntBrcDataBuffer = allocatedBuffer;
254 
255         if (m_basicFeature->m_enableTileStitchByHW)
256         {
257             // HuC stitching data buffer
258             allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
259             allocParamsForBufferLinear.pBufName = "VP9 HuC Stitch Data Buffer";
260             allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
261 
262             for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; ++i)
263             {
264                 for (auto j = 0; j < CODECHAL_ENCODE_VP9_BRC_MAX_NUM_OF_PASSES; ++j)
265                 {
266                     allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
267                     ENCODE_CHK_NULL_RETURN(allocatedBuffer);
268                     m_resHucStitchDataBuffer[i][j] = *allocatedBuffer;
269                 }
270             }
271 
272             // Second level batch buffer for HuC stitching CMD
273             MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
274             m_HucStitchCmdBatchBuffer.bSecondLevel = true;
275             ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
276                 m_osInterface,
277                 &m_HucStitchCmdBatchBuffer,
278                 nullptr,
279                 m_hwInterface->m_HucStitchCmdBatchBufferSize));
280         }
281     }
282 
283     return MOS_STATUS_SUCCESS;
284 }
285 
SetDmemBuffer() const286 MOS_STATUS Vp9PakIntegratePkt::SetDmemBuffer() const
287 {
288     ENCODE_FUNC_CALL();
289 
290     auto currentPass = m_pipeline->GetCurrentPass();
291     if (currentPass >= Vp9EncodeBrc::m_brcMaxNumPasses)
292     {
293         return MOS_STATUS_INVALID_PARAMETER;
294     }
295 
296     HucPakIntDmem *dmem = (HucPakIntDmem *)m_allocator->LockResourceForWrite(const_cast<MOS_RESOURCE *>(&m_hucPakIntDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]));
297     ENCODE_CHK_NULL_RETURN(dmem);
298     MOS_ZeroMemory(dmem, sizeof(HucPakIntDmem));
299 
300     MOS_FillMemory(dmem, m_pakIntDmemOffsetsSize, 0xFF);
301 
302     uint16_t numTileColumns = 1;
303     uint16_t numTileRows    = 1;
304     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
305     uint32_t numTiles = 1;
306     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileNum, numTiles);
307 
308     dmem->totalSizeInCommandBuffer = numTiles * CODECHAL_CACHELINE_SIZE;
309     dmem->offsetInCommandBuffer    = 0xFFFF;  // Not used for VP9, all bytes in dmem for fields not used are 0xFF
310     dmem->picWidthInPixel          = (uint16_t)m_basicFeature->m_frameWidth;
311     dmem->picHeightInPixel         = (uint16_t)m_basicFeature->m_frameHeight;
312     dmem->totalNumberOfPaks        = (uint16_t)m_pipeline->GetPipeNum();
313     dmem->codec                    = m_pakIntVp9CodecId;
314     dmem->maxPass                  = Vp9EncodeBrc::m_brcMaxNumPasses;  // Only VDEnc CQP and BRC
315     dmem->currentPass              = currentPass + 1;
316 
317     uint32_t       lastTileIndex = numTiles - 1;
318     EncodeTileData tileData      = {};
319     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex);
320 
321     dmem->lastTileBSStartInBytes = tileData.tileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
322     dmem->picStateStartInBytes   = 0xFFFF;
323 
324     if (m_basicFeature->m_enableTileStitchByHW)
325     {
326         dmem->StitchEnable        = true;
327         dmem->StitchCommandOffset = 0;
328         dmem->BBEndforStitch      = HUC_BATCH_BUFFER_END;
329     }
330 
331     Vp9TileStatusInfo vp9TileStatsOffset  = {};
332     Vp9TileStatusInfo vp9FrameStatsOffset = {};
333     Vp9TileStatusInfo vp9StatsSize        = {};
334     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileStatusInfo, vp9TileStatsOffset, vp9FrameStatsOffset, vp9StatsSize);
335 
336     // Offset 0 is for region 1 - output of integrated frame stats from PAK integration kernel
337 
338     dmem->tileSizeRecordOffset[0]   = vp9FrameStatsOffset.tileSizeRecord;
339     dmem->vdencStatOffset[0]        = vp9FrameStatsOffset.vdencStats;
340     dmem->vp9PakStatOffset[0]       = vp9FrameStatsOffset.pakStats;
341     dmem->vp9CounterBufferOffset[0] = vp9FrameStatsOffset.counterBuffer;
342 
343     uint16_t numTilesPerPipe = (uint16_t)(numTiles / m_pipeline->GetPipeNum());
344 
345     // Offset 1 - 4 is for region 0 - Input to PAK integration kernel for all tile statistics per pipe
346     for (auto i = 1; i <= m_pipeline->GetPipeNum(); ++i)
347     {
348         dmem->numTiles[i - 1]           = numTilesPerPipe;
349         dmem->tileSizeRecordOffset[i]   = vp9TileStatsOffset.tileSizeRecord + ((i - 1) * (dmem->numTiles[i - 1]) * vp9StatsSize.tileSizeRecord);
350         dmem->vdencStatOffset[i]        = vp9TileStatsOffset.vdencStats + ((i - 1) * (dmem->numTiles[i - 1]) * vp9StatsSize.vdencStats);
351         dmem->vp9PakStatOffset[i]       = vp9TileStatsOffset.pakStats + ((i - 1) * (dmem->numTiles[i - 1]) * vp9StatsSize.pakStats);
352         dmem->vp9CounterBufferOffset[i] = vp9TileStatsOffset.counterBuffer + ((i - 1) * (dmem->numTiles[i - 1]) * vp9StatsSize.counterBuffer);
353     }
354 
355     m_allocator->UnLock(const_cast<MOS_RESOURCE *>(&m_hucPakIntDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]));
356 
357     return MOS_STATUS_SUCCESS;
358 }
359 
ReadHcpStatus(MHW_VDBOX_NODE_IND vdboxIndex,MediaStatusReport * statusReport,MOS_COMMAND_BUFFER & cmdBuffer)360 MOS_STATUS Vp9PakIntegratePkt::ReadHcpStatus(MHW_VDBOX_NODE_IND vdboxIndex, MediaStatusReport *statusReport, MOS_COMMAND_BUFFER &cmdBuffer)
361 {
362     ENCODE_FUNC_CALL();
363 
364     ENCODE_CHK_NULL_RETURN(statusReport);
365     ENCODE_CHK_NULL_RETURN(m_hwInterface);
366 
367     MOS_RESOURCE *osResource = nullptr;
368     uint32_t      offset     = 0;
369 
370     EncodeStatusReadParams params;
371     MOS_ZeroMemory(&params, sizeof(params));
372 
373     ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamByteCountPerFrame, osResource, offset));
374     params.resBitstreamByteCountPerFrame    = osResource;
375     params.bitstreamByteCountPerFrameOffset = offset;
376 
377     ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamSyntaxElementOnlyBitCount, osResource, offset));
378     params.resBitstreamSyntaxElementOnlyBitCount    = osResource;
379     params.bitstreamSyntaxElementOnlyBitCountOffset = offset;
380 
381     ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportQPStatusCount, osResource, offset));
382     params.resQpStatusCount    = osResource;
383     params.qpStatusCountOffset = offset;
384 
385     ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusMask, osResource, offset));
386     params.resImageStatusMask    = osResource;
387     params.imageStatusMaskOffset = offset;
388 
389     ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusCtrl, osResource, offset));
390     params.resImageStatusCtrl    = osResource;
391     params.imageStatusCtrlOffset = offset;
392 
393     ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadHcpStatus(vdboxIndex, params, &cmdBuffer));
394 
395     auto mmioRegisters = m_hcpInterfaceNew->GetMmioRegisters(vdboxIndex);
396 
397     auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
398     miStoreRegMemParams                 = {};
399     miStoreRegMemParams.presStoreBuffer = params.resBitstreamByteCountPerFrame;
400     miStoreRegMemParams.dwOffset        = params.bitstreamByteCountPerFrameOffset;
401     miStoreRegMemParams.dwRegister      = mmioRegisters->hcpVp9EncBitstreamBytecountFrameRegOffset;
402     ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
403 
404     ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadImageStatusForHcp(vdboxIndex, params, &cmdBuffer));
405 
406     HucBrcBuffers *hucBrcBuffers = nullptr;
407     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeBrc, Vp9FeatureIDs::vp9BrcFeature, GetHucBrcBuffers, hucBrcBuffers);
408     ENCODE_CHK_NULL_RETURN(hucBrcBuffers);
409 
410     auto &copyMemMemParams       = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
411     copyMemMemParams             = {};
412     copyMemMemParams.presSrc     = params.resBitstreamByteCountPerFrame;
413     copyMemMemParams.dwSrcOffset = params.bitstreamByteCountPerFrameOffset;
414     copyMemMemParams.presDst     = &(hucBrcBuffers->resBrcBitstreamSizeBuffer);
415     copyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(EncodeVp9BSBuffer, dwHcpBitstreamByteCountFrame);
416     ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(&cmdBuffer));
417 
418     return MOS_STATUS_SUCCESS;
419 }
420 
EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)421 MOS_STATUS Vp9PakIntegratePkt::EndStatusReport(uint32_t srType, MOS_COMMAND_BUFFER *cmdBuffer)
422 {
423     ENCODE_FUNC_CALL();
424     ENCODE_CHK_NULL_RETURN(cmdBuffer);
425 
426     auto brcFeature = dynamic_cast<Vp9EncodeBrc *>(m_featureManager->GetFeature(Vp9FeatureIDs::vp9BrcFeature));
427     ENCODE_CHK_NULL_RETURN(brcFeature);
428 
429     if (!m_basicFeature->m_scalableMode)
430     {
431         // Single pipe mode can read the info from MMIO register. Otherwise,
432         // we have to use the tile size statistic buffer
433         ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(m_vdboxIndex, m_statusReport, *cmdBuffer));
434     }
435     ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer));
436 
437     MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
438     ENCODE_CHK_NULL_RETURN(perfProfiler);
439     ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd(
440         (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer));
441 
442     return MOS_STATUS_SUCCESS;
443 }
444 
UpdateParameters()445 void Vp9PakIntegratePkt::UpdateParameters()
446 {
447     ENCODE_FUNC_CALL();
448 
449     if (!m_pipeline->IsSingleTaskPhaseSupported())
450     {
451         m_osInterface->pfnResetPerfBufferID(m_osInterface);
452     }
453 }
454 
SetupTilesStatusData(void * mfxStatus,void * statusReport)455 MOS_STATUS Vp9PakIntegratePkt::SetupTilesStatusData(void *mfxStatus, void *statusReport)
456 {
457     ENCODE_FUNC_CALL();
458 
459     ENCODE_CHK_NULL_RETURN(mfxStatus);
460     ENCODE_CHK_NULL_RETURN(statusReport);
461     ENCODE_CHK_NULL_RETURN(m_basicFeature);
462 
463     EncodeStatusMfx *       encodeStatusMfx  = (EncodeStatusMfx *)mfxStatus;
464     EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
465 
466     uint32_t                    statBufIdx     = statusReportData->currOriginalPic.FrameIdx;
467     const EncodeReportTileData *tileReportData = nullptr;
468     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetReportTileData, statBufIdx, tileReportData);
469     ENCODE_CHK_NULL_RETURN(tileReportData);
470 
471     MOS_RESOURCE *tileSizeStatusBuffer = nullptr;
472     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, tileSizeStatusBuffer);
473     ENCODE_CHK_NULL_RETURN(tileSizeStatusBuffer);
474 
475     PakHwTileSizeRecord *tileStatusReport =
476         (PakHwTileSizeRecord *)m_allocator->LockResourceForRead(tileSizeStatusBuffer);
477     ENCODE_CHK_NULL_RETURN(tileStatusReport);
478 
479     statusReportData->codecStatus       = CODECHAL_STATUS_SUCCESSFUL;
480     statusReportData->panicMode         = false;
481     statusReportData->averageQP         = 0;
482     statusReportData->qpY               = 0;
483     statusReportData->suggestedQPYDelta = 0;
484     statusReportData->numberPasses      = 1;
485     statusReportData->bitstreamSize     = 0;
486 
487     encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0;
488 
489     double   sumQP   = 0.0;
490     uint32_t totalCU = 0;
491     for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; ++i)
492     {
493         if (tileStatusReport[i].Length == 0)
494         {
495             statusReportData->codecStatus = CODECHAL_STATUS_INCOMPLETE;
496             // Clean-up the tile status report buffer
497             MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
498             m_allocator->UnLock(tileSizeStatusBuffer);
499             return MOS_STATUS_SUCCESS;
500         }
501         statusReportData->bitstreamSize += tileStatusReport[i].Length;
502         totalCU += (tileReportData[i].tileHeightInMinCbMinus1 + 1) * (tileReportData[i].tileWidthInMinCbMinus1 + 1);
503         sumQP += tileStatusReport[i].Hcp_Qp_Status_Count;
504     }
505 
506     if (totalCU != 0)
507     {
508         statusReportData->qpY = statusReportData->averageQP =
509             (uint8_t)((sumQP / (double)totalCU) / 4.0);  // Due to TU is 4x4 and there are 4 TUs in one CU
510     }
511     else
512     {
513         // Clean-up the tile status report buffer
514         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
515         m_allocator->UnLock(tileSizeStatusBuffer);
516         return MOS_STATUS_INVALID_PARAMETER;
517     }
518 
519     if (m_basicFeature->m_enableTileStitchByHW)
520     {
521         // Clean-up the tile status report buffer
522         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
523         m_allocator->UnLock(tileSizeStatusBuffer);
524         return MOS_STATUS_SUCCESS;
525     }
526 
527     uint8_t *bufPtr       = (uint8_t *)MOS_AllocAndZeroMemory(statusReportData->bitstreamSize);
528     uint8_t *tempBsBuffer = bufPtr;
529 
530     auto tempTerminateFunc = [&]()
531     {
532         MOS_SafeFreeMemory(tempBsBuffer);
533 
534         // Clean-up the tile status report buffer
535         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
536         m_allocator->UnLock(tileSizeStatusBuffer);
537     };
538     ENCODE_CHK_NULL_WITH_DESTROY_RETURN_VALUE(tempBsBuffer, tempTerminateFunc);
539 
540     PCODEC_REF_LIST currRefList = (PCODEC_REF_LIST)statusReportData->currRefList;
541     ENCODE_CHK_NULL_WITH_DESTROY_RETURN_VALUE(currRefList, tempTerminateFunc);
542     uint8_t *bitstream = (uint8_t *)m_allocator->LockResourceForWrite(&currRefList->resBitstreamBuffer);
543     ENCODE_CHK_NULL_WITH_DESTROY_RETURN_VALUE(bitstream, tempTerminateFunc);
544 
545     for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; ++i)
546     {
547         uint32_t offset = tileReportData[i].bitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
548         uint32_t len    = tileStatusReport[i].Length;
549         if (offset + len >= m_basicFeature->m_bitstreamSize)
550         {
551             ENCODE_ASSERTMESSAGE("Error: Tile offset and length add up to more than bitstream upper bound");
552             statusReportData->codecStatus   = CODECHAL_STATUS_ERROR;
553             statusReportData->bitstreamSize = 0;
554 
555             MOS_FreeMemory(tempBsBuffer);
556             m_allocator->UnLock(&currRefList->resBitstreamBuffer);
557 
558             // Clean-up the tile status report buffer
559             MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
560             m_allocator->UnLock(tileSizeStatusBuffer);
561 
562             return MOS_STATUS_INVALID_FILE_SIZE;
563         }
564 
565         MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
566         bufPtr += len;
567     }
568 
569     MOS_SecureMemcpy(bitstream, statusReportData->bitstreamSize, tempBsBuffer, statusReportData->bitstreamSize);
570     MOS_ZeroMemory(&bitstream[statusReportData->bitstreamSize], m_basicFeature->m_bitstreamSize - statusReportData->bitstreamSize);
571 
572     MOS_FreeMemory(tempBsBuffer);
573     m_allocator->UnLock(&currRefList->resBitstreamBuffer);
574 
575     // Clean-up the tile status report buffer
576     MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
577     m_allocator->UnLock(tileSizeStatusBuffer);
578 
579     return MOS_STATUS_SUCCESS;
580 }
581 
FreeResources()582 MOS_STATUS Vp9PakIntegratePkt::FreeResources()
583 {
584     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
585 
586     ENCODE_FUNC_CALL();
587 
588     eStatus = Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
589     ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
590 
591     return eStatus;
592 }
593 
ConfigStitchDataBuffer() const594 MOS_STATUS Vp9PakIntegratePkt::ConfigStitchDataBuffer() const
595 {
596     ENCODE_FUNC_CALL();
597 
598     auto currPass = m_pipeline->GetCurrentPass();
599     HucCommandData *hucStitchDataBuf = (HucCommandData*)m_allocator->LockResourceForWrite(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass]));
600     ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
601 
602     MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
603     hucStitchDataBuf->TotalCommands          = 1;
604     hucStitchDataBuf->InputCOM[0].SizeOfData = 0xf;
605 
606     uint16_t numTileColumns = 1;
607     uint16_t numTileRows    = 1;
608     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
609 
610     HucInputCmdG12 hucInputCmd;
611     MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG12));
612 
613     ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
614     hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
615     hucInputCmd.CmdMode             = HUC_CMD_LIST_MODE;
616     hucInputCmd.LengthOfTable       = numTileRows * numTileColumns;
617     hucInputCmd.CopySize            = m_hwInterface->m_tileRecordSize;
618 
619     // Tile record always in m_tileRecordBuffer even in scalable node
620     uint32_t      statBufIdx = m_basicFeature->m_currOriginalPic.FrameIdx;
621     MOS_RESOURCE *presSrc    = nullptr;
622 
623     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, presSrc);
624     ENCODE_CHK_NULL_RETURN(presSrc);
625 
626     ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
627         m_osInterface,
628         presSrc,
629         false,
630         false));
631 
632     ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
633         m_osInterface,
634         &m_basicFeature->m_resBitstreamBuffer,
635         true,
636         true));
637 
638     uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
639     uint64_t destrAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_basicFeature->m_resBitstreamBuffer);
640     hucInputCmd.SrcAddrBottom  = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
641     hucInputCmd.SrcAddrTop     = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
642     hucInputCmd.DestAddrBottom = (uint32_t)(destrAddr & 0x00000000FFFFFFFF);
643     hucInputCmd.DestAddrTop    = (uint32_t)((destrAddr & 0xFFFFFFFF00000000) >> 32);
644 
645     MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG12), &hucInputCmd, sizeof(HucInputCmdG12));
646 
647     m_allocator->UnLock(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass]));
648 
649     return MOS_STATUS_SUCCESS;
650 }
651 
652 #if USE_CODECHAL_DEBUG_TOOL
DumpInput()653 MOS_STATUS Vp9PakIntegratePkt::DumpInput()
654 {
655     ENCODE_FUNC_CALL();
656 
657     int32_t currentPass = m_pipeline->GetCurrentPass();
658 
659     CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
660     ENCODE_CHK_NULL_RETURN(debugInterface);
661 
662     ENCODE_CHK_STATUS_RETURN(debugInterface->DumpHucDmem(
663         &m_hucPakIntDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass],
664         m_hucPakIntDmemBufferSize,
665         currentPass,
666         hucRegionDumpPakIntegrate));
667 
668     // Region 0 - Tile based input statistics from PAK/ VDEnc
669     ENCODE_CHK_STATUS_RETURN(DumpRegion(0, "_PakIntStitchBuffer", true, hucRegionDumpPakIntegrate, m_hwInterface->m_pakIntTileStatsSize));
670     // Region 15 [In/Out] - Tile Record Buffer
671     ENCODE_CHK_STATUS_RETURN(DumpRegion(15, "_TileRecordBuffer", true, hucRegionDumpPakIntegrate, m_basicFeature->m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterfaceNew->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE)));
672 
673     return MOS_STATUS_SUCCESS;
674 }
675 #endif  // USE_CODECHAL_DEBUG_TOOL
676 
// HUC_IMEM_STATE parameter setter for the PAK integrate packet.
// Only the kernel descriptor is filled in; it is taken from
// m_vdboxHucPakIntegrationKernelDescriptor. Always returns MOS_STATUS_SUCCESS.
MHW_SETPAR_DECL_SRC(HUC_IMEM_STATE, Vp9PakIntegratePkt)
{
    ENCODE_FUNC_CALL();

    // Kernel descriptor stored on this packet for the PAK integration kernel
    params.kernelDescriptor = m_vdboxHucPakIntegrationKernelDescriptor;

    return MOS_STATUS_SUCCESS;
}
685 
// HUC_DMEM_STATE parameter setter for the PAK integrate packet.
// Calls SetDmemBuffer() first, then points the command at the DMEM buffer
// selected by the recycled buffer index and the current pass.
// Returns the SetDmemBuffer() failure status if it fails, MOS_STATUS_SUCCESS otherwise.
MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE, Vp9PakIntegratePkt)
{
    ENCODE_FUNC_CALL();

    // Nothing is written to params if SetDmemBuffer() fails
    ENCODE_CHK_STATUS_RETURN(SetDmemBuffer());

    params.function = PAK_INTEGRATE;

    const uint32_t passIdx     = m_pipeline->GetCurrentPass();
    const auto    &dmemForPass = m_hucPakIntDmemBuffer[m_pipeline->m_currRecycledBufIdx][passIdx];

    // The command parameter takes a non-const resource pointer, hence the const_cast
    params.hucDataSource = const_cast<PMOS_RESOURCE>(&dmemForPass);
    // Data length is the DMEM buffer size rounded up to a cacheline multiple
    params.dataLength = MOS_ALIGN_CEIL(m_hucPakIntDmemBufferSize, CODECHAL_CACHELINE_SIZE);
    params.dmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;

    return MOS_STATUS_SUCCESS;
}
700 
// HUC_VIRTUAL_ADDR_STATE parameter setter for the PAK integrate packet.
// Region assignment done here:
//   0, 1, 15 - delegated to the tile feature (SetRegionsForPakInt)
//   4 - 7    - dummy buffer (not used for VP9); 5 and 6 flagged writable
//   8, 10    - stitch data buffer / stitch command batch buffer, only when
//              tile stitching by HW is enabled
//   9        - BRC data buffer written by HuC
// Returns MOS_STATUS_SUCCESS, or the first failure reported by
// ConfigStitchDataBuffer() / the tile feature call.
MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE, Vp9PakIntegratePkt)
{
    ENCODE_FUNC_CALL();

    params.function = PAK_INTEGRATE;

    // The stitch data buffer is configured only when HW tile stitching is enabled
    if (m_basicFeature->m_enableTileStitchByHW)
    {
        ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
    }

    const uint32_t passIdx = m_pipeline->GetCurrentPass();

    // Region 0, 1, 15
    RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, SetRegionsForPakInt, params);

    auto &regions = params.regionParams;

    // Region 4, 5, 6, 7 - Not used for VP9, all mapped to the same dummy buffer
    PMOS_RESOURCE dummyRes = const_cast<PMOS_RESOURCE>(&m_hucPakIntDummyBuffer);
    regions[4].presRegion = dummyRes;
    regions[5].presRegion = dummyRes;
    regions[5].isWritable = true;
    regions[6].presRegion = dummyRes;
    regions[6].isWritable = true;
    regions[7].presRegion = dummyRes;

    // Region 9 - HuC outputs BRC data
    regions[9].presRegion = m_basicFeature->m_hucPakIntBrcDataBuffer;
    regions[9].isWritable = true;

    if (m_basicFeature->m_enableTileStitchByHW)
    {
        // Region 8 - data buffer read by HUC for stitching cmd generation
        regions[8].presRegion = const_cast<PMOS_RESOURCE>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][passIdx]);
        regions[8].isWritable = true;

        // Region 10 - SLB for stitching cmd output from Huc
        regions[10].presRegion = const_cast<PMOS_RESOURCE>(&m_HucStitchCmdBatchBuffer.OsResource);
        regions[10].isWritable = true;
    }

    return MOS_STATUS_SUCCESS;
}
737 
738 }  // namespace encode
739