1 /*
2 * Copyright (c) 2023, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     encode_pak_integrate_packet.cpp
24 //! \brief    Defines the interface for pak integrate packet
25 //!
26 #include "mos_defs.h"
27 #include "encode_av1_pak_integrate_packet.h"
28 #include "mhw_vdbox.h"
29 #include "encode_av1_brc.h"
30 #include "encode_status_report_defs.h"
31 #include "mos_os_cp_interface_specific.h"
32 
33 
34 #define CODECHAL_ENCODE_DEFAULT_VD_COUNT 2
35 
36 namespace encode {
Init()37     MOS_STATUS Av1PakIntegratePkt::Init()
38     {
39         ENCODE_FUNC_CALL();
40 
41         m_basicFeature = dynamic_cast<Av1BasicFeature *>(m_featureManager->GetFeature(Av1FeatureIDs::basicFeature));
42         ENCODE_CHK_NULL_RETURN(m_basicFeature);
43 
44         ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Init());
45 
46         ENCODE_CHK_NULL_RETURN(m_hwInterface);
47         m_osInterface  = m_hwInterface->GetOsInterface();
48         ENCODE_CHK_NULL_RETURN(m_osInterface);
49 
50         m_miItf = m_hwInterface->GetMiInterfaceNext();
51         ENCODE_CHK_NULL_RETURN(m_miItf);
52 
53         ENCODE_CHK_NULL_RETURN(m_pipeline);
54 #ifdef _MMC_SUPPORTED
55         m_mmcState = m_pipeline->GetMmcState();
56         ENCODE_CHK_NULL_RETURN(m_mmcState);
57 #endif
58 
59         return MOS_STATUS_SUCCESS;
60     }
61 
AllocateResources()62     MOS_STATUS Av1PakIntegratePkt::AllocateResources()
63     {
64         ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::AllocateResources());
65 
66         // Only needed when tile & BRC is enabled, but the size is not changing at frame level
67         if (m_resHucPakStitchDmemBuffer[0][0] == nullptr)
68         {
69             uint8_t *data;
70             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
71 
72             // Pak stitch DMEM
73             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
74             allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
75             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
76             allocParamsForBufferLinear.Format   = Format_Buffer;
77             allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(sizeof(HucPakIntegrateDmem), CODECHAL_CACHELINE_SIZE);
78             allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
79             allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
80 
81             for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
82             {
83                 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
84                 {
85                     m_resHucPakStitchDmemBuffer[k][i] = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
86                 }
87             }
88 
89             if (m_basicFeature->m_enableTileStitchByHW || !m_basicFeature -> m_enableSWStitching)
90             {
91                 // HuC stitching data buffer
92                 allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
93                 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer";
94                 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE;
95                 MOS_RESOURCE *allocatedBuffer       = nullptr;
96                 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; ++i)
97                 {
98                     for (auto j = 0; j < CODECHAL_VDENC_BRC_NUM_OF_PASSES; ++j)
99                     {
100                         allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
101                         ENCODE_CHK_NULL_RETURN(allocatedBuffer);
102                         m_resHucStitchDataBuffer[i][j] = *allocatedBuffer;
103                     }
104                 }
105 
106                 // Second level batch buffer for HuC stitching CMD
107                 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
108                 m_HucStitchCmdBatchBuffer.bSecondLevel = true;
109                 ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
110                     m_osInterface,
111                     &m_HucStitchCmdBatchBuffer,
112                     nullptr,
113                     m_hwInterface->m_HucStitchCmdBatchBufferSize));
114                 m_HucStitchCmdBatchBuffer.iSize = m_hwInterface->m_HucStitchCmdBatchBufferSize; // for region dump
115             }
116         }
117 
118         return MOS_STATUS_SUCCESS;
119     }
120 
FreeResources()121     MOS_STATUS Av1PakIntegratePkt::FreeResources()
122     {
123         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
124 
125         ENCODE_FUNC_CALL();
126 
127         eStatus = Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
128         ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
129 
130         return eStatus;
131     }
132 
UpdateParameters()133     void Av1PakIntegratePkt::UpdateParameters()
134     {
135         ENCODE_FUNC_CALL();
136 
137         if (!m_pipeline->IsSingleTaskPhaseSupported())
138         {
139             m_osInterface->pfnResetPerfBufferID(m_osInterface);
140         }
141 
142     }
143 
Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)144     MOS_STATUS Av1PakIntegratePkt::Submit(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase)
145     {
146         ENCODE_FUNC_CALL();
147 
148         bool firstTaskInPhase = packetPhase & firstPacket;
149         bool requestProlog = !m_pipeline->IsSingleTaskPhaseSupported() || firstTaskInPhase;
150 
151         uint16_t perfTag = CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL;
152         SetPerfTag(perfTag, (uint16_t)m_basicFeature->m_mode, m_basicFeature->m_pictureCodingType);
153 
154         auto brcFeature = dynamic_cast<Av1Brc *>(m_featureManager->GetFeature(Av1FeatureIDs::av1BrcFeature));
155         ENCODE_CHK_NULL_RETURN(brcFeature);
156 
157         ENCODE_CHK_STATUS_RETURN(AddCondBBEndFor2ndPass(*commandBuffer))
158 
159         MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
160         ENCODE_CHK_NULL_RETURN(perfProfiler);
161         ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectStartCmd(
162             (void *)m_pipeline, m_osInterface, m_miItf, commandBuffer));
163 
164         if (m_pipeline->GetPipeNum() > 1)
165         {
166             // Huc basic
167             ENCODE_CHK_STATUS_RETURN(Execute(commandBuffer, true, requestProlog));
168 
169             // Add huc status update to status buffer
170             PMOS_RESOURCE osResource = nullptr;
171             uint32_t offset = 0;
172             ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusRegMask, osResource, offset));
173             ENCODE_CHK_NULL_RETURN(osResource);
174 
175             // Write HUC_STATUS mask
176             auto &storeDataParams            = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
177             storeDataParams                  = {};
178             storeDataParams.pOsResource      = osResource;
179             storeDataParams.dwResourceOffset = offset;
180             storeDataParams.dwValue          = m_hwInterface->GetHucInterfaceNext()->GetHucStatusReEncodeMask();
181             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(commandBuffer));
182 
183             // store HUC_STATUS register
184             osResource = nullptr;
185             offset     = 0;
186             ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusReg, osResource, offset));
187             ENCODE_CHK_NULL_RETURN(osResource);
188             auto mmioRegisters             = m_hucItf->GetMmioRegisters(m_vdboxIndex);
189             auto &storeRegParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
190             storeDataParams                = {};
191             storeRegParams.presStoreBuffer = osResource;
192             storeRegParams.dwOffset        = offset;
193             storeRegParams.dwRegister      = mmioRegisters->hucStatusRegOffset;
194             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(commandBuffer));
195         }
196 
197         // Use HW stitch commands only in the scalable & tile split mode
198         // For single pipe with tile replay, stitch also needed
199         if ((m_basicFeature->m_enableTileStitchByHW || !m_basicFeature->m_enableSWStitching) && m_pipeline->GetPipeNum() > 1)
200         {
201             ENCODE_CHK_STATUS_RETURN(PerformHwStitch(commandBuffer));
202         }
203 
204         ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(*commandBuffer));
205 
206         // ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, commandBuffer));
207 
208         CODECHAL_DEBUG_TOOL(
209             if (m_mmcState) {
210                 m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface));
211             })
212         // Reset parameters for next PAK execution
213         if (false == m_pipeline->IsFrameTrackingEnabled())
214         {
215             UpdateParameters();
216         }
217 
218         CODECHAL_DEBUG_TOOL
219         (
220             ENCODE_CHK_STATUS_RETURN(DumpInput());
221         )
222 
223         return MOS_STATUS_SUCCESS;
224     }
225 
EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)226     MOS_STATUS Av1PakIntegratePkt::EndStatusReport(
227         uint32_t            srType,
228         MOS_COMMAND_BUFFER *cmdBuffer)
229     {
230         ENCODE_FUNC_CALL();
231         ENCODE_CHK_NULL_RETURN(cmdBuffer);
232         ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer));
233 
234         MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
235         ENCODE_CHK_NULL_RETURN(perfProfiler);
236         ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd(
237             (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer));
238 
239         return MOS_STATUS_SUCCESS;
240     }
241 
CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)242     MOS_STATUS Av1PakIntegratePkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize)
243     {
244         ENCODE_FUNC_CALL();
245 
246         uint32_t hucCommandsSize = 0;
247         uint32_t hucPatchListSize = 0;
248         MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams;
249 
250         stateCmdSizeParams.uNumStoreDataImm = 2;
251         stateCmdSizeParams.uNumStoreReg     = 4;
252         stateCmdSizeParams.uNumMfxWait      = 11;
253         stateCmdSizeParams.uNumMiCopy       = 5;
254         stateCmdSizeParams.uNumMiFlush      = 2;
255         stateCmdSizeParams.uNumVdPipelineFlush  = 1;
256         stateCmdSizeParams.bPerformHucStreamOut = true;
257         ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize(
258             m_basicFeature->m_mode, (uint32_t*)&hucCommandsSize, (uint32_t*)&hucPatchListSize, &stateCmdSizeParams));
259 
260         if ((m_basicFeature->m_enableTileStitchByHW || !m_basicFeature->m_enableSWStitching) && m_pipeline->GetPipeNum() > 1)
261         {
262             uint32_t maxSize = 0;
263             uint32_t patchListMaxSize = 0;
264             ENCODE_CHK_NULL_RETURN(m_hwInterface);
265             ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface());
266             MhwCpInterface *cpInterface = m_hwInterface->GetCpInterface();
267             cpInterface->GetCpStateLevelCmdSize(maxSize, patchListMaxSize);
268             hucCommandsSize     += maxSize;
269             hucPatchListSize    += patchListMaxSize;
270         }
271 
272         commandBufferSize = hucCommandsSize;
273         requestedPatchListSize = m_osInterface->bUsesPatchList ? hucPatchListSize : 0;
274 
275         // reserve cmd size for hw stitch
276         commandBufferSize += m_hwStitchCmdSize;
277 
278         // 4K align since allocation is in chunks of 4K bytes.
279         commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE);
280 
281         return MOS_STATUS_SUCCESS;
282     }
283 
SetDmemBuffer() const284     MOS_STATUS Av1PakIntegratePkt::SetDmemBuffer() const
285     {
286         ENCODE_FUNC_CALL();
287 
288         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
289 
290         int32_t currentPass = m_pipeline->GetCurrentPass();
291         if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES)
292         {
293             eStatus = MOS_STATUS_INVALID_PARAMETER;
294             return eStatus;
295         }
296 
297         HucPakIntegrateDmem *hucPakStitchDmem =
298             (HucPakIntegrateDmem *)m_allocator->LockResourceForWrite(m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]);
299 
300         ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
301         MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakIntegrateDmem));
302 
303         // Reset all the offsets to be shared in the huc dmem (6*5 DW's)
304         MOS_FillMemory(hucPakStitchDmem, 6 * (MAX_PAK_NUM + 1) * sizeof(uint32_t), 0xFF);
305 
306         uint16_t numTileColumns = 1;
307         uint16_t numTileRows    = 1;
308         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
309 
310         uint32_t numTiles = 1;
311         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileNum, numTiles);
312 
313         uint16_t numTilesPerPipe     = (uint16_t)(numTiles / m_pipeline->GetPipeNum());
314         uint16_t imbalanceTilesOnVD0 = (uint16_t)(numTiles % m_pipeline->GetPipeNum());
315 
316         auto brcFeature = dynamic_cast<Av1Brc *>(m_featureManager->GetFeature(Av1FeatureIDs::av1BrcFeature));
317         ENCODE_CHK_NULL_RETURN(brcFeature);
318 
319         hucPakStitchDmem->TotalSizeInCommandBuffer = numTiles * CODECHAL_CACHELINE_SIZE;
320         // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
321         hucPakStitchDmem->OffsetInCommandBuffer = (numTiles - 1) * CODECHAL_CACHELINE_SIZE + 8;
322         hucPakStitchDmem->PicWidthInPixel       = (uint16_t)m_basicFeature->m_frameWidth;
323         hucPakStitchDmem->PicHeightInPixel      = (uint16_t)m_basicFeature->m_frameHeight;
324         hucPakStitchDmem->TotalNumberOfPAKs     = brcFeature->IsBRCEnabled() ? CODECHAL_ENCODE_DEFAULT_VD_COUNT : 0;
325         hucPakStitchDmem->Codec                 = 4;  // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc 4: av1
326 
327         hucPakStitchDmem->MAXPass           = brcFeature->IsBRCEnabled() ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1;
328         hucPakStitchDmem->CurrentPass       = (uint8_t)currentPass + 1;  // Current BRC pass [1..MAXPass]
329 
330         hucPakStitchDmem->bitdepth_luma     = m_basicFeature->m_bitDepth;    // default: 8
331         hucPakStitchDmem->bitdepth_chroma   = m_basicFeature->m_bitDepth;  // default: 8
332         hucPakStitchDmem->ChromaFormatIdc   = m_basicFeature->m_outputChromaFormat;
333 
334         uint32_t       lastTileIndex = numTiles - 1;
335         EncodeTileData tileData      = {};
336         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex);
337         hucPakStitchDmem->LastTileBS_StartInBytes = MOS_ALIGN_CEIL(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
338 
339         Av1TileStatusInfo av1TileStatsOffset    = {};
340         Av1TileStatusInfo av1StatsSize          = {};
341         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileStatusInfo, av1TileStatsOffset, av1StatsSize);
342 
343         // tileRecord starts with offset = 0
344         const uint32_t tileRecordOffset = 0;
345 
346         if (m_pipeline->GetPipeNum() > 1)
347         {
348             // set Pak Int output offset at index 0
349             hucPakStitchDmem->TileSizeRecord_offset[0] = tileRecordOffset;
350             hucPakStitchDmem->VDENCSTAT_offset[0]      = brcFeature->IsBRCEnabled() ? av1TileStatsOffset.uiVdencStatistics : 0xFFFFFFFF;
351 
352             // set Pak Int tiles count & input offset for VD0 dedicatedly due to possible extra tile on VD0
353             hucPakStitchDmem->NumTiles[0]  = numTilesPerPipe + imbalanceTilesOnVD0;
354             hucPakStitchDmem->NumSlices[0] = numTilesPerPipe + imbalanceTilesOnVD0;
355 
356             hucPakStitchDmem->TileSizeRecord_offset[1] = tileRecordOffset;
357             hucPakStitchDmem->VDENCSTAT_offset[1]      = av1TileStatsOffset.uiVdencStatistics;
358 
359             for (uint32_t i = 1; i < m_pipeline->GetPipeNum(); i++)
360             {
361                 hucPakStitchDmem->NumTiles[i]  = numTilesPerPipe;
362                 hucPakStitchDmem->NumSlices[i] = numTilesPerPipe;
363 
364                 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
365                 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
366                 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe + imbalanceTilesOnVD0) * CODECHAL_CACHELINE_SIZE + tileRecordOffset;
367                 hucPakStitchDmem->VDENCSTAT_offset[i + 1]      = (i * numTilesPerPipe + imbalanceTilesOnVD0) * av1StatsSize.uiVdencStatistics + av1TileStatsOffset.uiVdencStatistics;
368             }
369         }
370 
371         if ((m_basicFeature->m_enableTileStitchByHW || !m_basicFeature->m_enableSWStitching) && m_pipeline->GetPipeNum() > 1)
372         {
373             hucPakStitchDmem->StitchEnable        = true;
374             hucPakStitchDmem->StitchCommandOffset = 0;
375             hucPakStitchDmem->BBEndforStitch      = HUC_BATCH_BUFFER_END;
376         }
377 
378         m_allocator->UnLock(m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]);
379 
380         return eStatus;
381     }
382 
ReadSseStatistics(MOS_COMMAND_BUFFER & cmdBuffer)383     MOS_STATUS Av1PakIntegratePkt::ReadSseStatistics(MOS_COMMAND_BUFFER &cmdBuffer)
384     {
385         // implement SSE
386         ENCODE_FUNC_CALL();
387 
388         PMOS_RESOURCE osResource = nullptr;
389         uint32_t      offset     = 0;
390 
391         m_statusReport->GetAddress(statusReportSumSquareError, osResource, offset);
392 
393         for (auto i = 0; i < 3; i++)  // 64 bit SSE values for luma/ chroma channels need to be copied
394         {
395             auto &miCpyMemMemParams       = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
396             miCpyMemMemParams             = {};
397             MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr;
398             // to-do: add if condition in av1 vdenc packet so as to read from pak int
399             RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, FeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer);
400             ENCODE_CHK_NULL_RETURN(resHuCPakAggregatedFrameStatsBuffer);
401             bool tiles_enabled = false;
402             RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, FeatureIDs::encodeTile, IsEnabled, tiles_enabled);
403             miCpyMemMemParams.presSrc     = tiles_enabled && m_pipeline->GetPipeNum() > 1 ? resHuCPakAggregatedFrameStatsBuffer : m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0);
404             miCpyMemMemParams.dwSrcOffset = (m_basicFeature->m_hevcPakStatsSSEOffset + i) * sizeof(uint32_t);  // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
405             miCpyMemMemParams.presDst     = osResource;
406             miCpyMemMemParams.dwDstOffset = offset + i * sizeof(uint32_t);
407             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(&cmdBuffer));
408         }
409 
410         return MOS_STATUS_SUCCESS;
411     }
412 
SetupTilesStatusData(void * mfxStatus,void * statusReport)413     MOS_STATUS Av1PakIntegratePkt::SetupTilesStatusData(void *mfxStatus, void *statusReport)
414     {
415         ENCODE_FUNC_CALL();
416 
417         ENCODE_CHK_NULL_RETURN(mfxStatus);
418         ENCODE_CHK_NULL_RETURN(statusReport);
419         ENCODE_CHK_NULL_RETURN(m_basicFeature);
420 
421         EncodeStatusMfx *       encodeStatusMfx  = (EncodeStatusMfx *)mfxStatus;
422         EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
423 
424         uint32_t statBufIdx     = statusReportData->currOriginalPic.FrameIdx;
425         const EncodeReportTileData *tileReportData = nullptr;
426         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetReportTileData, statBufIdx, tileReportData);
427         ENCODE_CHK_NULL_RETURN(tileReportData);
428 
429         statusReportData->codecStatus                                           = CODECHAL_STATUS_SUCCESSFUL;
430         statusReportData->panicMode                                             = false;
431         statusReportData->averageQP                                             = 0;
432         statusReportData->qpY                                                   = 0;
433         statusReportData->suggestedQPYDelta                                     = 0;
434         statusReportData->numberPasses                                          = 1;
435         statusReportData->bitstreamSize                                         = 0;
436         statusReportData->numberSlices                                          = 0;
437         encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0;
438 
439         // Allocate the tile size report memory
440         statusReportData->sizeOfTileInfoBuffer = statusReportData->numberTilesInFrame * sizeof(CodechalTileInfo);
441 
442         MOS_RESOURCE *tileSizeStatusBuffer = nullptr;
443         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, tileSizeStatusBuffer);
444         ENCODE_CHK_NULL_RETURN(tileSizeStatusBuffer);
445 
446         MOS_LOCK_PARAMS lockFlags;
447         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
448         PakHwTileSizeRecord *tileRecord =
449             (PakHwTileSizeRecord *)m_allocator->Lock(tileSizeStatusBuffer, &lockFlags);
450         ENCODE_CHK_NULL_RETURN(tileRecord);
451 
452         uint32_t totalCU    = 0;
453         uint32_t sliceCount = 0;
454         double   sumQp      = 0.0;
455         for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; i++)
456         {
457             if (tileRecord[i].Length == 0)
458             {
459                 statusReportData->codecStatus = CODECHAL_STATUS_INCOMPLETE;
460                 return MOS_STATUS_SUCCESS;
461             }
462 
463             statusReportData->numTileReported                     = i + 1;
464             statusReportData->bitstreamSize += tileRecord[i].Length;
465             totalCU += (tileReportData[i].tileHeightInMinCbMinus1 + 1) * (tileReportData[i].tileWidthInMinCbMinus1 + 1);
466             sumQp += tileRecord[i].Hcp_Qp_Status_Count;
467 
468         }
469 
470         if (statusReportData->bitstreamSize == 0 ||
471             statusReportData->bitstreamSize > m_basicFeature->m_bitstreamSize)
472         {
473             statusReportData->codecStatus   = CODECHAL_STATUS_ERROR;
474             statusReportData->bitstreamSize = 0;
475             return MOS_STATUS_INVALID_FILE_SIZE;
476         }
477 
478         if (totalCU != 0)
479         {
480             statusReportData->qpY = statusReportData->averageQP =
481                 (uint8_t)((sumQp / (double)totalCU) / 4.0);  // due to TU is 4x4 and there are 4 TUs in one CU
482         }
483         else
484         {
485             return MOS_STATUS_INVALID_PARAMETER;
486         }
487 #if 0 // enable after moving sw stitching to pak int
488         if ((!m_basicFeature->m_enableTileStitchByHW || m_basicFeature -> m_enableSWStitching) && m_pipeline->m_dualEncEnable)
489         {
490             ENCODE_CHK_STATUS_RETURN(PerformSwStitch(tileReportData, tileRecord, statusReportData));
491         }
492 #endif
493         if (tileRecord)
494         {
495 #if 0 // enable after moving sw stitching to pak int
496             // clean-up the tile status report buffer
497             MOS_ZeroMemory(tileRecord, sizeof(tileRecord[0]) * statusReportData->numberTilesInFrame);
498 #endif
499             m_allocator->UnLock(tileSizeStatusBuffer);
500         }
501 
502         return MOS_STATUS_SUCCESS;
503     }
504 
Completed(void * mfxStatus,void * rcsStatus,void * statusReport)505     MOS_STATUS Av1PakIntegratePkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport)
506     {
507         ENCODE_FUNC_CALL();
508 
509         ENCODE_CHK_NULL_RETURN(mfxStatus);
510         ENCODE_CHK_NULL_RETURN(statusReport);
511         ENCODE_CHK_NULL_RETURN(m_basicFeature);
512 
513         EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
514 
515         if (statusReportData->numberTilesInFrame == 1)
516         {
517             // When Tile feature is not enabled, not need following complete options
518             return MOS_STATUS_SUCCESS;
519         }
520 
521         ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Completed(mfxStatus, rcsStatus, statusReport));
522 
523         // Tile status data is only update and performed in multi-pipe mode
524         ENCODE_CHK_STATUS_RETURN(SetupTilesStatusData(mfxStatus, statusReport));
525 
526         m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList);
527         return MOS_STATUS_SUCCESS;
528     }
529 
PerformSwStitch(const EncodeReportTileData * tileReportData,PakHwTileSizeRecord * tileRecord,EncodeStatusReportData * statusReportData)530     MOS_STATUS Av1PakIntegratePkt::PerformSwStitch(
531         const EncodeReportTileData *tileReportData,
532         PakHwTileSizeRecord        *tileRecord,
533         EncodeStatusReportData     *statusReportData)
534     {
535         ENCODE_FUNC_CALL();
536 
537         ENCODE_CHK_NULL_RETURN(tileReportData);
538         ENCODE_CHK_NULL_RETURN(tileRecord);
539 
540         uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr;
541         tempBsBuffer = bufPtr = (uint8_t *)MOS_AllocAndZeroMemory(statusReportData->bitstreamSize);
542         ENCODE_CHK_NULL_RETURN(tempBsBuffer);
543 
544         PCODEC_REF_LIST currRefList = (PCODEC_REF_LIST)statusReportData->currRefList;
545 
546         MOS_LOCK_PARAMS lockFlags;
547         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
548         lockFlags.ReadOnly = 1;
549         uint8_t *bitstream = (uint8_t *)m_allocator->Lock(
550             &currRefList->resBitstreamBuffer,
551             &lockFlags);
552         if (bitstream == nullptr)
553         {
554             MOS_FreeMemory(tempBsBuffer);
555             ENCODE_CHK_NULL_RETURN(nullptr);
556         }
557 
558         for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; i++)
559         {
560             uint32_t offset = MOS_ALIGN_CEIL(tileReportData[i].bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, MOS_PAGE_SIZE);
561             uint32_t len    = tileRecord[i].Length;
562 
563             MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
564             bufPtr += len;
565         }
566 
567         MOS_SecureMemcpy(bitstream, statusReportData->bitstreamSize, tempBsBuffer, statusReportData->bitstreamSize);
568         MOS_ZeroMemory(&bitstream[statusReportData->bitstreamSize], m_basicFeature->m_bitstreamSize - statusReportData->bitstreamSize);
569 
570         if (bitstream)
571         {
572             m_allocator->UnLock(&currRefList->resBitstreamBuffer);
573         }
574 
575         MOS_FreeMemory(tempBsBuffer);
576 
577         return MOS_STATUS_SUCCESS;
578     }
579 
PerformHwStitch(PMOS_COMMAND_BUFFER cmdBuffer)580     MOS_STATUS Av1PakIntegratePkt::PerformHwStitch(
581         PMOS_COMMAND_BUFFER cmdBuffer)
582     {
583         ENCODE_FUNC_CALL();
584 
585         // 2nd level BB buffer for stitching cmd
586         // Current location to add cmds in 2nd level batch buffer
587         m_HucStitchCmdBatchBuffer.iCurrent = 0;
588         // Reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
589         m_HucStitchCmdBatchBuffer.dwOffset = 0;
590         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(cmdBuffer, &m_HucStitchCmdBatchBuffer));
591         // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
592         auto &mfxWaitParams               = m_miItf->MHW_GETPAR_F(MFX_WAIT)();
593         mfxWaitParams                     = {};
594         mfxWaitParams.iStallVdboxPipeline = m_osInterface->osCpInterface->IsCpEnabled() ? true : false;
595         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(cmdBuffer));
596 
597         return MOS_STATUS_SUCCESS;
598     }
599 
ConfigStitchDataBuffer() const600     MOS_STATUS Av1PakIntegratePkt::ConfigStitchDataBuffer() const
601     {
602         ENCODE_FUNC_CALL();
603 
604         auto currPass = m_pipeline->GetCurrentPass();
605         HucCommandData *hucStitchDataBuf = (HucCommandData*)m_allocator->LockResourceForWrite(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass]));
606         ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
607 
608         MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
609         hucStitchDataBuf->TotalCommands          = 1;
610         hucStitchDataBuf->InputCOM[0].SizeOfData = 0xf;
611 
612         uint16_t numTileColumns = 1;
613         uint16_t numTileRows    = 1;
614         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
615 
616         HucInputCmd hucInputCmd;
617         MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmd));
618 
619         ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
620         hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
621         hucInputCmd.CmdMode             = HUC_CMD_LIST_MODE;
622         hucInputCmd.LengthOfTable       = numTileRows * numTileColumns;
623         hucInputCmd.CopySize            = m_hwInterface->m_tileRecordSize;
624 
625         // Tile record always in m_tileRecordBuffer even in scalable node
626         uint32_t      statBufIdx = m_basicFeature->m_currOriginalPic.FrameIdx;
627         MOS_RESOURCE *presSrc    = nullptr;
628 
629         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, presSrc);
630         ENCODE_CHK_NULL_RETURN(presSrc);
631 
632         ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
633             m_osInterface,
634             presSrc,
635             false,
636             false));
637 
638         ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
639             m_osInterface,
640             &m_basicFeature->m_resBitstreamBuffer,
641             true,
642             true));
643 
644         uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
645         uint64_t destrAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_basicFeature->m_resBitstreamBuffer);
646         hucInputCmd.SrcAddrBottom  = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
647         hucInputCmd.SrcAddrTop     = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
648         hucInputCmd.DestAddrBottom = (uint32_t)(destrAddr & 0x00000000FFFFFFFF);
649         hucInputCmd.DestAddrTop    = (uint32_t)((destrAddr & 0xFFFFFFFF00000000) >> 32);
650 
651         MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmd), &hucInputCmd, sizeof(HucInputCmd));
652 
653         m_allocator->UnLock(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass]));
654 
655         return MOS_STATUS_SUCCESS;
656     }
657 
AddCondBBEndFor2ndPass(MOS_COMMAND_BUFFER & cmdBuffer)658     MOS_STATUS Av1PakIntegratePkt::AddCondBBEndFor2ndPass(MOS_COMMAND_BUFFER &cmdBuffer)
659     {
660         ENCODE_FUNC_CALL();
661 
662         if (m_pipeline->IsSingleTaskPhaseSupported() || m_pipeline->IsFirstPass() || m_pipeline->GetPassNum() == 1)
663         {
664             return MOS_STATUS_SUCCESS;
665         }
666 
667         auto &miConditionalBatchBufferEndParams = m_miItf->MHW_GETPAR_F(MI_CONDITIONAL_BATCH_BUFFER_END)();
668         miConditionalBatchBufferEndParams       = {};
669 
670         // VDENC uses HuC FW generated semaphore for conditional 2nd pass
671         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
672             m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0);
673 
674         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_CONDITIONAL_BATCH_BUFFER_END)(&cmdBuffer));
675 
676         return MOS_STATUS_SUCCESS;
677     }
678 
    // HUC_IMEM_STATE parameter setter for the PAK integrate packet:
    // the only field programmed here is the kernel descriptor, taken from
    // m_vdboxHucPakIntKernelDescriptor. Always returns MOS_STATUS_SUCCESS.
    MHW_SETPAR_DECL_SRC(HUC_IMEM_STATE, Av1PakIntegratePkt)
    {
        params.kernelDescriptor = m_vdboxHucPakIntKernelDescriptor;
        return MOS_STATUS_SUCCESS;
    }
684 
MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE,Av1PakIntegratePkt)685     MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE, Av1PakIntegratePkt)
686     {
687         params.function = PAK_INTEGRATE;
688 
689         ENCODE_CHK_STATUS_RETURN(SetDmemBuffer());
690 
691         int32_t currentPass  = m_pipeline->GetCurrentPass();
692         params.hucDataSource = m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass];
693         params.dataLength    = MOS_ALIGN_CEIL(sizeof(HucPakIntegrateDmem), CODECHAL_CACHELINE_SIZE);
694         params.dmemOffset    = HUC_DMEM_OFFSET_RTOS_GEMS;
695 
696         return MOS_STATUS_SUCCESS;
697     }
698 
MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE,Av1PakIntegratePkt)699     MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE, Av1PakIntegratePkt)
700     {
701         params.function = PAK_INTEGRATE;
702 
703         uint32_t statBufIdx = 0;
704         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetStatisticsBufferIndex, statBufIdx);
705 
706         MOS_RESOURCE *resTileBasedStatisticsBuffer = nullptr;
707         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileBasedStatisticsBuffer, statBufIdx, resTileBasedStatisticsBuffer);
708         MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr;
709         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer);
710         MOS_RESOURCE *resTileRecordBuffer = nullptr;
711         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, resTileRecordBuffer);
712         uint32_t numTiles = 1;
713         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileNum, numTiles);
714         uint32_t       lastTileIndex = numTiles - 1;
715         EncodeTileData tileData      = {};
716         RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex);
717 
718         // Add Virtual addr
719         params.regionParams[0].presRegion = resTileBasedStatisticsBuffer;                 // Region 0 Input - Tile based input statistics from PAK/ VDEnc
720         params.regionParams[0].dwOffset   = 0;
721         params.regionParams[1].presRegion = resHuCPakAggregatedFrameStatsBuffer;          // Region 1 Output - HuC Frame statistics output
722         params.regionParams[1].isWritable = true;
723 
724         params.regionParams[4].presRegion = &m_basicFeature->m_resBitstreamBuffer;        // Region 4 Input - Last Tile bitstream
725         params.regionParams[4].dwOffset   = MOS_ALIGN_CEIL(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
726         params.regionParams[5].presRegion = &m_basicFeature->m_resBitstreamBuffer;        // Region 5 Output - HuC modifies the last tile bitstream before stitch
727         params.regionParams[5].dwOffset   = MOS_ALIGN_CEIL(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
728         params.regionParams[5].isWritable = true;
729         MOS_RESOURCE *resBrcDataBuffer                 = nullptr;
730         RUN_FEATURE_INTERFACE_RETURN(Av1Brc, Av1FeatureIDs::av1BrcFeature, GetBrcDataBuffer, resBrcDataBuffer);
731         params.regionParams[9].presRegion = resBrcDataBuffer;                              // Region 9 Output - HuC outputs BRC data
732         params.regionParams[9].isWritable = true;
733 
734         params.regionParams[15].presRegion = resTileRecordBuffer;
735         params.regionParams[15].dwOffset = 0;
736 
737         if ((m_basicFeature->m_enableTileStitchByHW || !m_basicFeature->m_enableSWStitching) && m_pipeline->GetPipeNum() > 1)
738         {
739             ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
740 
741             uint32_t currentPass               = m_pipeline->GetCurrentPass();
742             params.regionParams[8].presRegion  = const_cast<PMOS_RESOURCE>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]);  // Region 8 - data buffer read by HUC for stitching cmd generation
743             params.regionParams[10].presRegion = const_cast<PMOS_RESOURCE>(&m_HucStitchCmdBatchBuffer.OsResource);  // Region 10 - SLB for stitching cmd output from Huc
744             params.regionParams[10].isWritable = true;
745         }
746 
747         return MOS_STATUS_SUCCESS;
748     }
749 
750 #if USE_CODECHAL_DEBUG_TOOL
DumpInput()751     MOS_STATUS Av1PakIntegratePkt::DumpInput()
752     {
753         ENCODE_FUNC_CALL();
754         int32_t currentPass = m_pipeline->GetCurrentPass();
755 
756         CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
757         ENCODE_CHK_NULL_RETURN(debugInterface);
758 
759         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpHucDmem(
760             m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass],
761             m_vdencHucPakDmemBufferSize,
762             currentPass,
763             hucRegionDumpPakIntegrate));
764 
765         ENCODE_CHK_STATUS_RETURN(DumpRegion(0, "_TileBasedStatistic", true, hucRegionDumpPakIntegrate));
766         ENCODE_CHK_STATUS_RETURN(DumpRegion(4, "_Bitstream", true, hucRegionDumpPakIntegrate, 1024));
767         ENCODE_CHK_STATUS_RETURN(DumpRegion(15, "_TileRecord", true, hucRegionDumpPakIntegrate));
768 
769         return MOS_STATUS_SUCCESS;
770     }
771 
DumpOutput()772     MOS_STATUS Av1PakIntegratePkt::DumpOutput()
773     {
774         ENCODE_FUNC_CALL();
775 
776         ENCODE_CHK_STATUS_RETURN(DumpRegion(0, "_TileBasedStatistic", false, hucRegionDumpPakIntegrate));
777         ENCODE_CHK_STATUS_RETURN(DumpRegion(1, "_HuCPakAggregatedFrameStats", false, hucRegionDumpPakIntegrate));
778         ENCODE_CHK_STATUS_RETURN(DumpRegion(5, "_Bitstream", false, hucRegionDumpPakIntegrate, 1024));
779         ENCODE_CHK_STATUS_RETURN(DumpRegion(9, "_OutputBrcData", false, hucRegionDumpPakIntegrate));
780         ENCODE_CHK_STATUS_RETURN(DumpRegion(10, "_StitchCmd", false, hucRegionDumpPakIntegrate));
781 
782         return MOS_STATUS_SUCCESS;
783     }
784 #endif
785 }
786