/*
* Copyright (c) 2023, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file     encode_av1_pak_integrate_packet.cpp
//! \brief    Defines the interface for the AV1 pak integrate packet
//!
26 #include "mos_defs.h" 27 #include "encode_av1_pak_integrate_packet.h" 28 #include "mhw_vdbox.h" 29 #include "encode_av1_brc.h" 30 #include "encode_status_report_defs.h" 31 #include "mos_os_cp_interface_specific.h" 32 33 34 #define CODECHAL_ENCODE_DEFAULT_VD_COUNT 2 35 36 namespace encode { Init()37 MOS_STATUS Av1PakIntegratePkt::Init() 38 { 39 ENCODE_FUNC_CALL(); 40 41 m_basicFeature = dynamic_cast<Av1BasicFeature *>(m_featureManager->GetFeature(Av1FeatureIDs::basicFeature)); 42 ENCODE_CHK_NULL_RETURN(m_basicFeature); 43 44 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Init()); 45 46 ENCODE_CHK_NULL_RETURN(m_hwInterface); 47 m_osInterface = m_hwInterface->GetOsInterface(); 48 ENCODE_CHK_NULL_RETURN(m_osInterface); 49 50 m_miItf = m_hwInterface->GetMiInterfaceNext(); 51 ENCODE_CHK_NULL_RETURN(m_miItf); 52 53 ENCODE_CHK_NULL_RETURN(m_pipeline); 54 #ifdef _MMC_SUPPORTED 55 m_mmcState = m_pipeline->GetMmcState(); 56 ENCODE_CHK_NULL_RETURN(m_mmcState); 57 #endif 58 59 return MOS_STATUS_SUCCESS; 60 } 61 AllocateResources()62 MOS_STATUS Av1PakIntegratePkt::AllocateResources() 63 { 64 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::AllocateResources()); 65 66 // Only needed when tile & BRC is enabled, but the size is not changing at frame level 67 if (m_resHucPakStitchDmemBuffer[0][0] == nullptr) 68 { 69 uint8_t *data; 70 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; 71 72 // Pak stitch DMEM 73 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); 74 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; 75 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; 76 allocParamsForBufferLinear.Format = Format_Buffer; 77 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakIntegrateDmem), CODECHAL_CACHELINE_SIZE); 78 allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer"; 79 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE; 80 81 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++) 
82 { 83 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++) 84 { 85 m_resHucPakStitchDmemBuffer[k][i] = m_allocator->AllocateResource(allocParamsForBufferLinear, true); 86 } 87 } 88 89 if (m_basicFeature->m_enableTileStitchByHW || !m_basicFeature -> m_enableSWStitching) 90 { 91 // HuC stitching data buffer 92 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE); 93 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer"; 94 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE; 95 MOS_RESOURCE *allocatedBuffer = nullptr; 96 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; ++i) 97 { 98 for (auto j = 0; j < CODECHAL_VDENC_BRC_NUM_OF_PASSES; ++j) 99 { 100 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true); 101 ENCODE_CHK_NULL_RETURN(allocatedBuffer); 102 m_resHucStitchDataBuffer[i][j] = *allocatedBuffer; 103 } 104 } 105 106 // Second level batch buffer for HuC stitching CMD 107 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer)); 108 m_HucStitchCmdBatchBuffer.bSecondLevel = true; 109 ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( 110 m_osInterface, 111 &m_HucStitchCmdBatchBuffer, 112 nullptr, 113 m_hwInterface->m_HucStitchCmdBatchBufferSize)); 114 m_HucStitchCmdBatchBuffer.iSize = m_hwInterface->m_HucStitchCmdBatchBufferSize; // for region dump 115 } 116 } 117 118 return MOS_STATUS_SUCCESS; 119 } 120 FreeResources()121 MOS_STATUS Av1PakIntegratePkt::FreeResources() 122 { 123 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 124 125 ENCODE_FUNC_CALL(); 126 127 eStatus = Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr); 128 ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS); 129 130 return eStatus; 131 } 132 UpdateParameters()133 void Av1PakIntegratePkt::UpdateParameters() 134 { 135 ENCODE_FUNC_CALL(); 136 137 if (!m_pipeline->IsSingleTaskPhaseSupported()) 138 { 139 
m_osInterface->pfnResetPerfBufferID(m_osInterface); 140 } 141 142 } 143 Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)144 MOS_STATUS Av1PakIntegratePkt::Submit(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase) 145 { 146 ENCODE_FUNC_CALL(); 147 148 bool firstTaskInPhase = packetPhase & firstPacket; 149 bool requestProlog = !m_pipeline->IsSingleTaskPhaseSupported() || firstTaskInPhase; 150 151 uint16_t perfTag = CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL; 152 SetPerfTag(perfTag, (uint16_t)m_basicFeature->m_mode, m_basicFeature->m_pictureCodingType); 153 154 auto brcFeature = dynamic_cast<Av1Brc *>(m_featureManager->GetFeature(Av1FeatureIDs::av1BrcFeature)); 155 ENCODE_CHK_NULL_RETURN(brcFeature); 156 157 ENCODE_CHK_STATUS_RETURN(AddCondBBEndFor2ndPass(*commandBuffer)) 158 159 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance(); 160 ENCODE_CHK_NULL_RETURN(perfProfiler); 161 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectStartCmd( 162 (void *)m_pipeline, m_osInterface, m_miItf, commandBuffer)); 163 164 if (m_pipeline->GetPipeNum() > 1) 165 { 166 // Huc basic 167 ENCODE_CHK_STATUS_RETURN(Execute(commandBuffer, true, requestProlog)); 168 169 // Add huc status update to status buffer 170 PMOS_RESOURCE osResource = nullptr; 171 uint32_t offset = 0; 172 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusRegMask, osResource, offset)); 173 ENCODE_CHK_NULL_RETURN(osResource); 174 175 // Write HUC_STATUS mask 176 auto &storeDataParams = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)(); 177 storeDataParams = {}; 178 storeDataParams.pOsResource = osResource; 179 storeDataParams.dwResourceOffset = offset; 180 storeDataParams.dwValue = m_hwInterface->GetHucInterfaceNext()->GetHucStatusReEncodeMask(); 181 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(commandBuffer)); 182 183 // store HUC_STATUS register 184 osResource = nullptr; 185 offset = 0; 186 
ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusReg, osResource, offset)); 187 ENCODE_CHK_NULL_RETURN(osResource); 188 auto mmioRegisters = m_hucItf->GetMmioRegisters(m_vdboxIndex); 189 auto &storeRegParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)(); 190 storeDataParams = {}; 191 storeRegParams.presStoreBuffer = osResource; 192 storeRegParams.dwOffset = offset; 193 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset; 194 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(commandBuffer)); 195 } 196 197 // Use HW stitch commands only in the scalable & tile split mode 198 // For single pipe with tile replay, stitch also needed 199 if ((m_basicFeature->m_enableTileStitchByHW || !m_basicFeature->m_enableSWStitching) && m_pipeline->GetPipeNum() > 1) 200 { 201 ENCODE_CHK_STATUS_RETURN(PerformHwStitch(commandBuffer)); 202 } 203 204 ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(*commandBuffer)); 205 206 // ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, commandBuffer)); 207 208 CODECHAL_DEBUG_TOOL( 209 if (m_mmcState) { 210 m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface)); 211 }) 212 // Reset parameters for next PAK execution 213 if (false == m_pipeline->IsFrameTrackingEnabled()) 214 { 215 UpdateParameters(); 216 } 217 218 CODECHAL_DEBUG_TOOL 219 ( 220 ENCODE_CHK_STATUS_RETURN(DumpInput()); 221 ) 222 223 return MOS_STATUS_SUCCESS; 224 } 225 EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)226 MOS_STATUS Av1PakIntegratePkt::EndStatusReport( 227 uint32_t srType, 228 MOS_COMMAND_BUFFER *cmdBuffer) 229 { 230 ENCODE_FUNC_CALL(); 231 ENCODE_CHK_NULL_RETURN(cmdBuffer); 232 ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer)); 233 234 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance(); 235 ENCODE_CHK_NULL_RETURN(perfProfiler); 236 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd( 237 (void *)m_pipeline, m_osInterface, m_miItf, 
cmdBuffer)); 238 239 return MOS_STATUS_SUCCESS; 240 } 241 CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)242 MOS_STATUS Av1PakIntegratePkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize) 243 { 244 ENCODE_FUNC_CALL(); 245 246 uint32_t hucCommandsSize = 0; 247 uint32_t hucPatchListSize = 0; 248 MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams; 249 250 stateCmdSizeParams.uNumStoreDataImm = 2; 251 stateCmdSizeParams.uNumStoreReg = 4; 252 stateCmdSizeParams.uNumMfxWait = 11; 253 stateCmdSizeParams.uNumMiCopy = 5; 254 stateCmdSizeParams.uNumMiFlush = 2; 255 stateCmdSizeParams.uNumVdPipelineFlush = 1; 256 stateCmdSizeParams.bPerformHucStreamOut = true; 257 ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize( 258 m_basicFeature->m_mode, (uint32_t*)&hucCommandsSize, (uint32_t*)&hucPatchListSize, &stateCmdSizeParams)); 259 260 if ((m_basicFeature->m_enableTileStitchByHW || !m_basicFeature->m_enableSWStitching) && m_pipeline->GetPipeNum() > 1) 261 { 262 uint32_t maxSize = 0; 263 uint32_t patchListMaxSize = 0; 264 ENCODE_CHK_NULL_RETURN(m_hwInterface); 265 ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface()); 266 MhwCpInterface *cpInterface = m_hwInterface->GetCpInterface(); 267 cpInterface->GetCpStateLevelCmdSize(maxSize, patchListMaxSize); 268 hucCommandsSize += maxSize; 269 hucPatchListSize += patchListMaxSize; 270 } 271 272 commandBufferSize = hucCommandsSize; 273 requestedPatchListSize = m_osInterface->bUsesPatchList ? hucPatchListSize : 0; 274 275 // reserve cmd size for hw stitch 276 commandBufferSize += m_hwStitchCmdSize; 277 278 // 4K align since allocation is in chunks of 4K bytes. 
279 commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE); 280 281 return MOS_STATUS_SUCCESS; 282 } 283 SetDmemBuffer() const284 MOS_STATUS Av1PakIntegratePkt::SetDmemBuffer() const 285 { 286 ENCODE_FUNC_CALL(); 287 288 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 289 290 int32_t currentPass = m_pipeline->GetCurrentPass(); 291 if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES) 292 { 293 eStatus = MOS_STATUS_INVALID_PARAMETER; 294 return eStatus; 295 } 296 297 HucPakIntegrateDmem *hucPakStitchDmem = 298 (HucPakIntegrateDmem *)m_allocator->LockResourceForWrite(m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]); 299 300 ENCODE_CHK_NULL_RETURN(hucPakStitchDmem); 301 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakIntegrateDmem)); 302 303 // Reset all the offsets to be shared in the huc dmem (6*5 DW's) 304 MOS_FillMemory(hucPakStitchDmem, 6 * (MAX_PAK_NUM + 1) * sizeof(uint32_t), 0xFF); 305 306 uint16_t numTileColumns = 1; 307 uint16_t numTileRows = 1; 308 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns); 309 310 uint32_t numTiles = 1; 311 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileNum, numTiles); 312 313 uint16_t numTilesPerPipe = (uint16_t)(numTiles / m_pipeline->GetPipeNum()); 314 uint16_t imbalanceTilesOnVD0 = (uint16_t)(numTiles % m_pipeline->GetPipeNum()); 315 316 auto brcFeature = dynamic_cast<Av1Brc *>(m_featureManager->GetFeature(Av1FeatureIDs::av1BrcFeature)); 317 ENCODE_CHK_NULL_RETURN(brcFeature); 318 319 hucPakStitchDmem->TotalSizeInCommandBuffer = numTiles * CODECHAL_CACHELINE_SIZE; 320 // Last tile length may get modified by HuC. 
Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record 321 hucPakStitchDmem->OffsetInCommandBuffer = (numTiles - 1) * CODECHAL_CACHELINE_SIZE + 8; 322 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_basicFeature->m_frameWidth; 323 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_basicFeature->m_frameHeight; 324 hucPakStitchDmem->TotalNumberOfPAKs = brcFeature->IsBRCEnabled() ? CODECHAL_ENCODE_DEFAULT_VD_COUNT : 0; 325 hucPakStitchDmem->Codec = 4; // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc 4: av1 326 327 hucPakStitchDmem->MAXPass = brcFeature->IsBRCEnabled() ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1; 328 hucPakStitchDmem->CurrentPass = (uint8_t)currentPass + 1; // Current BRC pass [1..MAXPass] 329 330 hucPakStitchDmem->bitdepth_luma = m_basicFeature->m_bitDepth; // default: 8 331 hucPakStitchDmem->bitdepth_chroma = m_basicFeature->m_bitDepth; // default: 8 332 hucPakStitchDmem->ChromaFormatIdc = m_basicFeature->m_outputChromaFormat; 333 334 uint32_t lastTileIndex = numTiles - 1; 335 EncodeTileData tileData = {}; 336 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex); 337 hucPakStitchDmem->LastTileBS_StartInBytes = MOS_ALIGN_CEIL(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); 338 339 Av1TileStatusInfo av1TileStatsOffset = {}; 340 Av1TileStatusInfo av1StatsSize = {}; 341 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileStatusInfo, av1TileStatsOffset, av1StatsSize); 342 343 // tileRecord starts with offset = 0 344 const uint32_t tileRecordOffset = 0; 345 346 if (m_pipeline->GetPipeNum() > 1) 347 { 348 // set Pak Int output offset at index 0 349 hucPakStitchDmem->TileSizeRecord_offset[0] = tileRecordOffset; 350 hucPakStitchDmem->VDENCSTAT_offset[0] = brcFeature->IsBRCEnabled() ? 
av1TileStatsOffset.uiVdencStatistics : 0xFFFFFFFF; 351 352 // set Pak Int tiles count & input offset for VD0 dedicatedly due to possible extra tile on VD0 353 hucPakStitchDmem->NumTiles[0] = numTilesPerPipe + imbalanceTilesOnVD0; 354 hucPakStitchDmem->NumSlices[0] = numTilesPerPipe + imbalanceTilesOnVD0; 355 356 hucPakStitchDmem->TileSizeRecord_offset[1] = tileRecordOffset; 357 hucPakStitchDmem->VDENCSTAT_offset[1] = av1TileStatsOffset.uiVdencStatistics; 358 359 for (uint32_t i = 1; i < m_pipeline->GetPipeNum(); i++) 360 { 361 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe; 362 hucPakStitchDmem->NumSlices[i] = numTilesPerPipe; 363 364 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic. 365 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region. 366 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe + imbalanceTilesOnVD0) * CODECHAL_CACHELINE_SIZE + tileRecordOffset; 367 hucPakStitchDmem->VDENCSTAT_offset[i + 1] = (i * numTilesPerPipe + imbalanceTilesOnVD0) * av1StatsSize.uiVdencStatistics + av1TileStatsOffset.uiVdencStatistics; 368 } 369 } 370 371 if ((m_basicFeature->m_enableTileStitchByHW || !m_basicFeature->m_enableSWStitching) && m_pipeline->GetPipeNum() > 1) 372 { 373 hucPakStitchDmem->StitchEnable = true; 374 hucPakStitchDmem->StitchCommandOffset = 0; 375 hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END; 376 } 377 378 m_allocator->UnLock(m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]); 379 380 return eStatus; 381 } 382 ReadSseStatistics(MOS_COMMAND_BUFFER & cmdBuffer)383 MOS_STATUS Av1PakIntegratePkt::ReadSseStatistics(MOS_COMMAND_BUFFER &cmdBuffer) 384 { 385 // implement SSE 386 ENCODE_FUNC_CALL(); 387 388 PMOS_RESOURCE osResource = nullptr; 389 uint32_t offset = 0; 390 391 m_statusReport->GetAddress(statusReportSumSquareError, osResource, offset); 392 393 for (auto i = 0; i < 3; i++) // 64 bit SSE 
values for luma/ chroma channels need to be copied 394 { 395 auto &miCpyMemMemParams = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)(); 396 miCpyMemMemParams = {}; 397 MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr; 398 // to-do: add if condition in av1 vdenc packet so as to read from pak int 399 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, FeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer); 400 ENCODE_CHK_NULL_RETURN(resHuCPakAggregatedFrameStatsBuffer); 401 bool tiles_enabled = false; 402 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, FeatureIDs::encodeTile, IsEnabled, tiles_enabled); 403 miCpyMemMemParams.presSrc = tiles_enabled && m_pipeline->GetPipeNum() > 1 ? resHuCPakAggregatedFrameStatsBuffer : m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0); 404 miCpyMemMemParams.dwSrcOffset = (m_basicFeature->m_hevcPakStatsSSEOffset + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma 405 miCpyMemMemParams.presDst = osResource; 406 miCpyMemMemParams.dwDstOffset = offset + i * sizeof(uint32_t); 407 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(&cmdBuffer)); 408 } 409 410 return MOS_STATUS_SUCCESS; 411 } 412 SetupTilesStatusData(void * mfxStatus,void * statusReport)413 MOS_STATUS Av1PakIntegratePkt::SetupTilesStatusData(void *mfxStatus, void *statusReport) 414 { 415 ENCODE_FUNC_CALL(); 416 417 ENCODE_CHK_NULL_RETURN(mfxStatus); 418 ENCODE_CHK_NULL_RETURN(statusReport); 419 ENCODE_CHK_NULL_RETURN(m_basicFeature); 420 421 EncodeStatusMfx * encodeStatusMfx = (EncodeStatusMfx *)mfxStatus; 422 EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport; 423 424 uint32_t statBufIdx = statusReportData->currOriginalPic.FrameIdx; 425 const EncodeReportTileData *tileReportData = nullptr; 426 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetReportTileData, statBufIdx, tileReportData); 427 
ENCODE_CHK_NULL_RETURN(tileReportData); 428 429 statusReportData->codecStatus = CODECHAL_STATUS_SUCCESSFUL; 430 statusReportData->panicMode = false; 431 statusReportData->averageQP = 0; 432 statusReportData->qpY = 0; 433 statusReportData->suggestedQPYDelta = 0; 434 statusReportData->numberPasses = 1; 435 statusReportData->bitstreamSize = 0; 436 statusReportData->numberSlices = 0; 437 encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0; 438 439 // Allocate the tile size report memory 440 statusReportData->sizeOfTileInfoBuffer = statusReportData->numberTilesInFrame * sizeof(CodechalTileInfo); 441 442 MOS_RESOURCE *tileSizeStatusBuffer = nullptr; 443 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, tileSizeStatusBuffer); 444 ENCODE_CHK_NULL_RETURN(tileSizeStatusBuffer); 445 446 MOS_LOCK_PARAMS lockFlags; 447 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); 448 PakHwTileSizeRecord *tileRecord = 449 (PakHwTileSizeRecord *)m_allocator->Lock(tileSizeStatusBuffer, &lockFlags); 450 ENCODE_CHK_NULL_RETURN(tileRecord); 451 452 uint32_t totalCU = 0; 453 uint32_t sliceCount = 0; 454 double sumQp = 0.0; 455 for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; i++) 456 { 457 if (tileRecord[i].Length == 0) 458 { 459 statusReportData->codecStatus = CODECHAL_STATUS_INCOMPLETE; 460 return MOS_STATUS_SUCCESS; 461 } 462 463 statusReportData->numTileReported = i + 1; 464 statusReportData->bitstreamSize += tileRecord[i].Length; 465 totalCU += (tileReportData[i].tileHeightInMinCbMinus1 + 1) * (tileReportData[i].tileWidthInMinCbMinus1 + 1); 466 sumQp += tileRecord[i].Hcp_Qp_Status_Count; 467 468 } 469 470 if (statusReportData->bitstreamSize == 0 || 471 statusReportData->bitstreamSize > m_basicFeature->m_bitstreamSize) 472 { 473 statusReportData->codecStatus = CODECHAL_STATUS_ERROR; 474 statusReportData->bitstreamSize = 0; 475 return MOS_STATUS_INVALID_FILE_SIZE; 476 } 477 478 if (totalCU != 0) 479 
{ 480 statusReportData->qpY = statusReportData->averageQP = 481 (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU 482 } 483 else 484 { 485 return MOS_STATUS_INVALID_PARAMETER; 486 } 487 #if 0 // enable after moving sw stitching to pak int 488 if ((!m_basicFeature->m_enableTileStitchByHW || m_basicFeature -> m_enableSWStitching) && m_pipeline->m_dualEncEnable) 489 { 490 ENCODE_CHK_STATUS_RETURN(PerformSwStitch(tileReportData, tileRecord, statusReportData)); 491 } 492 #endif 493 if (tileRecord) 494 { 495 #if 0 // enable after moving sw stitching to pak int 496 // clean-up the tile status report buffer 497 MOS_ZeroMemory(tileRecord, sizeof(tileRecord[0]) * statusReportData->numberTilesInFrame); 498 #endif 499 m_allocator->UnLock(tileSizeStatusBuffer); 500 } 501 502 return MOS_STATUS_SUCCESS; 503 } 504 Completed(void * mfxStatus,void * rcsStatus,void * statusReport)505 MOS_STATUS Av1PakIntegratePkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport) 506 { 507 ENCODE_FUNC_CALL(); 508 509 ENCODE_CHK_NULL_RETURN(mfxStatus); 510 ENCODE_CHK_NULL_RETURN(statusReport); 511 ENCODE_CHK_NULL_RETURN(m_basicFeature); 512 513 EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport; 514 515 if (statusReportData->numberTilesInFrame == 1) 516 { 517 // When Tile feature is not enabled, not need following complete options 518 return MOS_STATUS_SUCCESS; 519 } 520 521 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Completed(mfxStatus, rcsStatus, statusReport)); 522 523 // Tile status data is only update and performed in multi-pipe mode 524 ENCODE_CHK_STATUS_RETURN(SetupTilesStatusData(mfxStatus, statusReport)); 525 526 m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList); 527 return MOS_STATUS_SUCCESS; 528 } 529 PerformSwStitch(const EncodeReportTileData * tileReportData,PakHwTileSizeRecord * tileRecord,EncodeStatusReportData * statusReportData)530 MOS_STATUS Av1PakIntegratePkt::PerformSwStitch( 
531 const EncodeReportTileData *tileReportData, 532 PakHwTileSizeRecord *tileRecord, 533 EncodeStatusReportData *statusReportData) 534 { 535 ENCODE_FUNC_CALL(); 536 537 ENCODE_CHK_NULL_RETURN(tileReportData); 538 ENCODE_CHK_NULL_RETURN(tileRecord); 539 540 uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr; 541 tempBsBuffer = bufPtr = (uint8_t *)MOS_AllocAndZeroMemory(statusReportData->bitstreamSize); 542 ENCODE_CHK_NULL_RETURN(tempBsBuffer); 543 544 PCODEC_REF_LIST currRefList = (PCODEC_REF_LIST)statusReportData->currRefList; 545 546 MOS_LOCK_PARAMS lockFlags; 547 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); 548 lockFlags.ReadOnly = 1; 549 uint8_t *bitstream = (uint8_t *)m_allocator->Lock( 550 &currRefList->resBitstreamBuffer, 551 &lockFlags); 552 if (bitstream == nullptr) 553 { 554 MOS_FreeMemory(tempBsBuffer); 555 ENCODE_CHK_NULL_RETURN(nullptr); 556 } 557 558 for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; i++) 559 { 560 uint32_t offset = MOS_ALIGN_CEIL(tileReportData[i].bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, MOS_PAGE_SIZE); 561 uint32_t len = tileRecord[i].Length; 562 563 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len); 564 bufPtr += len; 565 } 566 567 MOS_SecureMemcpy(bitstream, statusReportData->bitstreamSize, tempBsBuffer, statusReportData->bitstreamSize); 568 MOS_ZeroMemory(&bitstream[statusReportData->bitstreamSize], m_basicFeature->m_bitstreamSize - statusReportData->bitstreamSize); 569 570 if (bitstream) 571 { 572 m_allocator->UnLock(&currRefList->resBitstreamBuffer); 573 } 574 575 MOS_FreeMemory(tempBsBuffer); 576 577 return MOS_STATUS_SUCCESS; 578 } 579 PerformHwStitch(PMOS_COMMAND_BUFFER cmdBuffer)580 MOS_STATUS Av1PakIntegratePkt::PerformHwStitch( 581 PMOS_COMMAND_BUFFER cmdBuffer) 582 { 583 ENCODE_FUNC_CALL(); 584 585 // 2nd level BB buffer for stitching cmd 586 // Current location to add cmds in 2nd level batch buffer 587 m_HucStitchCmdBatchBuffer.iCurrent = 0; 588 // Reset starting location (offset) executing 
2nd level batch buffer for each frame & each pass 589 m_HucStitchCmdBatchBuffer.dwOffset = 0; 590 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(cmdBuffer, &m_HucStitchCmdBatchBuffer)); 591 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases 592 auto &mfxWaitParams = m_miItf->MHW_GETPAR_F(MFX_WAIT)(); 593 mfxWaitParams = {}; 594 mfxWaitParams.iStallVdboxPipeline = m_osInterface->osCpInterface->IsCpEnabled() ? true : false; 595 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(cmdBuffer)); 596 597 return MOS_STATUS_SUCCESS; 598 } 599 ConfigStitchDataBuffer() const600 MOS_STATUS Av1PakIntegratePkt::ConfigStitchDataBuffer() const 601 { 602 ENCODE_FUNC_CALL(); 603 604 auto currPass = m_pipeline->GetCurrentPass(); 605 HucCommandData *hucStitchDataBuf = (HucCommandData*)m_allocator->LockResourceForWrite(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass])); 606 ENCODE_CHK_NULL_RETURN(hucStitchDataBuf); 607 608 MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData)); 609 hucStitchDataBuf->TotalCommands = 1; 610 hucStitchDataBuf->InputCOM[0].SizeOfData = 0xf; 611 612 uint16_t numTileColumns = 1; 613 uint16_t numTileRows = 1; 614 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns); 615 616 HucInputCmd hucInputCmd; 617 MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmd)); 618 619 ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface); 620 hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 
4 : 0; 621 hucInputCmd.CmdMode = HUC_CMD_LIST_MODE; 622 hucInputCmd.LengthOfTable = numTileRows * numTileColumns; 623 hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize; 624 625 // Tile record always in m_tileRecordBuffer even in scalable node 626 uint32_t statBufIdx = m_basicFeature->m_currOriginalPic.FrameIdx; 627 MOS_RESOURCE *presSrc = nullptr; 628 629 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, presSrc); 630 ENCODE_CHK_NULL_RETURN(presSrc); 631 632 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource( 633 m_osInterface, 634 presSrc, 635 false, 636 false)); 637 638 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource( 639 m_osInterface, 640 &m_basicFeature->m_resBitstreamBuffer, 641 true, 642 true)); 643 644 uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc); 645 uint64_t destrAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_basicFeature->m_resBitstreamBuffer); 646 hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF); 647 hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32); 648 hucInputCmd.DestAddrBottom = (uint32_t)(destrAddr & 0x00000000FFFFFFFF); 649 hucInputCmd.DestAddrTop = (uint32_t)((destrAddr & 0xFFFFFFFF00000000) >> 32); 650 651 MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmd), &hucInputCmd, sizeof(HucInputCmd)); 652 653 m_allocator->UnLock(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass])); 654 655 return MOS_STATUS_SUCCESS; 656 } 657 AddCondBBEndFor2ndPass(MOS_COMMAND_BUFFER & cmdBuffer)658 MOS_STATUS Av1PakIntegratePkt::AddCondBBEndFor2ndPass(MOS_COMMAND_BUFFER &cmdBuffer) 659 { 660 ENCODE_FUNC_CALL(); 661 662 if (m_pipeline->IsSingleTaskPhaseSupported() || m_pipeline->IsFirstPass() || m_pipeline->GetPassNum() == 1) 663 { 664 return MOS_STATUS_SUCCESS; 665 } 666 667 auto &miConditionalBatchBufferEndParams = 
m_miItf->MHW_GETPAR_F(MI_CONDITIONAL_BATCH_BUFFER_END)(); 668 miConditionalBatchBufferEndParams = {}; 669 670 // VDENC uses HuC FW generated semaphore for conditional 2nd pass 671 miConditionalBatchBufferEndParams.presSemaphoreBuffer = 672 m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0); 673 674 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_CONDITIONAL_BATCH_BUFFER_END)(&cmdBuffer)); 675 676 return MOS_STATUS_SUCCESS; 677 } 678 MHW_SETPAR_DECL_SRC(HUC_IMEM_STATE,Av1PakIntegratePkt)679 MHW_SETPAR_DECL_SRC(HUC_IMEM_STATE, Av1PakIntegratePkt) 680 { 681 params.kernelDescriptor = m_vdboxHucPakIntKernelDescriptor; 682 return MOS_STATUS_SUCCESS; 683 } 684 MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE,Av1PakIntegratePkt)685 MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE, Av1PakIntegratePkt) 686 { 687 params.function = PAK_INTEGRATE; 688 689 ENCODE_CHK_STATUS_RETURN(SetDmemBuffer()); 690 691 int32_t currentPass = m_pipeline->GetCurrentPass(); 692 params.hucDataSource = m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]; 693 params.dataLength = MOS_ALIGN_CEIL(sizeof(HucPakIntegrateDmem), CODECHAL_CACHELINE_SIZE); 694 params.dmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; 695 696 return MOS_STATUS_SUCCESS; 697 } 698 MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE,Av1PakIntegratePkt)699 MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE, Av1PakIntegratePkt) 700 { 701 params.function = PAK_INTEGRATE; 702 703 uint32_t statBufIdx = 0; 704 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetStatisticsBufferIndex, statBufIdx); 705 706 MOS_RESOURCE *resTileBasedStatisticsBuffer = nullptr; 707 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileBasedStatisticsBuffer, statBufIdx, resTileBasedStatisticsBuffer); 708 MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr; 709 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer); 710 
MOS_RESOURCE *resTileRecordBuffer = nullptr; 711 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, resTileRecordBuffer); 712 uint32_t numTiles = 1; 713 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileNum, numTiles); 714 uint32_t lastTileIndex = numTiles - 1; 715 EncodeTileData tileData = {}; 716 RUN_FEATURE_INTERFACE_RETURN(Av1EncodeTile, Av1FeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex); 717 718 // Add Virtual addr 719 params.regionParams[0].presRegion = resTileBasedStatisticsBuffer; // Region 0 Input - Tile based input statistics from PAK/ VDEnc 720 params.regionParams[0].dwOffset = 0; 721 params.regionParams[1].presRegion = resHuCPakAggregatedFrameStatsBuffer; // Region 1 Output - HuC Frame statistics output 722 params.regionParams[1].isWritable = true; 723 724 params.regionParams[4].presRegion = &m_basicFeature->m_resBitstreamBuffer; // Region 4 Input - Last Tile bitstream 725 params.regionParams[4].dwOffset = MOS_ALIGN_CEIL(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); 726 params.regionParams[5].presRegion = &m_basicFeature->m_resBitstreamBuffer; // Region 5 Output - HuC modifies the last tile bitstream before stitch 727 params.regionParams[5].dwOffset = MOS_ALIGN_CEIL(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); 728 params.regionParams[5].isWritable = true; 729 MOS_RESOURCE *resBrcDataBuffer = nullptr; 730 RUN_FEATURE_INTERFACE_RETURN(Av1Brc, Av1FeatureIDs::av1BrcFeature, GetBrcDataBuffer, resBrcDataBuffer); 731 params.regionParams[9].presRegion = resBrcDataBuffer; // Region 9 Output - HuC outputs BRC data 732 params.regionParams[9].isWritable = true; 733 734 params.regionParams[15].presRegion = resTileRecordBuffer; 735 params.regionParams[15].dwOffset = 0; 736 737 if ((m_basicFeature->m_enableTileStitchByHW || !m_basicFeature->m_enableSWStitching) && m_pipeline->GetPipeNum() > 1) 738 { 739 
ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer()); 740 741 uint32_t currentPass = m_pipeline->GetCurrentPass(); 742 params.regionParams[8].presRegion = const_cast<PMOS_RESOURCE>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]); // Region 8 - data buffer read by HUC for stitching cmd generation 743 params.regionParams[10].presRegion = const_cast<PMOS_RESOURCE>(&m_HucStitchCmdBatchBuffer.OsResource); // Region 10 - SLB for stitching cmd output from Huc 744 params.regionParams[10].isWritable = true; 745 } 746 747 return MOS_STATUS_SUCCESS; 748 } 749 750 #if USE_CODECHAL_DEBUG_TOOL DumpInput()751 MOS_STATUS Av1PakIntegratePkt::DumpInput() 752 { 753 ENCODE_FUNC_CALL(); 754 int32_t currentPass = m_pipeline->GetCurrentPass(); 755 756 CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface(); 757 ENCODE_CHK_NULL_RETURN(debugInterface); 758 759 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpHucDmem( 760 m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass], 761 m_vdencHucPakDmemBufferSize, 762 currentPass, 763 hucRegionDumpPakIntegrate)); 764 765 ENCODE_CHK_STATUS_RETURN(DumpRegion(0, "_TileBasedStatistic", true, hucRegionDumpPakIntegrate)); 766 ENCODE_CHK_STATUS_RETURN(DumpRegion(4, "_Bitstream", true, hucRegionDumpPakIntegrate, 1024)); 767 ENCODE_CHK_STATUS_RETURN(DumpRegion(15, "_TileRecord", true, hucRegionDumpPakIntegrate)); 768 769 return MOS_STATUS_SUCCESS; 770 } 771 DumpOutput()772 MOS_STATUS Av1PakIntegratePkt::DumpOutput() 773 { 774 ENCODE_FUNC_CALL(); 775 776 ENCODE_CHK_STATUS_RETURN(DumpRegion(0, "_TileBasedStatistic", false, hucRegionDumpPakIntegrate)); 777 ENCODE_CHK_STATUS_RETURN(DumpRegion(1, "_HuCPakAggregatedFrameStats", false, hucRegionDumpPakIntegrate)); 778 ENCODE_CHK_STATUS_RETURN(DumpRegion(5, "_Bitstream", false, hucRegionDumpPakIntegrate, 1024)); 779 ENCODE_CHK_STATUS_RETURN(DumpRegion(9, "_OutputBrcData", false, hucRegionDumpPakIntegrate)); 780 ENCODE_CHK_STATUS_RETURN(DumpRegion(10, 
"_StitchCmd", false, hucRegionDumpPakIntegrate)); 781 782 return MOS_STATUS_SUCCESS; 783 } 784 #endif 785 } 786