1 /*
2 * Copyright (c) 2020-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file encode_vp9_pak_integrate_packet.cpp
24 //! \brief Defines the interface for vp9 pak integrate packet
25 //!
26 #include "encode_vp9_pak_integrate_packet.h"
27 #include "mos_os_cp_interface_specific.h"
28
29 namespace encode
30 {
~Vp9PakIntegratePkt()31 Vp9PakIntegratePkt::~Vp9PakIntegratePkt()
32 {
33 FreeResources();
34 }
35
Init()36 MOS_STATUS Vp9PakIntegratePkt::Init()
37 {
38 ENCODE_FUNC_CALL();
39
40 m_basicFeature = dynamic_cast<Vp9BasicFeature *>(m_featureManager->GetFeature(Vp9FeatureIDs::basicFeature));
41 ENCODE_CHK_NULL_RETURN(m_basicFeature);
42
43 m_hcpInterfaceNew = std::static_pointer_cast<mhw::vdbox::hcp::Itf>(m_hwInterface->GetHcpInterfaceNext());
44 ENCODE_CHK_NULL_RETURN(m_hcpInterfaceNew);
45
46 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Init());
47
48 ENCODE_CHK_NULL_RETURN(m_pipeline);
49 #ifdef _MMC_SUPPORTED
50 m_mmcState = m_pipeline->GetMmcState();
51 ENCODE_CHK_NULL_RETURN(m_mmcState);
52 #endif // __MMC_SUPPORTED
53
54 return MOS_STATUS_SUCCESS;
55 }
56
Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)57 MOS_STATUS Vp9PakIntegratePkt::Submit(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase)
58 {
59 ENCODE_FUNC_CALL();
60
61 bool firstTaskPhase = packetPhase & firstPacket;
62 bool requestProlog = false;
63
64 if (m_basicFeature->m_hucEnabled)
65 {
66 // Huc Basic
67 ENCODE_CHK_STATUS_RETURN(Execute(commandBuffer, true, requestProlog));
68
69 // Add huc status update to status buffer
70 PMOS_RESOURCE osResource = nullptr;
71 uint32_t offset = 0;
72 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusRegMask, osResource, offset));
73 ENCODE_CHK_NULL_RETURN(osResource);
74
75 // Write HUC_STATUS mask
76 auto &storeDataParams = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
77 storeDataParams = {};
78 storeDataParams.pOsResource = osResource;
79 storeDataParams.dwResourceOffset = offset;
80 storeDataParams.dwValue = m_hwInterface->GetHucInterfaceNext()->GetHucStatusReEncodeMask();
81 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(commandBuffer));
82
83 // store HUC_STATUS register
84 osResource = nullptr;
85 offset = 0;
86 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusReg, osResource, offset));
87 ENCODE_CHK_NULL_RETURN(osResource);
88
89 ENCODE_CHK_NULL_RETURN(m_hwInterface->GetHucInterfaceNext());
90 auto mmioRegisters = m_hwInterface->GetHucInterfaceNext()->GetMmioRegisters(m_vdboxIndex);
91 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
92 miStoreRegMemParams = {};
93 miStoreRegMemParams.presStoreBuffer = osResource;
94 miStoreRegMemParams.dwOffset = offset;
95 miStoreRegMemParams.dwRegister = mmioRegisters->hucStatusRegOffset;
96 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(commandBuffer));
97
98 if (m_basicFeature->m_enableTileStitchByHW)
99 {
100 // 2nd level BB buffer for stitching cmd
101 // Current location to add cmds in 2nd level batch buffer
102 m_HucStitchCmdBatchBuffer.iCurrent = 0;
103 // Reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
104 m_HucStitchCmdBatchBuffer.dwOffset = 0;
105 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(commandBuffer, &m_HucStitchCmdBatchBuffer));
106 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
107 auto &mfxWaitParams = m_miItf->MHW_GETPAR_F(MFX_WAIT)();
108 mfxWaitParams = {};
109 mfxWaitParams.iStallVdboxPipeline = m_osInterface->osCpInterface->IsCpEnabled() ? true : false;
110 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(commandBuffer));
111 }
112 }
113
114 ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, commandBuffer));
115 if (false == m_pipeline->IsFrameTrackingEnabled())
116 {
117 ENCODE_CHK_STATUS_RETURN(UpdateStatusReportNext(statusReportGlobalCount, commandBuffer));
118 }
119
120 CODECHAL_DEBUG_TOOL(
121 if (m_mmcState) {
122 m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface));
123 }
124 )
125
126 // Reset parameters for next PAK execution
127 if (false == m_pipeline->IsFrameTrackingEnabled())
128 {
129 UpdateParameters();
130 }
131
132 CODECHAL_DEBUG_TOOL(
133 ENCODE_CHK_STATUS_RETURN(DumpInput());
134 )
135
136 return MOS_STATUS_SUCCESS;
137 }
138
CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)139 MOS_STATUS Vp9PakIntegratePkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize)
140 {
141 ENCODE_FUNC_CALL();
142
143 uint32_t hucCommandsSize = 0;
144 uint32_t hucPatchListSize = 0;
145 MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams;
146
147 ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize(
148 m_basicFeature->m_mode, (uint32_t *)&hucCommandsSize, (uint32_t *)&hucPatchListSize, &stateCmdSizeParams));
149
150 commandBufferSize = hucCommandsSize;
151 requestedPatchListSize = m_osInterface->bUsesPatchList ? hucPatchListSize : 0;
152
153 // Reserve cmd size for hw stitch
154 commandBufferSize += m_hwStitchCmdSize;
155
156 // 4K align since allocation is in chunks of 4K bytes.
157 commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE);
158
159 return MOS_STATUS_SUCCESS;
160 }
161
DumpOutput()162 MOS_STATUS Vp9PakIntegratePkt::DumpOutput()
163 {
164 ENCODE_FUNC_CALL();
165
166 #if USE_CODECHAL_DEBUG_TOOL
167
168 // Region 1 - HuC Frame statistics output
169 ENCODE_CHK_STATUS_RETURN(DumpRegion(1, "_IntegratedStreamout_output", false, hucRegionDumpPakIntegrate, m_hwInterface->m_pakIntAggregatedFrameStatsSize));
170 // Region 8 - data buffer read by HUC for stitching cmd generation
171 ENCODE_CHK_STATUS_RETURN(DumpRegion(8, "_HucStitchDataBuffer", false, hucRegionDumpPakIntegrate, MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE)));
172 // Region 9 - HuC outputs BRC data
173 ENCODE_CHK_STATUS_RETURN(DumpRegion(9, "_BrcDataOutputBuffer", false, hucRegionDumpPakIntegrate));
174 // Region 10 - SLB for stitching cmd output from Huc
175 ENCODE_CHK_STATUS_RETURN(DumpRegion(10, "_SLBHucStitchCmdBuffer", false, hucRegionDumpPakIntegrate, m_hwInterface->m_HucStitchCmdBatchBufferSize));
176 // Region 15 [In/Out] - Tile Record Buffer
177 ENCODE_CHK_STATUS_RETURN(DumpRegion(15, "_TileRecordBuffer", false, hucRegionDumpPakIntegrate, m_basicFeature->m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterfaceNew->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE)));
178
179 #endif // USE_CODECHAL_DEBUG_TOOL
180
181 return MOS_STATUS_SUCCESS;
182 }
183
Completed(void * mfxStatus,void * rcsStatus,void * statusReport)184 MOS_STATUS Vp9PakIntegratePkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport)
185 {
186 ENCODE_FUNC_CALL();
187
188 ENCODE_CHK_NULL_RETURN(mfxStatus);
189 ENCODE_CHK_NULL_RETURN(statusReport);
190 ENCODE_CHK_NULL_RETURN(m_basicFeature);
191
192 EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
193
194 if (statusReportData->numberTilesInFrame == 1 || !m_basicFeature->m_scalableMode)
195 {
196 // When Tile feature is not enabled or not in scalable mode, not need following complete options
197 return MOS_STATUS_SUCCESS;
198 }
199
200 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Completed(mfxStatus, rcsStatus, statusReport));
201
202 // Tile status data is only update and performed in multi-pipe mode
203 ENCODE_CHK_STATUS_RETURN(SetupTilesStatusData(mfxStatus, statusReport));
204
205 m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList);
206
207 return MOS_STATUS_SUCCESS;
208 }
209
AllocateResources()210 MOS_STATUS Vp9PakIntegratePkt::AllocateResources()
211 {
212 ENCODE_FUNC_CALL();
213
214 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::AllocateResources());
215
216 if (m_basicFeature->m_hucPakIntBrcDataBuffer == nullptr)
217 {
218 MOS_RESOURCE * allocatedBuffer = nullptr;
219 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
220 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
221 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
222 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
223 allocParamsForBufferLinear.Format = Format_Buffer;
224
225 // HUC PAK Int DMEM buffer
226 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hucPakIntDmemBufferSize, CODECHAL_CACHELINE_SIZE);
227 allocParamsForBufferLinear.pBufName = "Huc Pak Int Dmem Buffer";
228 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
229 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; ++i)
230 {
231 for (auto j = 0; j < Vp9EncodeBrc::m_brcMaxNumPasses; ++j)
232 {
233 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
234 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
235 m_hucPakIntDmemBuffer[i][j] = *allocatedBuffer;
236 }
237 }
238
239 // HuC PAK Int region 7, 8
240 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
241 allocParamsForBufferLinear.pBufName = "HUC PAK Int Dummy Buffer";
242 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
243 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
244 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
245 m_hucPakIntDummyBuffer = *allocatedBuffer;
246
247 // Allocate region 9 of PAK integration to be fed as input to HuC BRC region 7
248 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
249 allocParamsForBufferLinear.pBufName = "HUC PAK Integration FrameByteCount output";
250 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_WRITE;
251 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
252 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
253 m_basicFeature->m_hucPakIntBrcDataBuffer = allocatedBuffer;
254
255 if (m_basicFeature->m_enableTileStitchByHW)
256 {
257 // HuC stitching data buffer
258 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
259 allocParamsForBufferLinear.pBufName = "VP9 HuC Stitch Data Buffer";
260 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
261
262 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; ++i)
263 {
264 for (auto j = 0; j < CODECHAL_ENCODE_VP9_BRC_MAX_NUM_OF_PASSES; ++j)
265 {
266 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
267 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
268 m_resHucStitchDataBuffer[i][j] = *allocatedBuffer;
269 }
270 }
271
272 // Second level batch buffer for HuC stitching CMD
273 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
274 m_HucStitchCmdBatchBuffer.bSecondLevel = true;
275 ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
276 m_osInterface,
277 &m_HucStitchCmdBatchBuffer,
278 nullptr,
279 m_hwInterface->m_HucStitchCmdBatchBufferSize));
280 }
281 }
282
283 return MOS_STATUS_SUCCESS;
284 }
285
SetDmemBuffer() const286 MOS_STATUS Vp9PakIntegratePkt::SetDmemBuffer() const
287 {
288 ENCODE_FUNC_CALL();
289
290 auto currentPass = m_pipeline->GetCurrentPass();
291 if (currentPass >= Vp9EncodeBrc::m_brcMaxNumPasses)
292 {
293 return MOS_STATUS_INVALID_PARAMETER;
294 }
295
296 HucPakIntDmem *dmem = (HucPakIntDmem *)m_allocator->LockResourceForWrite(const_cast<MOS_RESOURCE *>(&m_hucPakIntDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]));
297 ENCODE_CHK_NULL_RETURN(dmem);
298 MOS_ZeroMemory(dmem, sizeof(HucPakIntDmem));
299
300 MOS_FillMemory(dmem, m_pakIntDmemOffsetsSize, 0xFF);
301
302 uint16_t numTileColumns = 1;
303 uint16_t numTileRows = 1;
304 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
305 uint32_t numTiles = 1;
306 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileNum, numTiles);
307
308 dmem->totalSizeInCommandBuffer = numTiles * CODECHAL_CACHELINE_SIZE;
309 dmem->offsetInCommandBuffer = 0xFFFF; // Not used for VP9, all bytes in dmem for fields not used are 0xFF
310 dmem->picWidthInPixel = (uint16_t)m_basicFeature->m_frameWidth;
311 dmem->picHeightInPixel = (uint16_t)m_basicFeature->m_frameHeight;
312 dmem->totalNumberOfPaks = (uint16_t)m_pipeline->GetPipeNum();
313 dmem->codec = m_pakIntVp9CodecId;
314 dmem->maxPass = Vp9EncodeBrc::m_brcMaxNumPasses; // Only VDEnc CQP and BRC
315 dmem->currentPass = currentPass + 1;
316
317 uint32_t lastTileIndex = numTiles - 1;
318 EncodeTileData tileData = {};
319 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex);
320
321 dmem->lastTileBSStartInBytes = tileData.tileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
322 dmem->picStateStartInBytes = 0xFFFF;
323
324 if (m_basicFeature->m_enableTileStitchByHW)
325 {
326 dmem->StitchEnable = true;
327 dmem->StitchCommandOffset = 0;
328 dmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
329 }
330
331 Vp9TileStatusInfo vp9TileStatsOffset = {};
332 Vp9TileStatusInfo vp9FrameStatsOffset = {};
333 Vp9TileStatusInfo vp9StatsSize = {};
334 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileStatusInfo, vp9TileStatsOffset, vp9FrameStatsOffset, vp9StatsSize);
335
336 // Offset 0 is for region 1 - output of integrated frame stats from PAK integration kernel
337
338 dmem->tileSizeRecordOffset[0] = vp9FrameStatsOffset.tileSizeRecord;
339 dmem->vdencStatOffset[0] = vp9FrameStatsOffset.vdencStats;
340 dmem->vp9PakStatOffset[0] = vp9FrameStatsOffset.pakStats;
341 dmem->vp9CounterBufferOffset[0] = vp9FrameStatsOffset.counterBuffer;
342
343 uint16_t numTilesPerPipe = (uint16_t)(numTiles / m_pipeline->GetPipeNum());
344
345 // Offset 1 - 4 is for region 0 - Input to PAK integration kernel for all tile statistics per pipe
346 for (auto i = 1; i <= m_pipeline->GetPipeNum(); ++i)
347 {
348 dmem->numTiles[i - 1] = numTilesPerPipe;
349 dmem->tileSizeRecordOffset[i] = vp9TileStatsOffset.tileSizeRecord + ((i - 1) * (dmem->numTiles[i - 1]) * vp9StatsSize.tileSizeRecord);
350 dmem->vdencStatOffset[i] = vp9TileStatsOffset.vdencStats + ((i - 1) * (dmem->numTiles[i - 1]) * vp9StatsSize.vdencStats);
351 dmem->vp9PakStatOffset[i] = vp9TileStatsOffset.pakStats + ((i - 1) * (dmem->numTiles[i - 1]) * vp9StatsSize.pakStats);
352 dmem->vp9CounterBufferOffset[i] = vp9TileStatsOffset.counterBuffer + ((i - 1) * (dmem->numTiles[i - 1]) * vp9StatsSize.counterBuffer);
353 }
354
355 m_allocator->UnLock(const_cast<MOS_RESOURCE *>(&m_hucPakIntDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]));
356
357 return MOS_STATUS_SUCCESS;
358 }
359
ReadHcpStatus(MHW_VDBOX_NODE_IND vdboxIndex,MediaStatusReport * statusReport,MOS_COMMAND_BUFFER & cmdBuffer)360 MOS_STATUS Vp9PakIntegratePkt::ReadHcpStatus(MHW_VDBOX_NODE_IND vdboxIndex, MediaStatusReport *statusReport, MOS_COMMAND_BUFFER &cmdBuffer)
361 {
362 ENCODE_FUNC_CALL();
363
364 ENCODE_CHK_NULL_RETURN(statusReport);
365 ENCODE_CHK_NULL_RETURN(m_hwInterface);
366
367 MOS_RESOURCE *osResource = nullptr;
368 uint32_t offset = 0;
369
370 EncodeStatusReadParams params;
371 MOS_ZeroMemory(¶ms, sizeof(params));
372
373 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamByteCountPerFrame, osResource, offset));
374 params.resBitstreamByteCountPerFrame = osResource;
375 params.bitstreamByteCountPerFrameOffset = offset;
376
377 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamSyntaxElementOnlyBitCount, osResource, offset));
378 params.resBitstreamSyntaxElementOnlyBitCount = osResource;
379 params.bitstreamSyntaxElementOnlyBitCountOffset = offset;
380
381 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportQPStatusCount, osResource, offset));
382 params.resQpStatusCount = osResource;
383 params.qpStatusCountOffset = offset;
384
385 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusMask, osResource, offset));
386 params.resImageStatusMask = osResource;
387 params.imageStatusMaskOffset = offset;
388
389 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusCtrl, osResource, offset));
390 params.resImageStatusCtrl = osResource;
391 params.imageStatusCtrlOffset = offset;
392
393 ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadHcpStatus(vdboxIndex, params, &cmdBuffer));
394
395 auto mmioRegisters = m_hcpInterfaceNew->GetMmioRegisters(vdboxIndex);
396
397 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
398 miStoreRegMemParams = {};
399 miStoreRegMemParams.presStoreBuffer = params.resBitstreamByteCountPerFrame;
400 miStoreRegMemParams.dwOffset = params.bitstreamByteCountPerFrameOffset;
401 miStoreRegMemParams.dwRegister = mmioRegisters->hcpVp9EncBitstreamBytecountFrameRegOffset;
402 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
403
404 ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadImageStatusForHcp(vdboxIndex, params, &cmdBuffer));
405
406 HucBrcBuffers *hucBrcBuffers = nullptr;
407 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeBrc, Vp9FeatureIDs::vp9BrcFeature, GetHucBrcBuffers, hucBrcBuffers);
408 ENCODE_CHK_NULL_RETURN(hucBrcBuffers);
409
410 auto ©MemMemParams = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
411 copyMemMemParams = {};
412 copyMemMemParams.presSrc = params.resBitstreamByteCountPerFrame;
413 copyMemMemParams.dwSrcOffset = params.bitstreamByteCountPerFrameOffset;
414 copyMemMemParams.presDst = &(hucBrcBuffers->resBrcBitstreamSizeBuffer);
415 copyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(EncodeVp9BSBuffer, dwHcpBitstreamByteCountFrame);
416 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(&cmdBuffer));
417
418 return MOS_STATUS_SUCCESS;
419 }
420
EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)421 MOS_STATUS Vp9PakIntegratePkt::EndStatusReport(uint32_t srType, MOS_COMMAND_BUFFER *cmdBuffer)
422 {
423 ENCODE_FUNC_CALL();
424 ENCODE_CHK_NULL_RETURN(cmdBuffer);
425
426 auto brcFeature = dynamic_cast<Vp9EncodeBrc *>(m_featureManager->GetFeature(Vp9FeatureIDs::vp9BrcFeature));
427 ENCODE_CHK_NULL_RETURN(brcFeature);
428
429 if (!m_basicFeature->m_scalableMode)
430 {
431 // Single pipe mode can read the info from MMIO register. Otherwise,
432 // we have to use the tile size statistic buffer
433 ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(m_vdboxIndex, m_statusReport, *cmdBuffer));
434 }
435 ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer));
436
437 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
438 ENCODE_CHK_NULL_RETURN(perfProfiler);
439 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd(
440 (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer));
441
442 return MOS_STATUS_SUCCESS;
443 }
444
UpdateParameters()445 void Vp9PakIntegratePkt::UpdateParameters()
446 {
447 ENCODE_FUNC_CALL();
448
449 if (!m_pipeline->IsSingleTaskPhaseSupported())
450 {
451 m_osInterface->pfnResetPerfBufferID(m_osInterface);
452 }
453 }
454
SetupTilesStatusData(void * mfxStatus,void * statusReport)455 MOS_STATUS Vp9PakIntegratePkt::SetupTilesStatusData(void *mfxStatus, void *statusReport)
456 {
457 ENCODE_FUNC_CALL();
458
459 ENCODE_CHK_NULL_RETURN(mfxStatus);
460 ENCODE_CHK_NULL_RETURN(statusReport);
461 ENCODE_CHK_NULL_RETURN(m_basicFeature);
462
463 EncodeStatusMfx * encodeStatusMfx = (EncodeStatusMfx *)mfxStatus;
464 EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
465
466 uint32_t statBufIdx = statusReportData->currOriginalPic.FrameIdx;
467 const EncodeReportTileData *tileReportData = nullptr;
468 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetReportTileData, statBufIdx, tileReportData);
469 ENCODE_CHK_NULL_RETURN(tileReportData);
470
471 MOS_RESOURCE *tileSizeStatusBuffer = nullptr;
472 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, tileSizeStatusBuffer);
473 ENCODE_CHK_NULL_RETURN(tileSizeStatusBuffer);
474
475 PakHwTileSizeRecord *tileStatusReport =
476 (PakHwTileSizeRecord *)m_allocator->LockResourceForRead(tileSizeStatusBuffer);
477 ENCODE_CHK_NULL_RETURN(tileStatusReport);
478
479 statusReportData->codecStatus = CODECHAL_STATUS_SUCCESSFUL;
480 statusReportData->panicMode = false;
481 statusReportData->averageQP = 0;
482 statusReportData->qpY = 0;
483 statusReportData->suggestedQPYDelta = 0;
484 statusReportData->numberPasses = 1;
485 statusReportData->bitstreamSize = 0;
486
487 encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0;
488
489 double sumQP = 0.0;
490 uint32_t totalCU = 0;
491 for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; ++i)
492 {
493 if (tileStatusReport[i].Length == 0)
494 {
495 statusReportData->codecStatus = CODECHAL_STATUS_INCOMPLETE;
496 // Clean-up the tile status report buffer
497 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
498 m_allocator->UnLock(tileSizeStatusBuffer);
499 return MOS_STATUS_SUCCESS;
500 }
501 statusReportData->bitstreamSize += tileStatusReport[i].Length;
502 totalCU += (tileReportData[i].tileHeightInMinCbMinus1 + 1) * (tileReportData[i].tileWidthInMinCbMinus1 + 1);
503 sumQP += tileStatusReport[i].Hcp_Qp_Status_Count;
504 }
505
506 if (totalCU != 0)
507 {
508 statusReportData->qpY = statusReportData->averageQP =
509 (uint8_t)((sumQP / (double)totalCU) / 4.0); // Due to TU is 4x4 and there are 4 TUs in one CU
510 }
511 else
512 {
513 // Clean-up the tile status report buffer
514 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
515 m_allocator->UnLock(tileSizeStatusBuffer);
516 return MOS_STATUS_INVALID_PARAMETER;
517 }
518
519 if (m_basicFeature->m_enableTileStitchByHW)
520 {
521 // Clean-up the tile status report buffer
522 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
523 m_allocator->UnLock(tileSizeStatusBuffer);
524 return MOS_STATUS_SUCCESS;
525 }
526
527 uint8_t *bufPtr = (uint8_t *)MOS_AllocAndZeroMemory(statusReportData->bitstreamSize);
528 uint8_t *tempBsBuffer = bufPtr;
529
530 auto tempTerminateFunc = [&]()
531 {
532 MOS_SafeFreeMemory(tempBsBuffer);
533
534 // Clean-up the tile status report buffer
535 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
536 m_allocator->UnLock(tileSizeStatusBuffer);
537 };
538 ENCODE_CHK_NULL_WITH_DESTROY_RETURN_VALUE(tempBsBuffer, tempTerminateFunc);
539
540 PCODEC_REF_LIST currRefList = (PCODEC_REF_LIST)statusReportData->currRefList;
541 ENCODE_CHK_NULL_WITH_DESTROY_RETURN_VALUE(currRefList, tempTerminateFunc);
542 uint8_t *bitstream = (uint8_t *)m_allocator->LockResourceForWrite(&currRefList->resBitstreamBuffer);
543 ENCODE_CHK_NULL_WITH_DESTROY_RETURN_VALUE(bitstream, tempTerminateFunc);
544
545 for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; ++i)
546 {
547 uint32_t offset = tileReportData[i].bitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
548 uint32_t len = tileStatusReport[i].Length;
549 if (offset + len >= m_basicFeature->m_bitstreamSize)
550 {
551 ENCODE_ASSERTMESSAGE("Error: Tile offset and length add up to more than bitstream upper bound");
552 statusReportData->codecStatus = CODECHAL_STATUS_ERROR;
553 statusReportData->bitstreamSize = 0;
554
555 MOS_FreeMemory(tempBsBuffer);
556 m_allocator->UnLock(&currRefList->resBitstreamBuffer);
557
558 // Clean-up the tile status report buffer
559 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
560 m_allocator->UnLock(tileSizeStatusBuffer);
561
562 return MOS_STATUS_INVALID_FILE_SIZE;
563 }
564
565 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
566 bufPtr += len;
567 }
568
569 MOS_SecureMemcpy(bitstream, statusReportData->bitstreamSize, tempBsBuffer, statusReportData->bitstreamSize);
570 MOS_ZeroMemory(&bitstream[statusReportData->bitstreamSize], m_basicFeature->m_bitstreamSize - statusReportData->bitstreamSize);
571
572 MOS_FreeMemory(tempBsBuffer);
573 m_allocator->UnLock(&currRefList->resBitstreamBuffer);
574
575 // Clean-up the tile status report buffer
576 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
577 m_allocator->UnLock(tileSizeStatusBuffer);
578
579 return MOS_STATUS_SUCCESS;
580 }
581
FreeResources()582 MOS_STATUS Vp9PakIntegratePkt::FreeResources()
583 {
584 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
585
586 ENCODE_FUNC_CALL();
587
588 eStatus = Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
589 ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
590
591 return eStatus;
592 }
593
ConfigStitchDataBuffer() const594 MOS_STATUS Vp9PakIntegratePkt::ConfigStitchDataBuffer() const
595 {
596 ENCODE_FUNC_CALL();
597
598 auto currPass = m_pipeline->GetCurrentPass();
599 HucCommandData *hucStitchDataBuf = (HucCommandData*)m_allocator->LockResourceForWrite(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass]));
600 ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
601
602 MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
603 hucStitchDataBuf->TotalCommands = 1;
604 hucStitchDataBuf->InputCOM[0].SizeOfData = 0xf;
605
606 uint16_t numTileColumns = 1;
607 uint16_t numTileRows = 1;
608 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
609
610 HucInputCmdG12 hucInputCmd;
611 MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG12));
612
613 ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
614 hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
615 hucInputCmd.CmdMode = HUC_CMD_LIST_MODE;
616 hucInputCmd.LengthOfTable = numTileRows * numTileColumns;
617 hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize;
618
619 // Tile record always in m_tileRecordBuffer even in scalable node
620 uint32_t statBufIdx = m_basicFeature->m_currOriginalPic.FrameIdx;
621 MOS_RESOURCE *presSrc = nullptr;
622
623 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, presSrc);
624 ENCODE_CHK_NULL_RETURN(presSrc);
625
626 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
627 m_osInterface,
628 presSrc,
629 false,
630 false));
631
632 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
633 m_osInterface,
634 &m_basicFeature->m_resBitstreamBuffer,
635 true,
636 true));
637
638 uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
639 uint64_t destrAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_basicFeature->m_resBitstreamBuffer);
640 hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
641 hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
642 hucInputCmd.DestAddrBottom = (uint32_t)(destrAddr & 0x00000000FFFFFFFF);
643 hucInputCmd.DestAddrTop = (uint32_t)((destrAddr & 0xFFFFFFFF00000000) >> 32);
644
645 MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG12), &hucInputCmd, sizeof(HucInputCmdG12));
646
647 m_allocator->UnLock(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass]));
648
649 return MOS_STATUS_SUCCESS;
650 }
651
#if USE_CODECHAL_DEBUG_TOOL
//!
//! \brief  Dump the HuC PAK integration inputs: the DMEM buffer of the current
//!         pass plus regions 0 and 15.
//! \return MOS_STATUS
//!         MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS Vp9PakIntegratePkt::DumpInput()
{
    ENCODE_FUNC_CALL();

    int32_t currentPass = m_pipeline->GetCurrentPass();

    CodechalDebugInterface *dbgItf = m_pipeline->GetDebugInterface();
    ENCODE_CHK_NULL_RETURN(dbgItf);

    // DMEM of the current recycled buffer slot and pass
    ENCODE_CHK_STATUS_RETURN(dbgItf->DumpHucDmem(
        &m_hucPakIntDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass],
        m_hucPakIntDmemBufferSize,
        currentPass,
        hucRegionDumpPakIntegrate));

    // Region 0 - Tile based input statistics from PAK/ VDEnc
    ENCODE_CHK_STATUS_RETURN(DumpRegion(
        0, "_PakIntStitchBuffer", true, hucRegionDumpPakIntegrate, m_hwInterface->m_pakIntTileStatsSize));

    // Region 15 [In/Out] - Tile Record Buffer: one cacheline-aligned record per possible tile
    const auto tileRecordBytes =
        m_basicFeature->m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterfaceNew->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
    ENCODE_CHK_STATUS_RETURN(DumpRegion(
        15, "_TileRecordBuffer", true, hucRegionDumpPakIntegrate, tileRecordBytes));

    return MOS_STATUS_SUCCESS;
}
#endif  // USE_CODECHAL_DEBUG_TOOL
676
//!
//! \brief  HUC_IMEM_STATE parameter setter: selects the PAK integration kernel descriptor.
//!
MHW_SETPAR_DECL_SRC(HUC_IMEM_STATE, Vp9PakIntegratePkt)
{
    ENCODE_FUNC_CALL();

    // Kernel to be loaded by the HuC for this packet
    params.kernelDescriptor = m_vdboxHucPakIntegrationKernelDescriptor;

    return MOS_STATUS_SUCCESS;
}
685
//!
//! \brief  HUC_DMEM_STATE parameter setter: fills the DMEM buffer of the current
//!         pass and points the command at it.
//!
MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE, Vp9PakIntegratePkt)
{
    ENCODE_FUNC_CALL();

    // The DMEM content has to be written before the command can reference it
    ENCODE_CHK_STATUS_RETURN(SetDmemBuffer());

    uint32_t passIdx = m_pipeline->GetCurrentPass();

    params.function      = PAK_INTEGRATE;
    params.hucDataSource = const_cast<PMOS_RESOURCE>(&m_hucPakIntDmemBuffer[m_pipeline->m_currRecycledBufIdx][passIdx]);
    params.dataLength    = MOS_ALIGN_CEIL(m_hucPakIntDmemBufferSize, CODECHAL_CACHELINE_SIZE);
    params.dmemOffset    = HUC_DMEM_OFFSET_RTOS_GEMS;

    return MOS_STATUS_SUCCESS;
}
700
//!
//! \brief  HUC_VIRTUAL_ADDR_STATE parameter setter: binds the HuC regions used
//!         by the PAK integration kernel.
//! \details Regions 0, 1 and 15 come from the tile feature, regions 4-7 use the
//!         dummy buffer, region 9 is the BRC data output, and regions 8 and 10
//!         are bound only with HW tile stitching.
//!
MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE, Vp9PakIntegratePkt)
{
    ENCODE_FUNC_CALL();

    params.function = PAK_INTEGRATE;

    const bool hwStitch = m_basicFeature->m_enableTileStitchByHW;
    if (hwStitch)
    {
        ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
    }

    uint32_t passIdx = m_pipeline->GetCurrentPass();

    // Region 0, 1, 15
    RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, SetRegionsForPakInt, params);

    // Regions 4 - 7 are not used for VP9 and all point at the dummy buffer
    PMOS_RESOURCE dummyRegion = const_cast<PMOS_RESOURCE>(&m_hucPakIntDummyBuffer);
    params.regionParams[4].presRegion = dummyRegion;
    params.regionParams[5].presRegion = dummyRegion;
    params.regionParams[5].isWritable = true;
    params.regionParams[6].presRegion = dummyRegion;
    params.regionParams[6].isWritable = true;
    params.regionParams[7].presRegion = dummyRegion;

    if (hwStitch)
    {
        // Region 8 - data buffer read by HUC for stitching cmd generation
        params.regionParams[8].presRegion =
            const_cast<PMOS_RESOURCE>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][passIdx]);
        params.regionParams[8].isWritable = true;

        // Region 10 - SLB for stitching cmd output from Huc
        params.regionParams[10].presRegion = const_cast<PMOS_RESOURCE>(&m_HucStitchCmdBatchBuffer.OsResource);
        params.regionParams[10].isWritable = true;
    }

    // Region 9 - HuC outputs BRC data
    params.regionParams[9].presRegion = m_basicFeature->m_hucPakIntBrcDataBuffer;
    params.regionParams[9].isWritable = true;

    return MOS_STATUS_SUCCESS;
}
737
738 } // namespace encode
739