1 /*
2 * Copyright (c) 2019-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file encode_vp9_tile.cpp
24 //! \brief Defines the common interface for vp9 tile
25 //!
26
27 #include "encode_vp9_tile.h"
28 #include "encode_vp9_basic_feature.h"
29 #include "codec_def_common.h"
30 #include "encode_vp9_vdenc_feature_manager.h"
31 #include "encode_vp9_brc.h"
32
33 namespace encode
34 {
Vp9EncodeTile(MediaFeatureManager * featureManager,EncodeAllocator * allocator,CodechalHwInterfaceNext * hwInterface,void * constSettings)35 Vp9EncodeTile::Vp9EncodeTile(
36 MediaFeatureManager *featureManager,
37 EncodeAllocator * allocator,
38 CodechalHwInterfaceNext *hwInterface,
39 void * constSettings) : EncodeTile(featureManager, allocator, hwInterface, constSettings)
40 {
41 auto encFeatureManager = dynamic_cast<EncodeVp9VdencFeatureManager *>(featureManager);
42 ENCODE_CHK_NULL_NO_STATUS_RETURN(encFeatureManager);
43
44 m_basicFeature = dynamic_cast<EncodeBasicFeature *>(encFeatureManager->GetFeature(FeatureIDs::basicFeature));
45 ENCODE_CHK_NULL_NO_STATUS_RETURN(m_basicFeature);
46
47 ENCODE_CHK_NULL_NO_STATUS_RETURN(hwInterface);
48
49 m_hcpInterfaceNew = std::static_pointer_cast<mhw::vdbox::hcp::Itf>(hwInterface->GetHcpInterfaceNext());
50 ENCODE_CHK_NULL_NO_STATUS_RETURN(m_hcpInterfaceNew);
51 }
52
Init(void * settings)53 MOS_STATUS Vp9EncodeTile::Init(void *settings)
54 {
55 ENCODE_FUNC_CALL();
56 ENCODE_CHK_NULL_RETURN(settings);
57
58 ENCODE_CHK_STATUS_RETURN(EncodeTile::Init(settings));
59
60 m_maxTileNumber = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameWidth, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH) *
61 CODECHAL_GET_HEIGHT_IN_BLOCKS(m_basicFeature->m_frameHeight, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT);
62
63 return MOS_STATUS_SUCCESS;
64 }
65
Update(void * params)66 MOS_STATUS Vp9EncodeTile::Update(void *params)
67 {
68 ENCODE_FUNC_CALL();
69 ENCODE_CHK_NULL_RETURN(params);
70
71 EncoderParams *encodeParams = (EncoderParams *)params;
72 ENCODE_CHK_NULL_RETURN(encodeParams);
73
74 PCODEC_VP9_ENCODE_PIC_PARAMS vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(encodeParams->pPicParams);
75 ENCODE_CHK_NULL_RETURN(vp9PicParams);
76
77 m_enabled = true;
78
79 m_maxTileNumber = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameWidth, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH) *
80 CODECHAL_GET_HEIGHT_IN_BLOCKS(m_basicFeature->m_frameHeight, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT);
81
82 ENCODE_CHK_STATUS_RETURN(EncodeTile::Update(params));
83
84 return MOS_STATUS_SUCCESS;
85 }
86
SetCurrentTile(uint32_t tileRow,uint32_t tileCol,EncodePipeline * pipeline)87 MOS_STATUS Vp9EncodeTile::SetCurrentTile(uint32_t tileRow, uint32_t tileCol, EncodePipeline *pipeline)
88 {
89 ENCODE_FUNC_CALL();
90 ENCODE_CHK_STATUS_RETURN(EncodeTile::SetCurrentTile(tileRow, tileCol, pipeline));
91
92 auto basicFeature = static_cast<Vp9BasicFeature *>(m_basicFeature);
93 ENCODE_CHK_NULL_RETURN(basicFeature);
94
95 // Update any fields here as need
96 if (basicFeature->m_scalableMode)
97 {
98 m_curTileCodingParams.Vp9ProbabilityCounterStreamoutOffset = m_tileData[m_tileIdx].vp9ProbabilityCounterStreamoutOffset;
99 }
100 else
101 {
102 m_curTileCodingParams.CuRecordOffset = 0;
103 m_curTileCodingParams.SliceSizeStreamoutOffset = 0;
104 m_curTileCodingParams.SseRowstoreOffset = 0;
105 m_curTileCodingParams.SaoRowstoreOffset = 0;
106 m_curTileCodingParams.BitstreamByteOffset = 0;
107 m_curTileCodingParams.CuLevelStreamoutOffset = 0;
108 m_curTileCodingParams.TileSizeStreamoutOffset = 0;
109 // DW5
110 m_curTileCodingParams.PakTileStatisticsOffset = 0;
111 // DW12
112 m_curTileCodingParams.Vp9ProbabilityCounterStreamoutOffset = 0;
113 }
114
115 return MOS_STATUS_SUCCESS;
116 }
117
SetRegionsForBrcUpdate(mhw::vdbox::huc::HUC_VIRTUAL_ADDR_STATE_PAR & params) const118 MOS_STATUS Vp9EncodeTile::SetRegionsForBrcUpdate(mhw::vdbox::huc::HUC_VIRTUAL_ADDR_STATE_PAR ¶ms) const
119 {
120 ENCODE_FUNC_CALL();
121
122 auto basicFeature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
123 ENCODE_CHK_NULL_RETURN(basicFeature);
124
125 // VDEnc Stats Buffer - IN
126 params.regionParams[1].presRegion = const_cast<PMOS_RESOURCE>(&m_resHuCPakAggregatedFrameStatsBuffer);
127 params.regionParams[1].dwOffset = m_tileStatsOffset.vdencStats;
128 // Frame (not PAK) Stats Buffer - IN
129 params.regionParams[2].presRegion = const_cast<PMOS_RESOURCE>(&m_resHuCPakAggregatedFrameStatsBuffer);
130 params.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
131 // PAK MMIO - IN
132 params.regionParams[7].presRegion = basicFeature->m_hucPakIntBrcDataBuffer;
133
134 return MOS_STATUS_SUCCESS;
135 }
136
SetRegionsForPakInt(mhw::vdbox::huc::HUC_VIRTUAL_ADDR_STATE_PAR & params) const137 MOS_STATUS Vp9EncodeTile::SetRegionsForPakInt(mhw::vdbox::huc::HUC_VIRTUAL_ADDR_STATE_PAR ¶ms) const
138 {
139 ENCODE_FUNC_CALL();
140 // Region 0 - Tile based input statistics from PAK/ VDEnc
141 params.regionParams[0].presRegion = const_cast<PMOS_RESOURCE>(&m_resTileBasedStatisticsBuffer[m_statisticsBufIndex]);
142 params.regionParams[0].dwOffset = 0;
143 // Region 1 - HuC Frame statistics output
144 params.regionParams[1].presRegion = const_cast<PMOS_RESOURCE>(&m_resHuCPakAggregatedFrameStatsBuffer);
145 params.regionParams[1].isWritable = true;
146 // Region 15 [In/Out] - Tile Record Buffer
147 params.regionParams[15].presRegion = const_cast<PMOS_RESOURCE>(&m_tileRecordBuffer[m_statisticsBufIndex]);
148 params.regionParams[15].dwOffset = 0;
149 params.regionParams[15].isWritable = true;
150
151 return MOS_STATUS_SUCCESS;
152 }
153
SetHcpTileCodingParams(uint32_t activePipes)154 MOS_STATUS Vp9EncodeTile::SetHcpTileCodingParams(uint32_t activePipes)
155 {
156 ENCODE_FUNC_CALL();
157 if (!m_enabled)
158 {
159 return MOS_STATUS_SUCCESS;
160 }
161
162 auto basicFeature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
163 ENCODE_CHK_NULL_RETURN(basicFeature);
164
165 m_curTileCodingParams.NumberOfActiveBePipes = activePipes;
166
167 if (basicFeature->m_scalableMode)
168 {
169 m_curTileCodingParams.Vp9ProbabilityCounterStreamoutOffset = m_tileData[m_tileIdx].vp9ProbabilityCounterStreamoutOffset;
170 }
171 else
172 {
173 m_curTileCodingParams.Vp9ProbabilityCounterStreamoutOffset = 0;
174 }
175
176 return MOS_STATUS_SUCCESS;
177 }
178
SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)179 MOS_STATUS Vp9EncodeTile::SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS &pipeBufAddrParams)
180 {
181 ENCODE_FUNC_CALL();
182 if (!m_enabled)
183 {
184 return MOS_STATUS_SUCCESS;
185 }
186
187 MOS_RESOURCE *tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_statisticsBufIndex];
188 if (!Mos_ResourceIsNull(tileStatisticsBuffer))
189 {
190 pipeBufAddrParams.presVdencStreamOutBuffer = tileStatisticsBuffer;
191 pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_tileStatsOffset.vdencStats;
192 }
193
194 return MOS_STATUS_SUCCESS;
195 }
196
GetTileStatusInfo(Vp9TileStatusInfo & vp9TileStatsOffset,Vp9TileStatusInfo & vp9FrameStatsOffset,Vp9TileStatusInfo & vp9StatsSize)197 MOS_STATUS Vp9EncodeTile::GetTileStatusInfo(Vp9TileStatusInfo &vp9TileStatsOffset, Vp9TileStatusInfo &vp9FrameStatsOffset, Vp9TileStatusInfo &vp9StatsSize)
198 {
199 ENCODE_FUNC_CALL();
200 if (!m_enabled)
201 {
202 return MOS_STATUS_SUCCESS;
203 }
204
205 vp9TileStatsOffset = m_tileStatsOffset;
206 vp9FrameStatsOffset = m_frameStatsOffset;
207 vp9StatsSize = m_statsSize;
208
209 return MOS_STATUS_SUCCESS;
210 }
211
AllocateResources()212 MOS_STATUS Vp9EncodeTile::AllocateResources()
213 {
214 ENCODE_FUNC_CALL();
215 ENCODE_CHK_STATUS_RETURN(EncodeTile::AllocateResources());
216
217 auto basicFeature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
218 ENCODE_CHK_NULL_RETURN(basicFeature);
219
220 MOS_RESOURCE *allocatedBuffer = nullptr;
221
222 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
223 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
224 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
225 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
226 allocParamsForBufferLinear.Format = Format_Buffer;
227
228 // Tile record stream out buffer
229 uint32_t size = basicFeature->m_maxPicSizeInSb * CODECHAL_CACHELINE_SIZE; // worst case: each SB is tile
230 allocParamsForBufferLinear.dwBytes = size;
231 allocParamsForBufferLinear.pBufName = "TileRecordStreamOutBuffer";
232 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
233 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
234 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
235 m_resTileRecordStrmOutBuffer = *allocatedBuffer;
236
237 // CU statistics stream out buffer
238 size = MOS_ALIGN_CEIL(basicFeature->m_maxPicSizeInSb * 64 * 8, CODECHAL_CACHELINE_SIZE);
239 allocParamsForBufferLinear.dwBytes = size;
240 allocParamsForBufferLinear.pBufName = "CuStatsStrmOutBuffer";
241 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
242 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
243 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
244 m_resCuStatsStrmOutBuffer = *allocatedBuffer;
245
246 return MOS_STATUS_SUCCESS;
247 }
248
SetTileData(void * params)249 MOS_STATUS Vp9EncodeTile::SetTileData(void *params)
250 {
251 ENCODE_FUNC_CALL();
252
253 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
254
255 if (!m_enabled)
256 {
257 return eStatus;
258 }
259
260 ENCODE_CHK_NULL_RETURN(params);
261
262 EncoderParams *encodeParams = (EncoderParams *)params;
263 ENCODE_CHK_NULL_RETURN(encodeParams);
264
265 PCODEC_VP9_ENCODE_PIC_PARAMS vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(encodeParams->pPicParams);
266 ENCODE_CHK_NULL_RETURN(vp9PicParams);
267
268 auto basicFeature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
269 ENCODE_CHK_NULL_RETURN(basicFeature);
270
271 m_numTileRows = (1 << vp9PicParams->log2_tile_rows);
272 m_numTileColumns = (1 << vp9PicParams->log2_tile_columns);
273
274 // Tile width needs to be minimum size 256, error out if less
275 if ((m_numTileColumns != 1) && ((vp9PicParams->SrcFrameWidthMinus1 + 1) < m_numTileColumns * CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH))
276 {
277 ENCODE_ASSERTMESSAGE("Incorrect number of columns input parameter, Tile width is < 256");
278 return MOS_STATUS_INVALID_PARAMETER;
279 }
280
281 if (m_numTileRows > 4)
282 {
283 ENCODE_ASSERTMESSAGE("Max number of rows cannot exceeds 4 by VP9 Spec.");
284 return MOS_STATUS_INVALID_PARAMETER;
285 }
286
287 m_numTiles = m_numTileRows * m_numTileColumns;
288 if (m_numTiles > CODECHAL_GET_WIDTH_IN_BLOCKS(basicFeature->m_frameWidth, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH) *
289 CODECHAL_GET_HEIGHT_IN_BLOCKS(basicFeature->m_frameHeight, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT))
290 {
291 return MOS_STATUS_INVALID_PARAMETER;
292 }
293
294 //max LCU size is 64, min Cu size is 8
295 const uint32_t maxNumOfCUInSB = (CODEC_VP9_SUPER_BLOCK_HEIGHT / CODEC_VP9_MIN_BLOCK_HEIGHT) * (CODEC_VP9_SUPER_BLOCK_WIDTH / CODEC_VP9_MIN_BLOCK_WIDTH);
296 const uint32_t numCachelinesPerSB = MOS_ROUNDUP_DIVIDE((2 * BYTES_PER_DWORD * (NUM_PAK_DWS_PER_LCU + maxNumOfCUInSB * NUM_DWS_PER_CU)), MHW_CACHELINE_SIZE);
297
298 uint32_t bitstreamSizePerTile = basicFeature->m_bitstreamUpperBound / (m_numTiles * CODECHAL_CACHELINE_SIZE);
299 uint32_t numCuRecord = 64;
300 uint32_t cuLevelStreamoutOffset = 0;
301 uint32_t sliceSizeStreamoutOffset = 0;
302 uint32_t bitstreamByteOffset = 0;
303 uint32_t saoRowstoreOffset = 0;
304 uint32_t sseRowstoreOffset = 0;
305 uint32_t tileStartSbAddr = 0;
306 uint32_t cumulativeCUTileOffsetInBytes = 0;
307
308 uint32_t picWidthInSb = basicFeature->m_picWidthInSb;
309 uint32_t picHeightInSb = basicFeature->m_picHeightInSb;
310 bool scalableMode = basicFeature->m_scalableMode;
311
312 for (uint32_t numLcusInTiles = 0, tileY = 0; tileY < m_numTileRows; ++tileY)
313 {
314 bool isLastTileRow = ((m_numTileRows - 1) == tileY);
315 uint32_t tileStartSbY = (tileY * picHeightInSb) >> vp9PicParams->log2_tile_rows;
316 uint32_t tileHeightInSb = (isLastTileRow ? picHeightInSb : (((tileY + 1) * picHeightInSb) >> vp9PicParams->log2_tile_rows)) - tileStartSbY;
317 uint32_t lastTileRowHeight = (MOS_ALIGN_CEIL((vp9PicParams->SrcFrameHeightMinus1 + 1 - tileStartSbY * CODEC_VP9_SUPER_BLOCK_HEIGHT), CODEC_VP9_MIN_BLOCK_HEIGHT) / CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
318
319 for (uint32_t tileX = 0; tileX < m_numTileColumns; ++tileX)
320 {
321 uint32_t idx = tileY * m_numTileColumns + tileX;
322
323 bool isLastTileCol = ((m_numTileColumns - 1) == tileX);
324 uint32_t tileStartSbX = (tileX * picWidthInSb) >> vp9PicParams->log2_tile_columns;
325 uint32_t tileWidthInSb = (isLastTileCol ? picWidthInSb : (((tileX + 1) * picWidthInSb) >> vp9PicParams->log2_tile_columns)) - tileStartSbX;
326 uint32_t lastTileColWidth = (MOS_ALIGN_CEIL((vp9PicParams->SrcFrameWidthMinus1 + 1 - tileStartSbX * CODEC_VP9_SUPER_BLOCK_WIDTH), CODEC_VP9_MIN_BLOCK_WIDTH) / CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
327 uint32_t numLcuInTile = tileWidthInSb * tileHeightInSb;
328
329 m_tileData[idx].mode = CODECHAL_ENCODE_MODE_VP9;
330 m_tileData[idx].tileStartXInSb = tileStartSbX;
331 m_tileData[idx].tileStartYInSb = tileStartSbY;
332 m_tileData[idx].tileEndXInSb = m_tileData[idx].tileStartXInSb + tileWidthInSb;
333 m_tileData[idx].tileEndYInSb = m_tileData[idx].tileStartYInSb + tileHeightInSb;
334
335 //m_tileData[idx].tileColumnStoreSelect = tileX % 2;
336 //m_tileData[idx].tileRowStoreSelect = tileY % 2;
337
338 m_tileData[idx].numOfTilesInFrame = m_numTiles;
339 m_tileData[idx].numOfTileColumnsInFrame = m_numTileColumns;
340
341 m_tileData[idx].tileStartXInLCU = tileStartSbX;
342 m_tileData[idx].tileStartYInLCU = tileStartSbY;
343 m_tileData[idx].tileEndXInLCU = m_tileData[idx].tileStartXInLCU + tileWidthInSb;
344 m_tileData[idx].tileEndYInLCU = m_tileData[idx].tileStartYInLCU + tileHeightInSb;
345
346 m_tileData[idx].isLastTileofColumn = isLastTileRow;
347 m_tileData[idx].isLastTileofRow = isLastTileCol;
348
349 m_tileData[idx].tileWidthInMinCbMinus1 = isLastTileCol ? lastTileColWidth : (tileWidthInSb * CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
350 m_tileData[idx].tileHeightInMinCbMinus1 = isLastTileRow ? lastTileRowHeight : (tileHeightInSb * CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
351
352 // Reset the following fields for non-scalable mode in SetCurrentTile()
353 sseRowstoreOffset = (m_tileData[idx].tileStartXInSb + (3 * tileX)) << 5;
354
355 m_tileData[idx].cuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * 64), CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
356 m_tileData[idx].sliceSizeStreamoutOffset = sliceSizeStreamoutOffset;
357 m_tileData[idx].sseRowstoreOffset = sseRowstoreOffset;
358 m_tileData[idx].saoRowstoreOffset = saoRowstoreOffset;
359 m_tileData[idx].bitstreamByteOffset = bitstreamByteOffset;
360 m_tileData[idx].cuLevelStreamoutOffset = cuLevelStreamoutOffset;
361
362 m_tileData[idx].cumulativeCUTileOffset = cumulativeCUTileOffsetInBytes / CODECHAL_CACHELINE_SIZE;
363 m_tileData[idx].bitstreamByteOffset = bitstreamByteOffset;
364 m_tileData[idx].tileStreaminOffset = 4 * (m_tileData[idx].tileStartYInSb * picWidthInSb + m_tileData[idx].tileStartXInSb * tileHeightInSb);
365
366 uint32_t numOfSbsInTile = tileWidthInSb * tileHeightInSb;
367 tileStartSbAddr += numOfSbsInTile;
368 // For Cumulative CU Count : 2 bytes per LCU
369 cumulativeCUTileOffsetInBytes += numOfSbsInTile * 2;
370 cumulativeCUTileOffsetInBytes = MOS_ALIGN_CEIL(cumulativeCUTileOffsetInBytes, CODECHAL_CACHELINE_SIZE);
371
372 if (m_tileData[idx].tileStartXInSb != 0 || m_tileData[idx].tileStartYInSb != 0)
373 {
374 uint32_t numOfSBs = m_tileData[idx].tileStartYInLCU * picWidthInSb + m_tileData[idx].tileStartXInLCU * tileHeightInSb;
375 m_tileData[idx].tileLCUStreamOutOffset = numOfSBs * numCachelinesPerSB;
376 }
377
378 cuLevelStreamoutOffset += (m_tileData[idx].tileWidthInMinCbMinus1 + 1) * (m_tileData[idx].tileHeightInMinCbMinus1 + 1);
379 sliceSizeStreamoutOffset += (m_tileData[idx].tileWidthInMinCbMinus1 + 1) * (m_tileData[idx].tileHeightInMinCbMinus1 + 1);
380 bitstreamByteOffset += bitstreamSizePerTile;
381 numLcusInTiles += numLcuInTile;
382
383 m_tileData[idx].tileSizeStreamoutOffset = (idx * m_hcpInterfaceNew->GetPakHWTileSizeRecordSize() + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
384
385 // DW5
386 const uint32_t frameStatsStreamoutSize = Vp9EncodeBrc::m_brcPakStatsBufSize;
387 m_tileData[idx].pakTileStatisticsOffset = (idx * frameStatsStreamoutSize + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
388
389 // DW12
390 m_tileData[idx].vp9ProbabilityCounterStreamoutOffset = ((idx * m_probabilityCounterBufferSize) + (CODECHAL_CACHELINE_SIZE - 1)) / CODECHAL_CACHELINE_SIZE;
391 }
392 }
393
394 return eStatus;
395 }
396
AllocateTileStatistics(void * params)397 MOS_STATUS Vp9EncodeTile::AllocateTileStatistics(void *params)
398 {
399 ENCODE_FUNC_CALL();
400
401 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
402
403 ENCODE_CHK_NULL_RETURN(params);
404
405 if (!m_enabled)
406 {
407 return eStatus;
408 }
409
410 EncoderParams * encodeParams = (EncoderParams *)params;
411 PCODEC_VP9_ENCODE_PIC_PARAMS vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(encodeParams->pPicParams);
412
413 MOS_ZeroMemory(&m_statsSize, sizeof(Vp9TileStatusInfo));
414 MOS_ZeroMemory(&m_frameStatsOffset, sizeof(Vp9TileStatusInfo));
415 MOS_ZeroMemory(&m_tileStatsOffset, sizeof(Vp9TileStatusInfo));
416
417 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
418 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
419 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
420 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
421 allocParamsForBufferLinear.Format = Format_Buffer;
422
423 // Sizes of each buffer to be loaded into the region 0 as input and 1 loaded out as output
424 m_statsSize.tileSizeRecord = m_hcpInterfaceNew->GetPakHWTileSizeRecordSize();
425 m_statsSize.vdencStats = Vp9EncodeBrc::m_brcStatsBufSize; // VDEnc stats size
426 m_statsSize.pakStats = Vp9EncodeBrc::m_brcPakStatsBufSize; // Frame stats size
427 m_statsSize.counterBuffer = m_probabilityCounterBufferSize;
428
429 // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
430 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
431 m_frameStatsOffset.tileSizeRecord = 0;
432 m_frameStatsOffset.vdencStats = MOS_ALIGN_CEIL((m_frameStatsOffset.tileSizeRecord + (m_maxTileNumber * m_statsSize.tileSizeRecord)), CODECHAL_PAGE_SIZE);
433 m_frameStatsOffset.pakStats = MOS_ALIGN_CEIL((m_frameStatsOffset.vdencStats + m_statsSize.vdencStats), CODECHAL_PAGE_SIZE);
434 m_frameStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_frameStatsOffset.pakStats + m_statsSize.pakStats), CODECHAL_PAGE_SIZE);
435
436 // Frame level statistics
437 m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL((m_frameStatsOffset.counterBuffer + m_statsSize.counterBuffer), CODECHAL_PAGE_SIZE);
438
439 // VP9 Frame Statistics Buffer - Output from HuC PAK Integration kernel
440 // Ref. CodechalVdencVp9StateG12::m_frameStatsPakIntegrationBuffer
441 if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer))
442 {
443 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
444 allocParamsForBufferLinear.pBufName = "PAK HUC Integrated Aggregated Frame Statistics Streamout Buffer";
445 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE;
446
447 auto allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
448 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
449 m_resHuCPakAggregatedFrameStatsBuffer = *allocatedBuffer;
450 }
451 // Max row is 4 by VP9 Spec
452 uint32_t maxScalableModeTiles = m_maxTileNumber;
453
454 // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
455 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
456 // Fill Pak integration kernel input tile stats structure
457 // TileSizeRecord has to be 4K aligned
458 m_tileStatsOffset.tileSizeRecord = 0;
459 // VdencStats has to be 4k aligned
460 m_tileStatsOffset.vdencStats = MOS_ALIGN_CEIL((m_tileStatsOffset.tileSizeRecord + (maxScalableModeTiles * m_statsSize.tileSizeRecord)), CODECHAL_PAGE_SIZE);
461 // VP9PAKStats has to be 64 byte aligned
462 m_tileStatsOffset.pakStats = MOS_ALIGN_CEIL((m_tileStatsOffset.vdencStats + (maxScalableModeTiles * m_statsSize.vdencStats)), CODECHAL_PAGE_SIZE);
463 // VP9CounterBuffer has to be 4k aligned
464 m_tileStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_tileStatsOffset.pakStats + (maxScalableModeTiles * m_statsSize.pakStats)), CODECHAL_PAGE_SIZE);
465 // Combined statistics size for all tiles
466 m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL((m_tileStatsOffset.counterBuffer + (maxScalableModeTiles * m_statsSize.counterBuffer)), CODECHAL_PAGE_SIZE);
467
468 // Tile size record size for all tiles
469 m_hwInterface->m_tileRecordSize = m_statsSize.tileSizeRecord * maxScalableModeTiles;
470
471 uint32_t curPakIntTileStatsSize = 0;
472 MOS_SURFACE surface;
473 MOS_ZeroMemory(&surface, sizeof(surface));
474 // Ref. CodechalVdencVp9StateG12::m_tileStatsPakIntegrationBuffer[]
475 surface.OsResource = m_resTileBasedStatisticsBuffer[m_statisticsBufIndex];
476 if (!Mos_ResourceIsNull(&surface.OsResource))
477 {
478 m_allocator->GetSurfaceInfo(&surface);
479 curPakIntTileStatsSize = surface.dwHeight * surface.dwWidth;
480 }
481 if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_statisticsBufIndex]) ||
482 curPakIntTileStatsSize < m_hwInterface->m_pakIntTileStatsSize)
483 {
484 if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_statisticsBufIndex]))
485 {
486 m_allocator->DestroyResource(&m_resTileBasedStatisticsBuffer[m_statisticsBufIndex]);
487 }
488 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize;
489 allocParamsForBufferLinear.pBufName = "Tile Level Statistics Streamout Buffer";
490 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE;
491
492 auto allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
493 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
494 m_resTileBasedStatisticsBuffer[m_statisticsBufIndex] = *allocatedBuffer;
495 }
496
497 // Allocate the updated tile size buffer for PAK integration kernel
498 // Ref. CodechalVdencVp9StateG12::m_tileRecordBuffer[]
499 if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_statisticsBufIndex]))
500 {
501 auto size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterfaceNew->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
502 allocParamsForBufferLinear.dwBytes = size;
503 allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
504 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE;
505
506 auto allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
507 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
508 m_tileRecordBuffer[m_statisticsBufIndex] = *allocatedBuffer;
509 }
510
511 return MOS_STATUS_SUCCESS;
512 }
513
//!
//! \brief  Populate the HCP_TILE_CODING command parameters from the current tile coding state
//! \details Every field is copied from m_curTileCodingParams, except that nonFirstPassTile and
//!          bitstreamByteOffsetEnable are derived from bTileReplayEnable / IsFirstPass.
//!          This setter does not consult m_enabled.
//! \return MOS_STATUS
//!         Always MOS_STATUS_SUCCESS
//!
MHW_SETPAR_DECL_SRC(HCP_TILE_CODING, Vp9EncodeTile)
{
    ENCODE_FUNC_CALL();

    // Read through a reference; the state is only read here, so a copy is not needed.
    const auto &tileCodingParams = m_curTileCodingParams;

    params.numberOfActiveBePipes   = tileCodingParams.NumberOfActiveBePipes;
    params.numOfTileColumnsInFrame = tileCodingParams.NumOfTileColumnsInFrame;
    params.tileRowStoreSelect      = tileCodingParams.TileRowStoreSelect;
    params.tileColumnStoreSelect   = tileCodingParams.TileColumnStoreSelect;

    params.tileStartLCUX           = tileCodingParams.TileStartLCUX;
    params.tileStartLCUY           = tileCodingParams.TileStartLCUY;
    params.nonFirstPassTile        = tileCodingParams.bTileReplayEnable && (!tileCodingParams.IsFirstPass);
    params.isLastTileofColumn      = tileCodingParams.IsLastTileofColumn;
    params.isLastTileofRow         = tileCodingParams.IsLastTileofRow;
    params.tileHeightInMinCbMinus1 = tileCodingParams.TileHeightInMinCbMinus1;
    params.tileWidthInMinCbMinus1  = tileCodingParams.TileWidthInMinCbMinus1;

    params.bitstreamByteOffsetEnable = tileCodingParams.bTileReplayEnable;

    params.cuRecordOffset           = tileCodingParams.CuRecordOffset;
    params.bitstreamByteOffset      = tileCodingParams.BitstreamByteOffset;
    params.pakTileStatisticsOffset  = tileCodingParams.PakTileStatisticsOffset;
    params.cuLevelStreamoutOffset   = tileCodingParams.CuLevelStreamoutOffset;
    params.sliceSizeStreamoutOffset = tileCodingParams.SliceSizeStreamoutOffset;
    params.tileSizeStreamoutOffset  = tileCodingParams.TileSizeStreamoutOffset;
    params.sseRowstoreOffset        = tileCodingParams.SseRowstoreOffset;
    params.saoRowstoreOffset        = tileCodingParams.SaoRowstoreOffset;

    params.vp9ProbabilityCounterStreamoutOffset = tileCodingParams.Vp9ProbabilityCounterStreamoutOffset;

    return MOS_STATUS_SUCCESS;
}
549
//!
//! \brief  Populate the tile-related buffers of HCP_IND_OBJ_BASE_ADDR_STATE
//! \details Does nothing when the feature is disabled. When DysRefFrameFlags() reports anything
//!          other than DYS_REF_NONE, the tile record stream-out buffer is used. Otherwise the
//!          PAK tile size statistics buffer is used, the tile record buffer is cleared, and in
//!          scalable mode with HuC enabled the probability counter is redirected into the tile
//!          based statistics buffer. The CU statistics buffer is set in both cases.
//! \return MOS_STATUS
//!         MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(HCP_IND_OBJ_BASE_ADDR_STATE, Vp9EncodeTile)
{
    ENCODE_FUNC_CALL();

    if (!m_enabled)
    {
        return MOS_STATUS_SUCCESS;
    }

    auto vp9Feature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(vp9Feature);

    const uint32_t picSizeInSb = vp9Feature->m_picSizeInSb;
    const bool     usesDysRef  = (vp9Feature->m_ref.DysRefFrameFlags() != DYS_REF_NONE);

    if (usesDysRef)
    {
        params.presTileRecordBuffer = const_cast<PMOS_RESOURCE>(&m_resTileRecordStrmOutBuffer);
        params.dwTileRecordSize     = picSizeInSb * CODECHAL_CACHELINE_SIZE;
    }
    else
    {
        if (vp9Feature->m_scalableMode && vp9Feature->m_hucEnabled)
        {
            // Overwrite presProbabilityCounterBuffer and it's params for scalable mode
            params.presProbabilityCounterBuffer = const_cast<PMOS_RESOURCE>(&m_resTileBasedStatisticsBuffer[m_statisticsBufIndex]);
            params.dwProbabilityCounterOffset   = m_tileStatsOffset.counterBuffer;
            params.dwProbabilityCounterSize     = m_statsSize.counterBuffer;
        }

        MOS_RESOURCE *pakTileSizeBuffer = const_cast<PMOS_RESOURCE>(&m_tileRecordBuffer[m_statisticsBufIndex]);
        const bool    haveTileSizeBuf   = !Mos_ResourceIsNull(pakTileSizeBuffer);

        if (haveTileSizeBuf)
        {
            params.presPakTileSizeStasBuffer   = pakTileSizeBuffer;
            params.dwPakTileSizeStasBufferSize = m_statsSize.tileSizeRecord * m_numTiles;
            params.dwPakTileSizeRecordOffset   = m_tileStatsOffset.tileSizeRecord;
        }
        else
        {
            params.presPakTileSizeStasBuffer   = nullptr;
            params.dwPakTileSizeStasBufferSize = 0;
            params.dwPakTileSizeRecordOffset   = 0;
        }

        // Need to use presPakTileSizeStasBuffer instead of presTileRecordBuffer, so setting to null
        params.presTileRecordBuffer = nullptr;
        params.dwTileRecordSize     = 0;
    }

    // The CU statistics stream-out is programmed identically on both paths.
    params.presCuStatsBuffer = const_cast<PMOS_RESOURCE>(&m_resCuStatsStrmOutBuffer);
    params.dwCuStatsSize     = MOS_ALIGN_CEIL(picSizeInSb * 64 * 8, CODECHAL_CACHELINE_SIZE);

    return MOS_STATUS_SUCCESS;
}
604
//!
//! \brief  Redirect the HCP statistics stream-outs to the tile based statistics buffer
//! \details Only acts when the feature is enabled and the basic feature reports both scalable
//!          mode and HuC enabled. If the tile statistics buffer for m_statisticsBufIndex is a
//!          null resource, the frame statistics stream-out is cleared instead.
//! \return MOS_STATUS
//!         MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(HCP_PIPE_BUF_ADDR_STATE, Vp9EncodeTile)
{
    ENCODE_FUNC_CALL();

    if (!m_enabled)
    {
        return MOS_STATUS_SUCCESS;
    }

    auto vp9Feature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(vp9Feature);

    const bool scalableWithHuc = vp9Feature->m_scalableMode && vp9Feature->m_hucEnabled;
    if (!scalableWithHuc)
    {
        return MOS_STATUS_SUCCESS;
    }

    MOS_RESOURCE *tileStats = const_cast<PMOS_RESOURCE>(&m_resTileBasedStatisticsBuffer[m_statisticsBufIndex]);
    if (Mos_ResourceIsNull(tileStats))
    {
        params.presFrameStatStreamOutBuffer = nullptr;
        params.dwFrameStatStreamOutOffset   = 0;
        return MOS_STATUS_SUCCESS;
    }

    params.presVdencStreamOutBuffer    = tileStats;
    params.dwVdencStatsStreamOutOffset = m_tileStatsOffset.vdencStats;

    // The new framestats streamout will now be the tile level stats buffer because each pak is spewing out tile level stats
    // Main Frame Stats are integrated by PAK integration kernel
    params.presFrameStatStreamOutBuffer = tileStats;
    params.dwFrameStatStreamOutOffset   = m_tileStatsOffset.pakStats;

    return MOS_STATUS_SUCCESS;
}
639
//!
//! \brief  Program default weights and offsets for VDENC_WEIGHTSOFFSETS_STATE
//! \details Fills every byte of the luma weights with 1 and clears the luma offsets, chroma
//!          weights and chroma offsets. Each array is sized with its own sizeof and the byte
//!          count is held in size_t, so neither a narrow counter type nor a differing element
//!          type can truncate or shorten the fill.
//! \return MOS_STATUS
//!         Always MOS_STATUS_SUCCESS
//!
MHW_SETPAR_DECL_SRC(VDENC_WEIGHTSOFFSETS_STATE, Vp9EncodeTile)
{
    ENCODE_FUNC_CALL();

    // memset writes bytes: a weight of 1 per element relies on single-byte weight elements.
    // NOTE(review): assumes all four members are arrays (not pointers) - confirm against the
    // parameter struct definition.
    memset(params.weightsLuma, 1, sizeof(params.weightsLuma));
    memset(params.offsetsLuma, 0, sizeof(params.offsetsLuma));

    memset(params.weightsChroma, 0, sizeof(params.weightsChroma));
    memset(params.offsetsChroma, 0, sizeof(params.offsetsChroma));

    return MOS_STATUS_SUCCESS;
}
654
//!
//! \brief  Populate VDENC_HEVC_VP9_TILE_SLICE_STATE for the current tile
//! \details With the feature disabled, the tile size is the source frame size from the picture
//!          parameters. With it enabled, size, start position, id and stream offsets come from
//!          m_curTileCodingParams and m_tileIdx. A few fields receive fixed default values in
//!          both cases.
//! \return MOS_STATUS
//!         MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(VDENC_HEVC_VP9_TILE_SLICE_STATE, Vp9EncodeTile)
{
    ENCODE_FUNC_CALL();

    auto vp9BasicFeature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(vp9BasicFeature);
    auto picParams = vp9BasicFeature->m_vp9PicParams;
    ENCODE_CHK_NULL_RETURN(picParams);

    params.ctbSize = CODEC_VP9_SUPER_BLOCK_WIDTH;

    if (!m_enabled)
    {
        // Tiling disabled: program the full source frame size.
        params.tileWidth  = picParams->SrcFrameWidthMinus1 + 1;
        params.tileHeight = picParams->SrcFrameHeightMinus1 + 1;
    }
    else
    {
        // Only read in this branch, and through a reference rather than a copy.
        const auto &tileCodingParams = m_curTileCodingParams;

        params.tileWidth  = ((tileCodingParams.TileWidthInMinCbMinus1 + 1) * CODEC_VP9_MIN_BLOCK_WIDTH);
        params.tileHeight = ((tileCodingParams.TileHeightInMinCbMinus1 + 1) * CODEC_VP9_MIN_BLOCK_HEIGHT);

        params.tileStartLCUX = tileCodingParams.TileStartLCUX;
        params.tileStartLCUY = tileCodingParams.TileStartLCUY;

        params.tileId = m_tileIdx;

        params.tileEnable             = 1;
        params.tileStreamInOffset     = tileCodingParams.TileStreaminOffset;
        params.tileLCUStreamOutOffset = tileCodingParams.TileLCUStreamOutOffset;
        // Non-zero only for tiles in the first tile row (tileStartLCUY == 0)
        params.tileRowstoreOffset = (params.tileStartLCUY == 0) ? (params.tileStartLCUX * params.ctbSize) / 32 : 0;

        params.VdencHEVCVP9TileSlicePar18 = 0;
        params.VdencHEVCVP9TileSlicePar19 = tileCodingParams.CumulativeCUTileOffset;
    }

    // Default values
    params.VdencHEVCVP9TileSlicePar12 = 0x3f;
    params.VdencHEVCVP9TileSlicePar13 = 2;

    params.VdencHEVCVP9TileSlicePar17[0] = 0x3f;
    params.VdencHEVCVP9TileSlicePar17[1] = 0x3f;
    params.VdencHEVCVP9TileSlicePar17[2] = 0x3f;

    return MOS_STATUS_SUCCESS;
}
700
//!
//! \brief  Populate VDENC_WALKER_STATE for the current tile
//! \details With the feature disabled, only the next-tile start position is programmed, derived
//!          from the source frame dimensions. With it enabled, the tile start comes from
//!          m_curTileCodingParams and the next-tile start is the tile's end position in
//!          super blocks. firstSuperSlice is set to 1 in both cases.
//! \return MOS_STATUS
//!         MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(VDENC_WALKER_STATE, Vp9EncodeTile)
{
    ENCODE_FUNC_CALL();

    auto vp9BasicFeature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(vp9BasicFeature);
    auto picParams = vp9BasicFeature->m_vp9PicParams;
    ENCODE_CHK_NULL_RETURN(picParams);

    if (!m_enabled)
    {
        // NOTE(review): the block-count macros receive the *Minus1 values here, whereas the
        // tiled path below passes a full pixel extent - confirm this difference is intended.
        params.nextTileSliceStartLcuMbX = CODECHAL_GET_WIDTH_IN_BLOCKS(picParams->SrcFrameWidthMinus1, CODEC_VP9_SUPER_BLOCK_WIDTH);
        params.nextTileSliceStartLcuMbY = CODECHAL_GET_HEIGHT_IN_BLOCKS(picParams->SrcFrameHeightMinus1, CODEC_VP9_SUPER_BLOCK_HEIGHT);
    }
    else
    {
        // Only read in this branch, and through a reference rather than a copy.
        const auto &tileCodingParams = m_curTileCodingParams;

        params.tileSliceStartLcuMbX = tileCodingParams.TileStartLCUX;
        params.tileSliceStartLcuMbY = tileCodingParams.TileStartLCUY;

        // Tile size in pixels minus one, kept as 16-bit values.
        uint16_t tileWidth  = ((tileCodingParams.TileWidthInMinCbMinus1 + 1) * CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
        uint16_t tileHeight = ((tileCodingParams.TileHeightInMinCbMinus1 + 1) * CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;

        // Tile origin in pixels.
        uint32_t tileStartCtbX = tileCodingParams.TileStartLCUX * CODEC_VP9_SUPER_BLOCK_WIDTH;
        uint32_t tileStartCtbY = tileCodingParams.TileStartLCUY * CODEC_VP9_SUPER_BLOCK_HEIGHT;

        params.nextTileSliceStartLcuMbX = CODECHAL_GET_WIDTH_IN_BLOCKS((tileStartCtbX + tileWidth + 1), CODEC_VP9_SUPER_BLOCK_WIDTH);
        params.nextTileSliceStartLcuMbY = CODECHAL_GET_HEIGHT_IN_BLOCKS((tileStartCtbY + tileHeight + 1), CODEC_VP9_SUPER_BLOCK_HEIGHT);
    }

    params.firstSuperSlice = 1;

    return MOS_STATUS_SUCCESS;
}
736
//!
//! \brief  Redirect the VDEnc stream-out to the tile based statistics buffer
//! \details Takes effect only when the feature is enabled, the basic feature reports scalable
//!          mode with HuC enabled, and the statistics buffer for m_statisticsBufIndex is not a
//!          null resource. In every other case params is left unchanged.
//! \return MOS_STATUS
//!         MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(VDENC_PIPE_BUF_ADDR_STATE, Vp9EncodeTile)
{
    ENCODE_FUNC_CALL();

    if (!m_enabled)
    {
        return MOS_STATUS_SUCCESS;
    }

    auto vp9Feature = dynamic_cast<Vp9BasicFeature *>(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(vp9Feature);

    const bool scalableWithHuc = vp9Feature->m_scalableMode && vp9Feature->m_hucEnabled;
    if (!scalableWithHuc)
    {
        return MOS_STATUS_SUCCESS;
    }

    MOS_RESOURCE *tileStats = const_cast<PMOS_RESOURCE>(&m_resTileBasedStatisticsBuffer[m_statisticsBufIndex]);
    if (Mos_ResourceIsNull(tileStats))
    {
        return MOS_STATUS_SUCCESS;
    }

    params.streamOutBuffer = tileStats;
    params.streamOutOffset = m_tileStatsOffset.vdencStats;

    return MOS_STATUS_SUCCESS;
}
761
762 } // namespace encode
763