1 /*
2 * Copyright © 2024 Igalia
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 #include "genxml/gen_macros.h"
27 #include "genxml/genX_pack.h"
28
29 static int
anv_get_max_vmv_range(StdVideoH264LevelIdc level)30 anv_get_max_vmv_range(StdVideoH264LevelIdc level)
31 {
32 int max_vmv_range;
33
34 switch(level) {
35 case STD_VIDEO_H264_LEVEL_IDC_1_0:
36 max_vmv_range = 256;
37 break;
38 case STD_VIDEO_H264_LEVEL_IDC_1_1:
39 case STD_VIDEO_H264_LEVEL_IDC_1_2:
40 case STD_VIDEO_H264_LEVEL_IDC_1_3:
41 case STD_VIDEO_H264_LEVEL_IDC_2_0:
42 max_vmv_range = 512;
43 break;
44 case STD_VIDEO_H264_LEVEL_IDC_2_1:
45 case STD_VIDEO_H264_LEVEL_IDC_2_2:
46 case STD_VIDEO_H264_LEVEL_IDC_3_0:
47 max_vmv_range = 1024;
48 break;
49
50 case STD_VIDEO_H264_LEVEL_IDC_3_1:
51 case STD_VIDEO_H264_LEVEL_IDC_3_2:
52 case STD_VIDEO_H264_LEVEL_IDC_4_0:
53 case STD_VIDEO_H264_LEVEL_IDC_4_1:
54 case STD_VIDEO_H264_LEVEL_IDC_4_2:
55 case STD_VIDEO_H264_LEVEL_IDC_5_0:
56 case STD_VIDEO_H264_LEVEL_IDC_5_1:
57 case STD_VIDEO_H264_LEVEL_IDC_5_2:
58 case STD_VIDEO_H264_LEVEL_IDC_6_0:
59 case STD_VIDEO_H264_LEVEL_IDC_6_1:
60 case STD_VIDEO_H264_LEVEL_IDC_6_2:
61 default:
62 max_vmv_range = 2048;
63 break;
64 }
65
66 return max_vmv_range;
67 }
68
69 static bool
anv_post_deblock_enable(const StdVideoH264PictureParameterSet * pps,const VkVideoEncodeH264PictureInfoKHR * frame_info)70 anv_post_deblock_enable(const StdVideoH264PictureParameterSet *pps, const VkVideoEncodeH264PictureInfoKHR *frame_info)
71 {
72
73 if (!pps->flags.deblocking_filter_control_present_flag)
74 return true;
75
76 for (uint32_t slice_id = 0; slice_id < frame_info->naluSliceEntryCount; slice_id++) {
77 const VkVideoEncodeH264NaluSliceInfoKHR *nalu = &frame_info->pNaluSliceEntries[slice_id];
78 const StdVideoEncodeH264SliceHeader *slice_header = nalu->pStdSliceHeader;
79
80 if (slice_header->disable_deblocking_filter_idc != 1)
81 return true;
82 }
83
84 return false;
85 }
86
87 static uint8_t
anv_vdenc_h264_picture_type(StdVideoH264PictureType pic_type)88 anv_vdenc_h264_picture_type(StdVideoH264PictureType pic_type)
89 {
90 if (pic_type == STD_VIDEO_H264_PICTURE_TYPE_I || pic_type == STD_VIDEO_H264_PICTURE_TYPE_IDR) {
91 return 0;
92 } else {
93 return 1;
94 }
95 }
96
97 static uint8_t
anv_vdenc_h265_picture_type(StdVideoH265PictureType pic_type)98 anv_vdenc_h265_picture_type(StdVideoH265PictureType pic_type)
99 {
100 if (pic_type == STD_VIDEO_H265_PICTURE_TYPE_I || pic_type == STD_VIDEO_H265_PICTURE_TYPE_IDR) {
101 return 0;
102 } else {
103 return 2;
104 }
105 }
106
/* QP lambda table (42 entries) copied into VDENC_CONST_QPT_STATE for
 * I and IDR pictures.
 */
static const uint8_t vdenc_const_qp_lambda[42] = {
   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,   /* [ 0.. 6] */
   0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03,   /* [ 7..13] */
   0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x07,   /* [14..20] */
   0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11,   /* [21..27] */
   0x13, 0x15, 0x17, 0x1a, 0x1e, 0x21, 0x25,   /* [28..34] */
   0x2a, 0x2f, 0x35, 0x3b, 0x42, 0x4a, 0x53    /* [35..41] */
};

/* QP lambda table (42 entries) copied into VDENC_CONST_QPT_STATE for
 * all other picture types (P frame).  Currently holds the same values as
 * the I/IDR table.
 */
static const uint8_t vdenc_const_qp_lambda_p[42] = {
   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,   /* [ 0.. 6] */
   0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03,   /* [ 7..13] */
   0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x07,   /* [14..20] */
   0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11,   /* [21..27] */
   0x13, 0x15, 0x17, 0x1a, 0x1e, 0x21, 0x25,   /* [28..34] */
   0x2a, 0x2f, 0x35, 0x3b, 0x42, 0x4a, 0x53    /* [35..41] */
};
123
/*
 * 27-entry threshold tables copied into VDENC_CONST_QPT_STATE for pictures
 * that are neither I nor IDR.
 */

/* Skip thresholds. */
static const uint16_t vdenc_const_skip_threshold_p[27] = {
   0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b, 0x0011,
   0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099, 0x00c2, 0x00f1,
   0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2, 0x0333, 0x03b0, 0x0000
};

/* SIC forward transform coefficient thresholds, matrix 0. */
static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[27] = {
   0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b,
   0x0e, 0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34,
   0x39, 0x3f, 0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00
};

/* SIC forward transform coefficient thresholds, matrix 1/3/5. */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[27] = {
   0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12,
   0x17, 0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51,
   0x5c, 0x1a, 0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00
};

/* SIC forward transform coefficient thresholds, matrix 2. */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[27] = {
   0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b,
   0x0e, 0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34,
   0x39, 0x0f, 0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00
};

/* SIC forward transform coefficient thresholds, matrix 4/6. */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[27] = {
   0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b,
   0x23, 0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f,
   0x8e, 0x2a, 0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00
};
154
/*
 * Mode cost constants, indexed as [slice kind][LUT mode][QP].
 *
 *   slice kind: 0 = INTRASLICE, 1 = PREDSLICE
 *   LUT mode:   one of the twelve VDENC_LUTMODE_* rows
 *   QP:         0..51
 */
static const int vdenc_mode_const[2][12][52] = {
   /* INTRASLICE */
   [0] = {
      /* LUTMODE_INTRA_NONPRED */
      [0] = {
         14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,   /* QP  0..12 */
         16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14,   /* QP 13..25 */
         12, 12, 10, 10, 11, 10, 10, 10,  9,  9,  8,  8,  8,   /* QP 26..38 */
          8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,  7,   /* QP 39..51 */
      },

      /* LUTMODE_INTRA_16x16, LUTMODE_INTRA */
      [1] = {
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP  0..12 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 13..25 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 26..38 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 39..51 */
      },

      /* LUTMODE_INTRA_8x8 */
      [2] = {
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP  0..12 */
         0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,   /* QP 13..25 */
         1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6,   /* QP 26..38 */
         6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,   /* QP 39..51 */
      },

      /* LUTMODE_INTRA_4x4 */
      [3] = {
         56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,   /* QP  0..12 */
         64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64,   /* QP 13..25 */
         57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50,   /* QP 26..38 */
         46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13,   /* QP 39..51 */
      },

      /* LUTMODE_INTER_16x8, LUTMODE_INTER_8x16 */
      [4] = { 0 },

      /* LUTMODE_INTER_8X8Q */
      [5] = { 0 },

      /* LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD */
      [6] = { 0 },

      /* LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD */
      [7] = { 0 },

      /* LUTMODE_INTER_16x16, LUTMODE_INTER */
      [8] = { 0 },

      /* LUTMODE_INTER_BWD */
      [9] = { 0 },

      /* LUTMODE_REF_ID */
      [10] = { 0 },

      /* LUTMODE_INTRA_CHROMA */
      [11] = { 0 },
   },

   /* PREDSLICE */
   [1] = {
      /* LUTMODE_INTRA_NONPRED */
      [0] = {
         6, 6, 6,  6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   /* QP  0..12 */
         7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7,   /* QP 13..25 */
         6, 7, 7,  6, 7, 7, 7, 7, 7, 7, 7, 7, 7,   /* QP 26..38 */
         7, 7, 7,  7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   /* QP 39..51 */
      },

      /* LUTMODE_INTRA_16x16, LUTMODE_INTRA */
      [1] = {
         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,   /* QP  0..12 */
         24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,   /* QP 13..25 */
         24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,   /* QP 26..38 */
         24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,   /* QP 39..51 */
      },

      /* LUTMODE_INTRA_8x8 */
      [2] = {
         26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,   /* QP  0..12 */
         28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28,   /* QP 13..25 */
         26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28,   /* QP 26..38 */
         28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,   /* QP 39..51 */
      },

      /* LUTMODE_INTRA_4x4 */
      [3] = {
         64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64, 64, 64,   /* QP  0..12 */
         72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68,   /* QP 13..25 */
         64, 68, 68,  64, 70, 70, 70, 70, 68, 68, 68, 68, 68,   /* QP 26..38 */
         68, 68, 68,  68, 68, 68, 68, 68, 68, 68, 68, 68, 68,   /* QP 39..51 */
      },

      /* LUTMODE_INTER_16x8, LUTMODE_INTER_8x16 */
      [4] = {
         7, 7,  7,  7, 7, 7, 7,  7, 7, 7, 7, 7, 7,   /* QP  0..12 */
         8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9,   /* QP 13..25 */
         8, 9,  8,  8, 9, 9, 9,  9, 8, 8, 8, 8, 8,   /* QP 26..38 */
         8, 8,  8,  9, 9, 9, 9,  9, 9, 9, 9, 9, 9,   /* QP 39..51 */
      },

      /* LUTMODE_INTER_8X8Q */
      [5] = {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   /* QP  0..12 */
         2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3,   /* QP 13..25 */
         2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   /* QP 26..38 */
         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   /* QP 39..51 */
      },

      /* LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD */
      [6] = {
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   /* QP  0..12 */
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   /* QP 13..25 */
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   /* QP 26..38 */
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   /* QP 39..51 */
      },

      /* LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD */
      [7] = {
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   /* QP  0..12 */
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   /* QP 13..25 */
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   /* QP 26..38 */
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   /* QP 39..51 */
      },

      /* LUTMODE_INTER_16x16, LUTMODE_INTER */
      [8] = {
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   /* QP  0..12 */
         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   /* QP 13..25 */
         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   /* QP 26..38 */
         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   /* QP 39..51 */
      },

      /* LUTMODE_INTER_BWD */
      [9] = {
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP  0..12 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 13..25 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 26..38 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 39..51 */
      },

      /* LUTMODE_REF_ID */
      [10] = {
         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,   /* QP  0..12 */
         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,   /* QP 13..25 */
         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,   /* QP 26..38 */
         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,   /* QP 39..51 */
      },

      /* LUTMODE_INTRA_CHROMA */
      [11] = {
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP  0..12 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 13..25 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 26..38 */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* QP 39..51 */
      },
   },
};
315
316
/*
 * LUT mode indices.  Names that appear in the same group share one index.
 */

/* Intra modes */
#define VDENC_LUTMODE_INTRA_NONPRED       0x00

#define VDENC_LUTMODE_INTRA               0x01
#define VDENC_LUTMODE_INTRA_16x16         0x01

#define VDENC_LUTMODE_INTRA_8x8           0x02

#define VDENC_LUTMODE_INTRA_4x4           0x03

/* Inter modes */
#define VDENC_LUTMODE_INTER_16x8          0x04
#define VDENC_LUTMODE_INTER_8x16          0x04

#define VDENC_LUTMODE_INTER_8X8Q          0x05

#define VDENC_LUTMODE_INTER_8X4Q          0x06
#define VDENC_LUTMODE_INTER_4X8Q          0x06
#define VDENC_LUTMODE_INTER_16x8_FIELD    0x06

#define VDENC_LUTMODE_INTER_4X4Q          0x07
#define VDENC_LUTMODE_INTER_8x8_FIELD     0x07

#define VDENC_LUTMODE_INTER               0x08
#define VDENC_LUTMODE_INTER_16x16         0x08

#define VDENC_LUTMODE_INTER_BWD           0x09

/* Miscellaneous */
#define VDENC_LUTMODE_REF_ID              0x0A
#define VDENC_LUTMODE_INTRA_CHROMA        0x0B
335
/**
 * Encode a cost value into an 8-bit "4.4" code: the high nibble holds a
 * shift amount and the low nibble a mantissa, so a code c stands for
 * (c & 15) << (c >> 4).
 *
 * \param v    value to encode
 * \param max  largest code allowed, itself in 4.4 form; returned unchanged
 *             when v is at or above the value it represents
 *
 * \return 0 when v is 0, max when v saturates, otherwise the 4.4 code with
 *         the mantissa rounded to nearest.
 */
static unsigned char
map_44_lut_value(unsigned int v, unsigned char max)
{
   if (v == 0)
      return 0;

   const unsigned int maxcost = (unsigned int)(max & 15) << (max >> 4);
   if (v >= maxcost)
      return max;

   /* floor(log2(v)) computed with integer shifts.  This is exact for every
    * input, whereas log(v) / log(2.0) depends on libm rounding when v is a
    * power of two.
    */
   int msb = 0;
   for (unsigned int rest = v; rest > 1; rest >>= 1)
      msb++;

   /* Values that fit in four bits need no shift at all. */
   int d = msb - 3;
   if (d < 0)
      d = 0;

   /* Add half of the dropped range so the mantissa rounds to nearest. */
   const unsigned int half = (d == 0) ? 0u : (1u << (d - 1));
   unsigned char ret = (unsigned char)((d << 4) + (int)((v + half) >> d));

   /* Rounding can carry the mantissa into the shift nibble and leave the
    * low nibble zero; setting the mantissa to 8 represents the same value
    * with the now-larger shift.
    */
   if ((ret & 0xf) == 0)
      ret |= 8;

   return ret;
}
364
/**
 * Fill the cost arrays for one picture.
 *
 * \param mode_cost    output, 12 bytes; zeroed, then the four intra entries
 *                     are filled from vdenc_mode_const in 4.4 code form
 * \param mv_cost      output, 8 bytes; only zeroed here
 * \param hme_mv_cost  output, 8 bytes; only zeroed here
 * \param qp           QP used to select the table column; clamped to the
 *                     valid column range
 * \param pic_type     picture type, selects the intra or pred slice table
 */
static void
update_costs(uint8_t *mode_cost, uint8_t *mv_cost, uint8_t *hme_mv_cost,
             int qp, StdVideoH264PictureType pic_type)
{
   const int frame_type = anv_vdenc_h264_picture_type(pic_type);

   /* qp is supplied by the caller and indexes a fixed-size table row, so
    * keep it inside the row to avoid reading outside the static table.
    */
   const int max_qp = (int)(sizeof(vdenc_mode_const[0][0]) /
                            sizeof(vdenc_mode_const[0][0][0])) - 1;
   if (qp < 0)
      qp = 0;
   else if (qp > max_qp)
      qp = max_qp;

   memset(mode_cost, 0, 12 * sizeof(uint8_t));
   memset(mv_cost, 0, 8 * sizeof(uint8_t));
   memset(hme_mv_cost, 0, 8 * sizeof(uint8_t));

   mode_cost[VDENC_LUTMODE_INTRA_NONPRED] =
      map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
   mode_cost[VDENC_LUTMODE_INTRA_16x16] =
      map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
   mode_cost[VDENC_LUTMODE_INTRA_8x8] =
      map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
   mode_cost[VDENC_LUTMODE_INTRA_4x4] =
      map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
}
378
379 static void
anv_h264_encode_video(struct anv_cmd_buffer * cmd,const VkVideoEncodeInfoKHR * enc_info)380 anv_h264_encode_video(struct anv_cmd_buffer *cmd, const VkVideoEncodeInfoKHR *enc_info)
381 {
382 ANV_FROM_HANDLE(anv_buffer, dst_buffer, enc_info->dstBuffer);
383
384 struct anv_video_session *vid = cmd->video.vid;
385 struct anv_video_session_params *params = cmd->video.params;
386
387 const struct VkVideoEncodeH264PictureInfoKHR *frame_info =
388 vk_find_struct_const(enc_info->pNext, VIDEO_ENCODE_H264_PICTURE_INFO_KHR);
389
390 const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_enc_std_sps(¶ms->vk, frame_info->pStdPictureInfo->seq_parameter_set_id);
391 const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_enc_std_pps(¶ms->vk, frame_info->pStdPictureInfo->pic_parameter_set_id);
392 const StdVideoEncodeH264ReferenceListsInfo *ref_list_info = frame_info->pStdPictureInfo->pRefLists;
393
394 const struct anv_image_view *iv = anv_image_view_from_handle(enc_info->srcPictureResource.imageViewBinding);
395 const struct anv_image *src_img = iv->image;
396 bool post_deblock_enable = anv_post_deblock_enable(pps, frame_info);
397 bool rc_disable = cmd->video.params->rc_mode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
398 uint8_t dpb_idx[ANV_VIDEO_H264_MAX_NUM_REF_FRAME] = { 0,};
399
400 const struct anv_image_view *base_ref_iv;
401 if (enc_info->pSetupReferenceSlot) {
402 base_ref_iv = anv_image_view_from_handle(enc_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
403 } else {
404 base_ref_iv = iv;
405 }
406
407 const struct anv_image *base_ref_img = base_ref_iv->image;
408
409 anv_batch_emit(&cmd->batch, GENX(MI_FLUSH_DW), flush) {
410 flush.VideoPipelineCacheInvalidate = 1;
411 };
412
413 #if GFX_VER >= 12
414 anv_batch_emit(&cmd->batch, GENX(MI_FORCE_WAKEUP), wake) {
415 wake.MFXPowerWellControl = 1;
416 wake.MaskBits = 768;
417 }
418
419 anv_batch_emit(&cmd->batch, GENX(MFX_WAIT), mfx) {
420 mfx.MFXSyncControlFlag = 1;
421 }
422 #endif
423
424 anv_batch_emit(&cmd->batch, GENX(MFX_PIPE_MODE_SELECT), pipe_mode) {
425 pipe_mode.StandardSelect = SS_AVC;
426 pipe_mode.CodecSelect = Encode;
427 pipe_mode.FrameStatisticsStreamOutEnable = true;
428 pipe_mode.ScaledSurfaceEnable = false;
429 pipe_mode.PreDeblockingOutputEnable = !post_deblock_enable;
430 pipe_mode.PostDeblockingOutputEnable = post_deblock_enable;
431 pipe_mode.StreamOutEnable = false;
432 pipe_mode.VDEncMode = VM_VDEncMode;
433 pipe_mode.DecoderShortFormatMode = LongFormatDriverInterface;
434 }
435
436 #if GFX_VER >= 12
437 anv_batch_emit(&cmd->batch, GENX(MFX_WAIT), mfx) {
438 mfx.MFXSyncControlFlag = 1;
439 }
440 #endif
441
442 for (uint32_t i = 0; i < 2; i++) {
443 anv_batch_emit(&cmd->batch, GENX(MFX_SURFACE_STATE), surface) {
444 const struct anv_image *img_ = i == 0 ? base_ref_img : src_img;
445
446 surface.Width = img_->vk.extent.width - 1;
447 surface.Height = img_->vk.extent.height - 1;
448 /* TODO. add a surface for MFX_ReconstructedScaledReferencePicture */
449 surface.SurfaceID = i == 0 ? MFX_ReferencePicture : MFX_SourceInputPicture;
450 surface.TileWalk = TW_YMAJOR;
451 surface.TiledSurface = img_->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
452 surface.SurfacePitch = img_->planes[0].primary_surface.isl.row_pitch_B - 1;
453 surface.InterleaveChroma = true;
454 surface.SurfaceFormat = MFX_PLANAR_420_8;
455
456 surface.YOffsetforUCb = img_->planes[1].primary_surface.memory_range.offset /
457 img_->planes[0].primary_surface.isl.row_pitch_B;
458 surface.YOffsetforVCr = img_->planes[1].primary_surface.memory_range.offset /
459 img_->planes[0].primary_surface.isl.row_pitch_B;
460 }
461 }
462
463 anv_batch_emit(&cmd->batch, GENX(MFX_PIPE_BUF_ADDR_STATE), buf) {
464 if (post_deblock_enable) {
465 buf.PostDeblockingDestinationAddress =
466 anv_image_address(base_ref_img, &base_ref_img->planes[0].primary_surface.memory_range);
467 } else {
468 buf.PreDeblockingDestinationAddress =
469 anv_image_address(base_ref_img, &base_ref_img->planes[0].primary_surface.memory_range);
470 }
471 buf.PreDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
472 .MOCS = anv_mocs(cmd->device, buf.PreDeblockingDestinationAddress.bo, 0),
473 };
474 buf.PostDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
475 .MOCS = anv_mocs(cmd->device, buf.PostDeblockingDestinationAddress.bo, 0),
476 };
477
478 buf.OriginalUncompressedPictureSourceAddress =
479 anv_image_address(src_img, &src_img->planes[0].primary_surface.memory_range);
480 buf.OriginalUncompressedPictureSourceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
481 .MOCS = anv_mocs(cmd->device, buf.OriginalUncompressedPictureSourceAddress.bo, 0),
482 };
483
484 buf.StreamOutDataDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
485 .MOCS = anv_mocs(cmd->device, NULL, 0),
486 };
487
488 buf.IntraRowStoreScratchBufferAddress = (struct anv_address) {
489 vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].mem->bo,
490 vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].offset
491 };
492 buf.IntraRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
493 .MOCS = anv_mocs(cmd->device, buf.IntraRowStoreScratchBufferAddress.bo, 0),
494 };
495
496 buf.DeblockingFilterRowStoreScratchAddress = (struct anv_address) {
497 vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].mem->bo,
498 vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].offset
499 };
500 buf.DeblockingFilterRowStoreScratchAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
501 .MOCS = anv_mocs(cmd->device, buf.DeblockingFilterRowStoreScratchAddress.bo, 0),
502 };
503
504 struct anv_bo *ref_bo = NULL;
505
506 for (unsigned i = 0; i < enc_info->referenceSlotCount; i++) {
507 const struct anv_image_view *ref_iv =
508 anv_image_view_from_handle(enc_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
509 int slot_idx = enc_info->pReferenceSlots[i].slotIndex;
510 assert(slot_idx < ANV_VIDEO_H264_MAX_NUM_REF_FRAME);
511
512 dpb_idx[slot_idx] = i;
513
514 buf.ReferencePictureAddress[i] =
515 anv_image_address(ref_iv->image, &ref_iv->image->planes[0].primary_surface.memory_range);
516
517 if (i == 0)
518 ref_bo = ref_iv->image->bindings[0].address.bo;
519 }
520
521 buf.ReferencePictureAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
522 .MOCS = anv_mocs(cmd->device, ref_bo, 0),
523 };
524
525 buf.MBStatusBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
526 .MOCS = anv_mocs(cmd->device, NULL, 0),
527 };
528
529 buf.MBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
530 .MOCS = anv_mocs(cmd->device, NULL, 0),
531 };
532 buf.SecondMBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
533 .MOCS = anv_mocs(cmd->device, NULL, 0),
534 };
535
536 /* TODO. Add for scaled reference surface */
537 buf.ScaledReferenceSurfaceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
538 .MOCS = anv_mocs(cmd->device, buf.ScaledReferenceSurfaceAddress.bo, 0),
539 };
540 }
541
542 anv_batch_emit(&cmd->batch, GENX(MFX_IND_OBJ_BASE_ADDR_STATE), index_obj) {
543 index_obj.MFXIndirectBitstreamObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
544 .MOCS = anv_mocs(cmd->device, NULL, 0),
545 };
546 index_obj.MFXIndirectMVObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
547 .MOCS = anv_mocs(cmd->device, NULL, 0),
548 };
549 index_obj.MFDIndirectITCOEFFObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
550 .MOCS = anv_mocs(cmd->device, NULL, 0),
551 };
552 index_obj.MFDIndirectITDBLKObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
553 .MOCS = anv_mocs(cmd->device, NULL, 0),
554 };
555
556 index_obj.MFCIndirectPAKBSEObjectAddress = anv_address_add(dst_buffer->address, 0);
557
558 index_obj.MFCIndirectPAKBSEObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
559 .MOCS = anv_mocs(cmd->device, index_obj.MFCIndirectPAKBSEObjectAddress.bo, 0),
560 };
561 }
562
563 anv_batch_emit(&cmd->batch, GENX(MFX_BSP_BUF_BASE_ADDR_STATE), bsp) {
564 bsp.BSDMPCRowStoreScratchBufferAddress = (struct anv_address) {
565 vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].mem->bo,
566 vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset
567 };
568
569 bsp.BSDMPCRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
570 .MOCS = anv_mocs(cmd->device, bsp.BSDMPCRowStoreScratchBufferAddress.bo, 0),
571 };
572
573 bsp.MPRRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
574 .MOCS = anv_mocs(cmd->device, NULL, 0),
575 };
576
577 bsp.BitplaneReadBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
578 .MOCS = anv_mocs(cmd->device, NULL, 0),
579 };
580 }
581
582 anv_batch_emit(&cmd->batch, GENX(VDENC_PIPE_MODE_SELECT), vdenc_pipe_mode) {
583 vdenc_pipe_mode.StandardSelect = SS_AVC;
584 vdenc_pipe_mode.PAKChromaSubSamplingType = _420;
585 #if GFX_VER >= 12
586 //vdenc_pipe_mode.HMERegionPrefetchEnable = !vdenc_pipe_mode.TLBPrefetchEnable;
587 vdenc_pipe_mode.SourceLumaPackedDataTLBPrefetchEnable = true;
588 vdenc_pipe_mode.SourceChromaTLBPrefetchEnable = true;
589 vdenc_pipe_mode.HzShift32Minus1Src = 3;
590 vdenc_pipe_mode.PrefetchOffsetforSource = 4;
591 #endif
592 }
593
594 anv_batch_emit(&cmd->batch, GENX(VDENC_SRC_SURFACE_STATE), vdenc_surface) {
595 vdenc_surface.SurfaceState.Width = src_img->vk.extent.width - 1;
596 vdenc_surface.SurfaceState.Height = src_img->vk.extent.height - 1;
597 vdenc_surface.SurfaceState.SurfaceFormat = VDENC_PLANAR_420_8;
598 vdenc_surface.SurfaceState.SurfacePitch = src_img->planes[0].primary_surface.isl.row_pitch_B - 1;
599
600 #if GFX_VER == 9
601 vdenc_surface.SurfaceState.InterleaveChroma = true;
602 #endif
603
604 vdenc_surface.SurfaceState.TileWalk = TW_YMAJOR;
605 vdenc_surface.SurfaceState.TiledSurface = src_img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
606 vdenc_surface.SurfaceState.YOffsetforUCb = src_img->planes[1].primary_surface.memory_range.offset /
607 src_img->planes[0].primary_surface.isl.row_pitch_B;
608 vdenc_surface.SurfaceState.YOffsetforVCr = src_img->planes[1].primary_surface.memory_range.offset /
609 src_img->planes[0].primary_surface.isl.row_pitch_B;
610 vdenc_surface.SurfaceState.Colorspaceselection = 1;
611 }
612
613 anv_batch_emit(&cmd->batch, GENX(VDENC_REF_SURFACE_STATE), vdenc_surface) {
614 vdenc_surface.SurfaceState.Width = base_ref_img->vk.extent.width - 1;
615 vdenc_surface.SurfaceState.Height = base_ref_img->vk.extent.height - 1;
616 vdenc_surface.SurfaceState.SurfaceFormat = VDENC_PLANAR_420_8;
617 #if GFX_VER == 9
618 vdenc_surface.SurfaceState.InterleaveChroma = true;
619 #endif
620 vdenc_surface.SurfaceState.SurfacePitch = base_ref_img->planes[0].primary_surface.isl.row_pitch_B - 1;
621
622 vdenc_surface.SurfaceState.TileWalk = TW_YMAJOR;
623 vdenc_surface.SurfaceState.TiledSurface = base_ref_img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
624 vdenc_surface.SurfaceState.YOffsetforUCb = base_ref_img->planes[1].primary_surface.memory_range.offset /
625 base_ref_img->planes[0].primary_surface.isl.row_pitch_B;
626 vdenc_surface.SurfaceState.YOffsetforVCr = base_ref_img->planes[1].primary_surface.memory_range.offset /
627 base_ref_img->planes[0].primary_surface.isl.row_pitch_B;
628 }
629
630 /* TODO. add a cmd for VDENC_DS_REF_SURFACE_STATE */
631
632 anv_batch_emit(&cmd->batch, GENX(VDENC_PIPE_BUF_ADDR_STATE), vdenc_buf) {
633 /* TODO. add DSFWDREF and FWDREF */
634 vdenc_buf.DSFWDREF0.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
635 .MOCS = anv_mocs(cmd->device, NULL, 0),
636 };
637
638 vdenc_buf.DSFWDREF1.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
639 .MOCS = anv_mocs(cmd->device, NULL, 0),
640 };
641
642 vdenc_buf.OriginalUncompressedPicture.Address =
643 anv_image_address(src_img, &src_img->planes[0].primary_surface.memory_range);
644 vdenc_buf.OriginalUncompressedPicture.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
645 .MOCS = anv_mocs(cmd->device, vdenc_buf.OriginalUncompressedPicture.Address.bo, 0),
646 };
647
648 vdenc_buf.StreamInDataPicture.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
649 .MOCS = anv_mocs(cmd->device, NULL, 0),
650 };
651
652 vdenc_buf.RowStoreScratchBuffer.Address = (struct anv_address) {
653 vid->vid_mem[ANV_VID_MEM_H264_MPR_ROW_SCRATCH].mem->bo,
654 vid->vid_mem[ANV_VID_MEM_H264_MPR_ROW_SCRATCH].offset
655 };
656
657 vdenc_buf.RowStoreScratchBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
658 .MOCS = anv_mocs(cmd->device, vdenc_buf.RowStoreScratchBuffer.Address.bo, 0),
659 };
660
661 const struct anv_image_view *ref_iv[2] = { 0, };
662 for (unsigned i = 0; i < enc_info->referenceSlotCount && i < 2; i++)
663 ref_iv[i] = anv_image_view_from_handle(enc_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
664
665 if (ref_iv[0]) {
666 vdenc_buf.ColocatedMVReadBuffer.Address =
667 anv_image_address(ref_iv[0]->image, &ref_iv[0]->image->vid_dmv_top_surface);
668 vdenc_buf.FWDREF0.Address =
669 anv_image_address(ref_iv[0]->image, &ref_iv[0]->image->planes[0].primary_surface.memory_range);
670 }
671
672 vdenc_buf.ColocatedMVReadBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
673 .MOCS = anv_mocs(cmd->device, vdenc_buf.ColocatedMVReadBuffer.Address.bo, 0),
674 };
675
676 vdenc_buf.FWDREF0.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
677 .MOCS = anv_mocs(cmd->device, vdenc_buf.FWDREF0.Address.bo, 0),
678 };
679
680 if (ref_iv[1])
681 vdenc_buf.FWDREF1.Address =
682 anv_image_address(ref_iv[1]->image, &ref_iv[1]->image->planes[0].primary_surface.memory_range);
683
684 vdenc_buf.FWDREF1.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
685 .MOCS = anv_mocs(cmd->device, vdenc_buf.FWDREF1.Address.bo, 0),
686 };
687
688 vdenc_buf.FWDREF2.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
689 .MOCS = anv_mocs(cmd->device, NULL, 0),
690 };
691
692 vdenc_buf.BWDREF0.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
693 .MOCS = anv_mocs(cmd->device, NULL, 0),
694 };
695
696 vdenc_buf.VDEncStatisticsStreamOut.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
697 .MOCS = anv_mocs(cmd->device, NULL, 0),
698 };
699
700 #if GFX_VER >= 11
701 vdenc_buf.DSFWDREF04X.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
702 .MOCS = anv_mocs(cmd->device, NULL, 0),
703 };
704 vdenc_buf.DSFWDREF14X.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
705 .MOCS = anv_mocs(cmd->device, NULL, 0),
706 };
707 vdenc_buf.VDEncCURecordStreamOutBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
708 .MOCS = anv_mocs(cmd->device, NULL, 0),
709 };
710 vdenc_buf.VDEncLCUPAK_OBJ_CMDBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
711 .MOCS = anv_mocs(cmd->device, NULL, 0),
712 };
713 vdenc_buf.ScaledReferenceSurface8X.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
714 .MOCS = anv_mocs(cmd->device, NULL, 0),
715 };
716 vdenc_buf.ScaledReferenceSurface4X.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
717 .MOCS = anv_mocs(cmd->device, NULL, 0),
718 };
719 vdenc_buf.VP9SegmentationMapStreamInBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
720 .MOCS = anv_mocs(cmd->device, NULL, 0),
721 };
722 vdenc_buf.VP9SegmentationMapStreamOutBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
723 .MOCS = anv_mocs(cmd->device, NULL, 0),
724 };
725 #endif
726 #if GFX_VER >= 12
727 vdenc_buf.VDEncTileRowStoreBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
728 .MOCS = anv_mocs(cmd->device, NULL, 0),
729 };
730 vdenc_buf.VDEncCumulativeCUCountStreamOutSurface.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
731 .MOCS = anv_mocs(cmd->device, NULL, 0),
732 };
733 vdenc_buf.VDEncPaletteModeStreamOutSurface.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
734 .MOCS = anv_mocs(cmd->device, NULL, 0),
735 };
736 #endif
737 }
738
739 StdVideoH264PictureType pic_type;
740
741 pic_type = frame_info->pStdPictureInfo->primary_pic_type;
742
743 anv_batch_emit(&cmd->batch, GENX(VDENC_CONST_QPT_STATE), qpt) {
744 if (pic_type == STD_VIDEO_H264_PICTURE_TYPE_IDR || pic_type == STD_VIDEO_H264_PICTURE_TYPE_I) {
745 for (uint32_t i = 0; i < 42; i++) {
746 qpt.QPLambdaArrayIndex[i] = vdenc_const_qp_lambda[i];
747 }
748 } else {
749 for (uint32_t i = 0; i < 42; i++) {
750 qpt.QPLambdaArrayIndex[i] = vdenc_const_qp_lambda_p[i];
751 }
752
753 for (uint32_t i = 0; i < 27; i++) {
754 qpt.SkipThresholdArrayIndex[i] = vdenc_const_skip_threshold_p[i];
755 qpt.SICForwardTransformCoeffThresholdMatrix0ArrayIndex[i] = vdenc_const_sic_forward_transform_coeff_threshold_0_p[i];
756 qpt.SICForwardTransformCoeffThresholdMatrix135ArrayIndex[i] = vdenc_const_sic_forward_transform_coeff_threshold_1_p[i];
757 qpt.SICForwardTransformCoeffThresholdMatrix2ArrayIndex[i] = vdenc_const_sic_forward_transform_coeff_threshold_2_p[i];
758 qpt.SICForwardTransformCoeffThresholdMatrix46ArrayIndex[i] = vdenc_const_sic_forward_transform_coeff_threshold_3_p[i];
759 }
760
761 if (!pps->flags.transform_8x8_mode_flag) {
762 for (uint32_t i = 0; i < 27; i++) {
763 qpt.SkipThresholdArrayIndex[i] /= 2;
764 }
765 }
766 }
767 }
768
769 anv_batch_emit(&cmd->batch, GENX(MFX_AVC_IMG_STATE), avc_img) {
770 avc_img.FrameWidth = sps->pic_width_in_mbs_minus1;
771 avc_img.FrameHeight = sps->pic_height_in_map_units_minus1;
772 avc_img.FrameSize = (avc_img.FrameWidth + 1) * (avc_img.FrameHeight + 1);
773 avc_img.ImageStructure = FramePicture;
774
775 avc_img.WeightedBiPredictionIDC = pps->weighted_bipred_idc;
776 avc_img.WeightedPredictionEnable = pps->flags.weighted_pred_flag;
777 avc_img.RhoDomainRateControlEnable = false;
778 avc_img.FirstChromaQPOffset = pps->chroma_qp_index_offset;
779 avc_img.SecondChromaQPOffset = pps->second_chroma_qp_index_offset;
780
781 avc_img.FieldPicture = false;
782 avc_img.MBAFFMode = sps->flags.mb_adaptive_frame_field_flag;
783 avc_img.FrameMBOnly = sps->flags.frame_mbs_only_flag;
784 avc_img._8x8IDCTTransformMode = pps->flags.transform_8x8_mode_flag;
785 avc_img.Direct8x8Inference = sps->flags.direct_8x8_inference_flag;
786 avc_img.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
787 avc_img.NonReferencePicture = false;
788 avc_img.EntropyCodingSyncEnable = pps->flags.entropy_coding_mode_flag;
789 avc_img.MBMVFormat = FOLLOW;
790 avc_img.ChromaFormatIDC = sps->chroma_format_idc;
791 avc_img.MVUnpackedEnable = true;
792
793 avc_img.IntraMBMaxBitControl = true;
794 avc_img.InterMBMaxBitControl = true;
795 avc_img.FrameBitrateMaxReport = true;
796 avc_img.FrameBitrateMinReport = true;
797 avc_img.ForceIPCMControl = true;
798 avc_img.TrellisQuantizationChromaDisable = true;
799
800 avc_img.IntraMBConformanceMaxSize = 2700;
801 avc_img.InterMBConformanceMaxSize = 4095;
802
803 avc_img.FrameBitrateMin = 0;
804 avc_img.FrameBitrateMinUnitMode = 1;
805 avc_img.FrameBitrateMinUnit = 1;
806 avc_img.FrameBitrateMax = (1 << 14) - 1;
807 avc_img.FrameBitrateMaxUnitMode = 1;
808 avc_img.FrameBitrateMaxUnit = 1;
809
810 avc_img.NumberofReferenceFrames = enc_info->referenceSlotCount;
811 if (pic_type != STD_VIDEO_H264_PICTURE_TYPE_IDR && pic_type != STD_VIDEO_H264_PICTURE_TYPE_I) {
812 avc_img.NumberofActiveReferencePicturesfromL0 = pps->num_ref_idx_l0_default_active_minus1 + 1;
813 avc_img.NumberofActiveReferencePicturesfromL1 = pps->num_ref_idx_l1_default_active_minus1 + 1;
814 }
815 avc_img.PicOrderPresent = pps->flags.bottom_field_pic_order_in_frame_present_flag;
816 avc_img.DeltaPicOrderAlwaysZero = sps->flags.delta_pic_order_always_zero_flag;
817 avc_img.PicOrderCountType = sps->pic_order_cnt_type;
818 avc_img.DeblockingFilterControlPresent = pps->flags.deblocking_filter_control_present_flag;
819 avc_img.RedundantPicCountPresent = pps->flags.redundant_pic_cnt_present_flag;
820 avc_img.Log2MaxFrameNumber = sps->log2_max_frame_num_minus4;
821 avc_img.Log2MaxPicOrderCountLSB = sps->log2_max_pic_order_cnt_lsb_minus4;
822 }
823
824 uint8_t mode_cost[12];
825 uint8_t mv_cost[8];
826 uint8_t hme_mv_cost[8];
827
828 anv_batch_emit(&cmd->batch, GENX(VDENC_IMG_STATE), vdenc_img) {
829 uint32_t slice_qp = 0;
830 for (uint32_t slice_id = 0; slice_id < frame_info->naluSliceEntryCount; slice_id++) {
831 const VkVideoEncodeH264NaluSliceInfoKHR *nalu = &frame_info->pNaluSliceEntries[slice_id];
832 slice_qp = rc_disable ? nalu->constantQp : pps->pic_init_qp_minus26 + 26;
833 }
834
835 update_costs(mode_cost, mv_cost, hme_mv_cost, slice_qp, pic_type);
836
837 if (pic_type == STD_VIDEO_H264_PICTURE_TYPE_IDR || pic_type == STD_VIDEO_H264_PICTURE_TYPE_I) {
838 vdenc_img.IntraSADMeasureAdjustment = 2;
839 vdenc_img.SubMBSubPartitionMask = 0x70;
840 vdenc_img.CREPrefetchEnable = true;
841 vdenc_img.Mode0Cost = 10;
842 vdenc_img.Mode1Cost = 0;
843 vdenc_img.Mode2Cost = 3;
844 vdenc_img.Mode3Cost = 30;
845
846 } else {
847 vdenc_img.BidirectionalWeight = 0x20;
848 vdenc_img.SubPelMode = 3;
849 vdenc_img.BmeDisableForFbrMessage = true;
850 vdenc_img.InterSADMeasureAdjustment = 2;
851 vdenc_img.IntraSADMeasureAdjustment = 2;
852 vdenc_img.SubMBSubPartitionMask = 0x70;
853 vdenc_img.CREPrefetchEnable = true;
854
855 vdenc_img.NonSkipZeroMVCostAdded = 1;
856 vdenc_img.NonSkipMBModeCostAdded = 1;
857 vdenc_img.RefIDCostModeSelect = 1;
858
859 vdenc_img.Mode0Cost = 7;
860 vdenc_img.Mode1Cost = 26;
861 vdenc_img.Mode2Cost = 30;
862 vdenc_img.Mode3Cost = 57;
863 vdenc_img.Mode4Cost = 8;
864 vdenc_img.Mode5Cost = 2;
865 vdenc_img.Mode6Cost = 4;
866 vdenc_img.Mode7Cost = 6;
867 vdenc_img.Mode8Cost = 5;
868 vdenc_img.Mode9Cost = 0;
869 vdenc_img.RefIDCost = 4;
870 vdenc_img.ChromaIntraModeCost = 0;
871
872 vdenc_img.MVCost.MV0Cost = 0;
873 vdenc_img.MVCost.MV1Cost = 6;
874 vdenc_img.MVCost.MV2Cost = 6;
875 vdenc_img.MVCost.MV3Cost = 9;
876 vdenc_img.MVCost.MV4Cost = 10;
877 vdenc_img.MVCost.MV5Cost = 13;
878 vdenc_img.MVCost.MV6Cost = 14;
879 vdenc_img.MVCost.MV7Cost = 24;
880
881 vdenc_img.SadHaarThreshold0 = 800;
882 vdenc_img.SadHaarThreshold1 = 1600;
883 vdenc_img.SadHaarThreshold2 = 2400;
884 }
885
886 vdenc_img.PenaltyforIntra16x16NonDCPrediction = 36;
887 vdenc_img.PenaltyforIntra8x8NonDCPrediction = 12;
888 vdenc_img.PenaltyforIntra4x4NonDCPrediction = 4;
889 vdenc_img.MaxQP = 0x33;
890 vdenc_img.MinQP = 0x0a;
891 vdenc_img.MaxDeltaQP = 0x0f;
892 vdenc_img.MaxHorizontalMVRange = 0x2000;
893 vdenc_img.MaxVerticalMVRange = 0x200;
894 vdenc_img.SmallMbSizeInWord = 0xff;
895 vdenc_img.LargeMbSizeInWord = 0xff;
896
897 vdenc_img.Transform8x8 = pps->flags.transform_8x8_mode_flag;
898 vdenc_img.VDEncExtendedPAK_OBJ_CMDEnable = true;
899 vdenc_img.PictureWidth = sps->pic_width_in_mbs_minus1 + 1;
900 vdenc_img.ForwardTransformSkipCheckEnable = true;
901 vdenc_img.BlockBasedSkipEnable = true;
902 vdenc_img.PictureHeight = sps->pic_height_in_map_units_minus1;
903 vdenc_img.PictureType = anv_vdenc_h264_picture_type(pic_type);
904 vdenc_img.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
905
906 if (pic_type == STD_VIDEO_H264_PICTURE_TYPE_P) {
907 vdenc_img.HMERef1Disable =
908 (ref_list_info->num_ref_idx_l1_active_minus1 + 1) == 1 ? true : false;
909 }
910
911 vdenc_img.SliceMBHeight = sps->pic_height_in_map_units_minus1;
912
913 if (vdenc_img.Transform8x8) {
914 vdenc_img.LumaIntraPartitionMask = 0;
915 } else {
916 vdenc_img.LumaIntraPartitionMask = (1 << 1);
917 }
918
919 vdenc_img.QpPrimeY = slice_qp;
920 vdenc_img.MaxVerticalMVRange = anv_get_max_vmv_range(sps->level_idc);
921
922 /* TODO. Update Mode/MV cost conditionally. */
923 if (1) {
924 vdenc_img.Mode0Cost = mode_cost[0];
925 vdenc_img.Mode1Cost = mode_cost[1];
926 vdenc_img.Mode2Cost = mode_cost[2];
927 vdenc_img.Mode3Cost = mode_cost[3];
928 vdenc_img.Mode4Cost = mode_cost[4];
929 vdenc_img.Mode5Cost = mode_cost[5];
930 vdenc_img.Mode6Cost = mode_cost[6];
931 vdenc_img.Mode7Cost = mode_cost[7];
932 vdenc_img.Mode8Cost = mode_cost[8];
933 vdenc_img.Mode9Cost = mode_cost[9];
934 vdenc_img.RefIDCost = mode_cost[10];
935 vdenc_img.ChromaIntraModeCost = mode_cost[11];
936 }
937 }
938
939 if (pps->flags.pic_scaling_matrix_present_flag) {
940 /* TODO. */
941 assert(0);
942 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
943 qm.DWordLength = 16;
944 qm.AVC = AVC_4x4_Intra_MATRIX;
945 for (unsigned m = 0; m < 3; m++)
946 for (unsigned q = 0; q < 16; q++)
947 qm.ForwardQuantizerMatrix[m * 16 + q] = pps->pScalingLists->ScalingList4x4[m][q];
948 }
949 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
950 qm.DWordLength = 16;
951 qm.AVC = AVC_4x4_Inter_MATRIX;
952 for (unsigned m = 0; m < 3; m++)
953 for (unsigned q = 0; q < 16; q++)
954 qm.ForwardQuantizerMatrix[m * 16 + q] = pps->pScalingLists->ScalingList4x4[m + 3][q];
955 }
956 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
957 qm.DWordLength = 16;
958 qm.AVC = AVC_8x8_Intra_MATRIX;
959 for (unsigned q = 0; q < 64; q++)
960 qm.ForwardQuantizerMatrix[q] = pps->pScalingLists->ScalingList8x8[0][q];
961 }
962 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
963 qm.DWordLength = 16;
964 qm.AVC = AVC_8x8_Inter_MATRIX;
965 for (unsigned q = 0; q < 64; q++)
966 qm.ForwardQuantizerMatrix[q] = pps->pScalingLists->ScalingList8x8[3][q];
967 }
968 } else if (sps->flags.seq_scaling_matrix_present_flag) {
969 /* TODO. */
970 assert(0);
971 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
972 qm.DWordLength = 16;
973 qm.AVC = AVC_4x4_Intra_MATRIX;
974 for (unsigned m = 0; m < 3; m++)
975 for (unsigned q = 0; q < 16; q++)
976 qm.ForwardQuantizerMatrix[m * 16 + q] = sps->pScalingLists->ScalingList4x4[m][q];
977 }
978 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
979 qm.DWordLength = 16;
980 qm.AVC = AVC_4x4_Inter_MATRIX;
981 for (unsigned m = 0; m < 3; m++)
982 for (unsigned q = 0; q < 16; q++)
983 qm.ForwardQuantizerMatrix[m * 16 + q] = sps->pScalingLists->ScalingList4x4[m + 3][q];
984 }
985 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
986 qm.DWordLength = 16;
987 qm.AVC = AVC_8x8_Intra_MATRIX;
988 for (unsigned q = 0; q < 64; q++)
989 qm.ForwardQuantizerMatrix[q] = sps->pScalingLists->ScalingList8x8[0][q];
990 }
991 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
992 qm.DWordLength = 16;
993 qm.AVC = AVC_8x8_Inter_MATRIX;
994 for (unsigned q = 0; q < 64; q++)
995 qm.ForwardQuantizerMatrix[q] = sps->pScalingLists->ScalingList8x8[3][q];
996 }
997 } else {
998 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
999 qm.AVC = AVC_4x4_Intra_MATRIX;
1000 for (unsigned q = 0; q < 3 * 16; q++)
1001 qm.ForwardQuantizerMatrix[q] = 0x10;
1002 }
1003 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
1004 qm.AVC = AVC_4x4_Inter_MATRIX;
1005 for (unsigned q = 0; q < 3 * 16; q++)
1006 qm.ForwardQuantizerMatrix[q] = 0x10;
1007 }
1008 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
1009 qm.AVC = AVC_8x8_Intra_MATRIX;
1010 for (unsigned q = 0; q < 64; q++)
1011 qm.ForwardQuantizerMatrix[q] = 0x10;
1012 }
1013 anv_batch_emit(&cmd->batch, GENX(MFX_QM_STATE), qm) {
1014 qm.AVC = AVC_8x8_Inter_MATRIX;
1015 for (unsigned q = 0; q < 64; q++)
1016 qm.ForwardQuantizerMatrix[q] = 0x10;
1017 }
1018 }
1019
1020 if (pps->flags.pic_scaling_matrix_present_flag) {
1021 /* TODO. */
1022 assert(0);
1023 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1024 fqm.AVC = AVC_4x4_Intra_MATRIX;
1025 for (unsigned m = 0; m < 3; m++)
1026 for (unsigned q = 0; q < 16; q++)
1027 fqm.QuantizerMatrix8x8[m * 16 + q] = pps->pScalingLists->ScalingList4x4[m][q];
1028 }
1029 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1030 fqm.AVC = AVC_4x4_Inter_MATRIX;
1031 for (unsigned m = 0; m < 3; m++)
1032 for (unsigned q = 0; q < 16; q++)
1033 fqm.QuantizerMatrix8x8[m * 16 + q] = pps->pScalingLists->ScalingList4x4[m + 3][q];
1034 }
1035 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1036 fqm.AVC = AVC_8x8_Intra_MATRIX;
1037 for (unsigned q = 0; q < 64; q++)
1038 fqm.QuantizerMatrix8x8[q] = pps->pScalingLists->ScalingList8x8[0][q];
1039 }
1040 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1041 fqm.AVC = AVC_8x8_Inter_MATRIX;
1042 for (unsigned q = 0; q < 64; q++)
1043 fqm.QuantizerMatrix8x8[q] = pps->pScalingLists->ScalingList8x8[3][q];
1044 }
1045 } else if (sps->flags.seq_scaling_matrix_present_flag) {
1046 /* TODO. */
1047 assert(0);
1048 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1049 fqm.AVC = AVC_4x4_Intra_MATRIX;
1050 for (unsigned m = 0; m < 3; m++)
1051 for (unsigned q = 0; q < 16; q++)
1052 fqm.QuantizerMatrix8x8[m * 16 + q] = sps->pScalingLists->ScalingList4x4[m][q];
1053 }
1054 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1055 fqm.AVC = AVC_4x4_Inter_MATRIX;
1056 for (unsigned m = 0; m < 3; m++)
1057 for (unsigned q = 0; q < 16; q++)
1058 fqm.QuantizerMatrix8x8[m * 16 + q] = sps->pScalingLists->ScalingList4x4[m + 3][q];
1059 }
1060 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1061 fqm.AVC = AVC_8x8_Intra_MATRIX;
1062 for (unsigned q = 0; q < 64; q++)
1063 fqm.QuantizerMatrix8x8[q] = sps->pScalingLists->ScalingList8x8[0][q];
1064 }
1065 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1066 fqm.AVC = AVC_8x8_Inter_MATRIX;
1067 for (unsigned q = 0; q < 64; q++)
1068 fqm.QuantizerMatrix8x8[q] = sps->pScalingLists->ScalingList8x8[3][q];
1069 }
1070 } else {
1071 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1072 fqm.AVC = AVC_4x4_Intra_MATRIX;
1073 for (unsigned q = 0; q < 64; q++)
1074 if (q % 2 == 1)
1075 fqm.QuantizerMatrix8x8[q] = 0x10;
1076 }
1077 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1078 fqm.AVC = AVC_4x4_Inter_MATRIX;
1079 for (unsigned q = 0; q < 64; q++)
1080 if (q % 2 == 1)
1081 fqm.QuantizerMatrix8x8[q] = 0x10;
1082 }
1083 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1084 fqm.AVC = AVC_8x8_Intra_MATRIX;
1085 for (unsigned q = 0; q < 64; q++)
1086 if (q % 2 == 1)
1087 fqm.QuantizerMatrix8x8[q] = 0x10;
1088 }
1089 anv_batch_emit(&cmd->batch, GENX(MFX_FQM_STATE), fqm) {
1090 fqm.AVC = AVC_8x8_Inter_MATRIX;
1091 for (unsigned q = 0; q < 64; q++)
1092 if (q % 2 == 1)
1093 fqm.QuantizerMatrix8x8[q] = 0x10;
1094 }
1095 }
1096
1097 for (uint32_t slice_id = 0; slice_id < frame_info->naluSliceEntryCount; slice_id++) {
1098 const VkVideoEncodeH264NaluSliceInfoKHR *nalu = &frame_info->pNaluSliceEntries[slice_id];
1099 const StdVideoEncodeH264SliceHeader *slice_header = nalu->pStdSliceHeader;
1100 const StdVideoEncodeH264SliceHeader *next_slice_header = NULL;
1101
1102 bool is_last = (slice_id == frame_info->naluSliceEntryCount - 1);
1103 uint32_t slice_type = slice_header->slice_type % 5;
1104 uint32_t slice_qp = rc_disable ? nalu->constantQp : pps->pic_init_qp_minus26 + 26;
1105
1106 if (!is_last)
1107 next_slice_header = slice_header + 1;
1108
1109 if (slice_type != STD_VIDEO_H264_SLICE_TYPE_I) {
1110 anv_batch_emit(&cmd->batch, GENX(MFX_AVC_REF_IDX_STATE), ref) {
1111 ref.ReferencePictureListSelect = 0;
1112
1113 for (uint32_t i = 0; i < ref_list_info->num_ref_idx_l0_active_minus1 + 1; i++) {
1114 const VkVideoReferenceSlotInfoKHR ref_slot = enc_info->pReferenceSlots[i];
1115 ref.ReferenceListEntry[i] = dpb_idx[ref_slot.slotIndex];
1116 }
1117 }
1118 }
1119
1120 if (slice_type == STD_VIDEO_H264_SLICE_TYPE_B) {
1121 anv_batch_emit(&cmd->batch, GENX(MFX_AVC_REF_IDX_STATE), ref) {
1122 ref.ReferencePictureListSelect = 1;
1123
1124 for (uint32_t i = 0; i < ref_list_info->num_ref_idx_l1_active_minus1 + 1; i++) {
1125 const VkVideoReferenceSlotInfoKHR ref_slot = enc_info->pReferenceSlots[i];
1126 ref.ReferenceListEntry[i] = dpb_idx[ref_slot.slotIndex];
1127 }
1128 }
1129 }
1130
1131 if (pps->flags.weighted_pred_flag && slice_type == STD_VIDEO_H265_SLICE_TYPE_P) {
1132 /* TODO. */
1133 assert(0);
1134 anv_batch_emit(&cmd->batch, GENX(MFX_AVC_WEIGHTOFFSET_STATE), w) {
1135 }
1136 }
1137
1138 if (pps->flags.weighted_pred_flag && slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
1139 /* TODO. */
1140 assert(0);
1141 anv_batch_emit(&cmd->batch, GENX(MFX_AVC_WEIGHTOFFSET_STATE), w) {
1142 }
1143 }
1144
1145 const StdVideoEncodeH264WeightTable* weight_table = slice_header->pWeightTable;
1146
1147 unsigned w_in_mb = align(src_img->vk.extent.width, ANV_MB_WIDTH) / ANV_MB_WIDTH;
1148 unsigned h_in_mb = align(src_img->vk.extent.height, ANV_MB_HEIGHT) / ANV_MB_HEIGHT;
1149
1150 uint8_t slice_header_data[256] = { 0, };
1151 size_t slice_header_data_len_in_bytes = 0;
1152 vk_video_encode_h264_slice_header(frame_info->pStdPictureInfo,
1153 sps,
1154 pps,
1155 slice_header,
1156 slice_qp - (pps->pic_init_qp_minus26 + 26),
1157 &slice_header_data_len_in_bytes,
1158 &slice_header_data);
1159 uint32_t slice_header_data_len_in_bits = slice_header_data_len_in_bytes * 8;
1160
1161 anv_batch_emit(&cmd->batch, GENX(MFX_AVC_SLICE_STATE), avc_slice) {
1162 avc_slice.SliceType = slice_type;
1163
1164 if (slice_type != STD_VIDEO_H264_SLICE_TYPE_I && weight_table) {
1165 avc_slice.Log2WeightDenominatorLuma = weight_table->luma_log2_weight_denom;
1166 avc_slice.Log2WeightDenominatorChroma = weight_table->chroma_log2_weight_denom;
1167 }
1168
1169 avc_slice.NumberofReferencePicturesinInterpredictionList0 =
1170 slice_type == STD_VIDEO_H264_SLICE_TYPE_I ? 0 : ref_list_info->num_ref_idx_l0_active_minus1 + 1;
1171 avc_slice.NumberofReferencePicturesinInterpredictionList1 =
1172 (slice_type == STD_VIDEO_H264_SLICE_TYPE_I ||
1173 slice_type == STD_VIDEO_H264_SLICE_TYPE_P) ? 0 : ref_list_info->num_ref_idx_l1_active_minus1 + 1;
1174
1175 avc_slice.SliceAlphaC0OffsetDiv2 = slice_header->slice_alpha_c0_offset_div2 & 0x7;
1176 avc_slice.SliceBetaOffsetDiv2 = slice_header->slice_beta_offset_div2 & 0x7;
1177 avc_slice.SliceQuantizationParameter = slice_qp;
1178 avc_slice.CABACInitIDC = slice_header->cabac_init_idc;
1179 avc_slice.DisableDeblockingFilterIndicator =
1180 pps->flags.deblocking_filter_control_present_flag ? slice_header->disable_deblocking_filter_idc : 0;
1181 avc_slice.DirectPredictionType = slice_header->flags.direct_spatial_mv_pred_flag;
1182
1183 avc_slice.SliceStartMBNumber = slice_header->first_mb_in_slice;
1184 avc_slice.SliceHorizontalPosition =
1185 slice_header->first_mb_in_slice % (w_in_mb);
1186 avc_slice.SliceVerticalPosition =
1187 slice_header->first_mb_in_slice / (w_in_mb);
1188
1189 if (is_last) {
1190 avc_slice.NextSliceHorizontalPosition = 0;
1191 avc_slice.NextSliceVerticalPosition = h_in_mb;
1192 } else {
1193 avc_slice.NextSliceHorizontalPosition = next_slice_header->first_mb_in_slice % w_in_mb;
1194 avc_slice.NextSliceVerticalPosition = next_slice_header->first_mb_in_slice / w_in_mb;
1195 }
1196
1197 avc_slice.SliceID = slice_id;
1198 avc_slice.CABACZeroWordInsertionEnable = 1;
1199 avc_slice.EmulationByteSliceInsertEnable = 1;
1200 avc_slice.SliceDataInsertionPresent = 1;
1201 avc_slice.HeaderInsertionPresent = 1;
1202 avc_slice.LastSliceGroup = is_last;
1203 avc_slice.RateControlCounterEnable = false;
1204
1205 /* TODO. Available only when RateControlCounterEnable is true. */
1206 avc_slice.RateControlPanicType = CBPPanic;
1207 avc_slice.RateControlPanicEnable = false;
1208 avc_slice.RateControlTriggleMode = LooseRateControl;
1209 avc_slice.ResetRateControlCounter = true;
1210 avc_slice.IndirectPAKBSEDataStartAddress = enc_info->dstBufferOffset;
1211
1212 avc_slice.RoundIntra = 5;
1213 avc_slice.RoundIntraEnable = true;
1214 /* TODO. Needs to get a different value of rounding inter under various conditions. */
1215 avc_slice.RoundInter = 2;
1216 avc_slice.RoundInterEnable = false;
1217
1218 if (slice_type == STD_VIDEO_H264_SLICE_TYPE_P) {
1219 avc_slice.WeightedPredictionIndicator = pps->flags.weighted_pred_flag;
1220 avc_slice.NumberofReferencePicturesinInterpredictionList0 = ref_list_info->num_ref_idx_l0_active_minus1 + 1;
1221 } else if (slice_type == STD_VIDEO_H264_SLICE_TYPE_B) {
1222 avc_slice.WeightedPredictionIndicator = pps->weighted_bipred_idc;
1223 avc_slice.NumberofReferencePicturesinInterpredictionList0 = ref_list_info->num_ref_idx_l0_active_minus1 + 1;
1224 avc_slice.NumberofReferencePicturesinInterpredictionList1 = ref_list_info->num_ref_idx_l1_active_minus1 + 1;
1225 }
1226 }
1227
1228 uint32_t length_in_dw, data_bits_in_last_dw;
1229 uint32_t *dw;
1230
1231 /* Insert zero slice data */
1232 unsigned int insert_zero[] = { 0, };
1233 length_in_dw = 1;
1234 data_bits_in_last_dw = 8;
1235
1236 dw = anv_batch_emitn(&cmd->batch, length_in_dw + 2, GENX(MFX_PAK_INSERT_OBJECT),
1237 .DataBitsInLastDW = data_bits_in_last_dw > 0 ? data_bits_in_last_dw : 32,
1238 .HeaderLengthExcludedFromSize = ACCUMULATE);
1239
1240 memcpy(dw + 2, insert_zero, length_in_dw * 4);
1241
1242 slice_header_data_len_in_bits -= 8;
1243
1244 length_in_dw = ALIGN(slice_header_data_len_in_bits, 32) >> 5;
1245 data_bits_in_last_dw = slice_header_data_len_in_bits & 0x1f;
1246
1247 dw = anv_batch_emitn(&cmd->batch, length_in_dw + 2, GENX(MFX_PAK_INSERT_OBJECT),
1248 .LastHeader = true,
1249 .DataBitsInLastDW = data_bits_in_last_dw > 0 ? data_bits_in_last_dw : 32,
1250 .SliceHeaderIndicator = true,
1251 .HeaderLengthExcludedFromSize = ACCUMULATE);
1252
1253 memcpy(dw + 2, slice_header_data + 1, length_in_dw * 4);
1254
1255 anv_batch_emit(&cmd->batch, GENX(VDENC_WEIGHTSOFFSETS_STATE), vdenc_offsets) {
1256 vdenc_offsets.WeightsForwardReference0 = 1;
1257 vdenc_offsets.WeightsForwardReference1 = 1;
1258 vdenc_offsets.WeightsForwardReference2 = 1;
1259
1260 }
1261
1262 anv_batch_emit(&cmd->batch, GENX(VDENC_WALKER_STATE), vdenc_walker) {
1263 vdenc_walker.NextSliceMBStartYPosition = h_in_mb;
1264 vdenc_walker.Log2WeightDenominatorLuma = weight_table ? weight_table->luma_log2_weight_denom : 0;
1265 #if GFX_VER >= 12
1266 vdenc_walker.TileWidth = src_img->vk.extent.width - 1;
1267 #endif
1268 }
1269
1270 anv_batch_emit(&cmd->batch, GENX(VD_PIPELINE_FLUSH), flush) {
1271 flush.MFXPipelineDone = true;
1272 flush.VDENCPipelineDone = true;
1273 flush.VDCommandMessageParserDone = true;
1274 flush.VDENCPipelineCommandFlush = true;
1275 }
1276 }
1277
1278 anv_batch_emit(&cmd->batch, GENX(MI_FLUSH_DW), flush) {
1279 flush.DWordLength = 2;
1280 flush.VideoPipelineCacheInvalidate = 1;
1281 };
1282
1283 }
1284
1285 static uint8_t
anv_h265_get_ref_poc(const VkVideoEncodeInfoKHR * enc_info,const StdVideoEncodeH265ReferenceListsInfo * ref_lists,const bool l0,const uint8_t slot_num,bool * long_term)1286 anv_h265_get_ref_poc(const VkVideoEncodeInfoKHR *enc_info,
1287 const StdVideoEncodeH265ReferenceListsInfo* ref_lists,
1288 const bool l0,
1289 const uint8_t slot_num,
1290 bool *long_term)
1291 {
1292 uint8_t ref_poc = 0xff;
1293 unsigned ref_cnt = l0 ? ref_lists->num_ref_idx_l0_active_minus1 + 1 :
1294 ref_lists->num_ref_idx_l1_active_minus1 + 1;
1295
1296 for (unsigned i = 0; i < ref_cnt; i++) {
1297 const VkVideoReferenceSlotInfoKHR ref_slot_info = enc_info->pReferenceSlots[i];
1298 const VkVideoEncodeH265DpbSlotInfoKHR *dpb =
1299 vk_find_struct_const(ref_slot_info.pNext, VIDEO_ENCODE_H265_DPB_SLOT_INFO_KHR);
1300
1301 if (!dpb)
1302 return ref_poc;
1303
1304 if (ref_slot_info.slotIndex == slot_num) {
1305 ref_poc = dpb->pStdReferenceInfo->PicOrderCntVal;
1306 *long_term |= dpb->pStdReferenceInfo->flags.used_for_long_term_reference;
1307 break;
1308 }
1309 }
1310
1311 return ref_poc;
1312 }
1313
1314 static void
scaling_list(struct anv_cmd_buffer * cmd_buffer,const StdVideoH265ScalingLists * scaling_list)1315 scaling_list(struct anv_cmd_buffer *cmd_buffer,
1316 const StdVideoH265ScalingLists *scaling_list)
1317 {
1318 /* 4x4, 8x8, 16x16, 32x32 */
1319 for (uint8_t size = 0; size < 4; size++) {
1320 /* Intra, Inter */
1321 for (uint8_t pred = 0; pred < 2; pred++) {
1322 /* Y, Cb, Cr */
1323 for (uint8_t color = 0; color < 3; color++) {
1324 if (size == 3 && color > 0)
1325 continue;
1326
1327 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) {
1328 qm.SizeID = size;
1329 qm.PredictionType = pred;
1330 qm.ColorComponent = color;
1331
1332 qm.DCCoefficient = size > 1 ?
1333 (size == 2 ? scaling_list->ScalingListDCCoef16x16[3 * pred + color] :
1334 scaling_list->ScalingListDCCoef32x32[pred]) : 0;
1335
1336 if (size == 0) {
1337 for (uint8_t i = 0; i < 4; i++)
1338 for (uint8_t j = 0; j < 4; j++)
1339 qm.QuantizerMatrix8x8[4 * i + j] =
1340 scaling_list->ScalingList4x4[3 * pred + color][4 * i + j];
1341 } else if (size == 1) {
1342 for (uint8_t i = 0; i < 8; i++)
1343 for (uint8_t j = 0; j < 8; j++)
1344 qm.QuantizerMatrix8x8[8 * i + j] =
1345 scaling_list->ScalingList8x8[3 * pred + color][8 * i + j];
1346 } else if (size == 2) {
1347 for (uint8_t i = 0; i < 8; i++)
1348 for (uint8_t j = 0; j < 8; j++)
1349 qm.QuantizerMatrix8x8[8 * i + j] =
1350 scaling_list->ScalingList16x16[3 * pred + color][8 * i + j];
1351 } else if (size == 3) {
1352 for (uint8_t i = 0; i < 8; i++)
1353 for (uint8_t j = 0; j < 8; j++)
1354 qm.QuantizerMatrix8x8[8 * i + j] =
1355 scaling_list->ScalingList32x32[pred][8 * i + j];
1356 }
1357 }
1358 }
1359 }
1360 }
1361 }
1362
1363 static uint16_t
lcu_max_bits_size_allowed(const StdVideoH265SequenceParameterSet * sps)1364 lcu_max_bits_size_allowed(const StdVideoH265SequenceParameterSet *sps)
1365 {
1366 uint16_t log2_max_coding_block_size =
1367 sps->log2_diff_max_min_luma_coding_block_size +
1368 sps->log2_min_luma_coding_block_size_minus3 + 3;
1369 uint32_t raw_ctu_bits = (1 << (2 * log2_max_coding_block_size));
1370
1371 switch (sps->chroma_format_idc)
1372 {
1373 case 1:
1374 raw_ctu_bits = raw_ctu_bits * 3 / 2;
1375 break;
1376 case 2:
1377 raw_ctu_bits = raw_ctu_bits * 2;
1378 break;
1379 case 3:
1380 raw_ctu_bits = raw_ctu_bits * 3;
1381 break;
1382 default:
1383 break;
1384 };
1385
1386 raw_ctu_bits = raw_ctu_bits * (sps->bit_depth_luma_minus8 + 8);
1387 raw_ctu_bits = (5 * raw_ctu_bits / 3);
1388
1389 return raw_ctu_bits & 0xffff;
1390 }
1391
1392 static void
anv_h265_encode_video(struct anv_cmd_buffer * cmd,const VkVideoEncodeInfoKHR * enc_info)1393 anv_h265_encode_video(struct anv_cmd_buffer *cmd, const VkVideoEncodeInfoKHR *enc_info)
1394 {
1395 /* Supported on Gen12(+) for using VDEnc Mode */
1396 #if GFX_VER >= 12
1397 ANV_FROM_HANDLE(anv_buffer, dst_buffer, enc_info->dstBuffer);
1398 struct anv_video_session *vid = cmd->video.vid;
1399 struct anv_video_session_params *params = cmd->video.params;
1400
1401 const struct VkVideoEncodeH265PictureInfoKHR *frame_info =
1402 vk_find_struct_const(enc_info->pNext, VIDEO_ENCODE_H265_PICTURE_INFO_KHR);
1403
1404 const StdVideoH265VideoParameterSet *vps = vk_video_find_h265_enc_std_vps(¶ms->vk, frame_info->pStdPictureInfo->sps_video_parameter_set_id);
1405 const StdVideoH265SequenceParameterSet *sps = vk_video_find_h265_enc_std_sps(¶ms->vk, frame_info->pStdPictureInfo->pps_seq_parameter_set_id);
1406 const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_enc_std_pps(¶ms->vk, frame_info->pStdPictureInfo->pps_pic_parameter_set_id);
1407 const StdVideoEncodeH265ReferenceListsInfo *ref_list_info = frame_info->pStdPictureInfo->pRefLists;
1408
1409 const struct anv_image_view *iv = anv_image_view_from_handle(enc_info->srcPictureResource.imageViewBinding);
1410 const struct anv_image *src_img = iv->image;
1411
1412 const struct anv_image_view *base_ref_iv;
1413
1414 bool rc_disable = cmd->video.params->rc_mode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
1415
1416 if (enc_info->pSetupReferenceSlot) {
1417 base_ref_iv = anv_image_view_from_handle(enc_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
1418 } else {
1419 base_ref_iv = iv;
1420 }
1421
1422 const struct anv_image *base_ref_img = base_ref_iv->image;
1423 uint8_t dpb_idx[ANV_VIDEO_H265_MAX_NUM_REF_FRAME] = { 0,};
1424
1425 anv_batch_emit(&cmd->batch, GENX(MI_FLUSH_DW), flush) {
1426 flush.VideoPipelineCacheInvalidate = 1;
1427 };
1428
1429
1430 anv_batch_emit(&cmd->batch, GENX(MI_FORCE_WAKEUP), wake) {
1431 wake.MFXPowerWellControl = 1;
1432 wake.HEVCPowerWellControl = 1;
1433 wake.MaskBits = 768;
1434 }
1435
1436 anv_batch_emit(&cmd->batch, GENX(VDENC_CONTROL_STATE), v) {
1437 v.VdencInitialization = true;
1438 }
1439
1440 anv_batch_emit(&cmd->batch, GENX(VD_CONTROL_STATE), v) {
1441 v.PipelineInitialization = true;
1442 }
1443
1444 anv_batch_emit(&cmd->batch, GENX(MFX_WAIT), mfx) {
1445 mfx.MFXSyncControlFlag = 1;
1446 }
1447
1448 anv_batch_emit(&cmd->batch, GENX(HCP_PIPE_MODE_SELECT), sel) {
1449 sel.CodecSelect = Encode;
1450 sel.CodecStandardSelect = HEVC;
1451 sel.VDEncMode = VM_VDEncMode;
1452 }
1453
1454 anv_batch_emit(&cmd->batch, GENX(MFX_WAIT), mfx) {
1455 mfx.MFXSyncControlFlag = 1;
1456 }
1457
1458
1459 for (uint32_t i = 0; i < 3; i++) {
1460 anv_batch_emit(&cmd->batch, GENX(HCP_SURFACE_STATE), ss) {
1461 struct anv_image *img_ = NULL;
1462
1463 switch(i) {
1464 case 0:
1465 img_ = (struct anv_image *) src_img;
1466 ss.SurfaceID = HCP_SourceInputPicture;
1467 break;
1468 case 1:
1469 //img_ = (struct anv_image *) src_img;
1470 img_ = (struct anv_image *) base_ref_img;
1471 ss.SurfaceID = HCP_CurrentDecodedPicture;
1472 break;
1473 case 2:
1474 img_ = (struct anv_image *) base_ref_img;
1475 ss.SurfaceID = HCP_ReferencePicture;
1476 break;
1477 default:
1478 assert(0);
1479 }
1480
1481 ss.SurfacePitch = img_->planes[0].primary_surface.isl.row_pitch_B - 1;
1482 ss.SurfaceFormat = PLANAR_420_8;
1483
1484 ss.YOffsetforUCb = img_->planes[1].primary_surface.memory_range.offset /
1485 img_->planes[0].primary_surface.isl.row_pitch_B;
1486 ss.YOffsetforVCr = ss.YOffsetforUCb;
1487 }
1488 }
1489
1490 anv_batch_emit(&cmd->batch, GENX(HCP_PIPE_BUF_ADDR_STATE), buf) {
1491 buf.DecodedPictureAddress =
1492 anv_image_address(base_ref_img, &base_ref_img->planes[0].primary_surface.memory_range);
1493
1494 buf.DecodedPictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1495 .MOCS = anv_mocs(cmd->device, buf.DecodedPictureAddress.bo, 0),
1496 };
1497
1498 buf.DeblockingFilterLineBufferAddress = (struct anv_address) {
1499 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].mem->bo,
1500 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].offset
1501 };
1502
1503 buf.DeblockingFilterLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1504 .MOCS = anv_mocs(cmd->device, buf.DeblockingFilterLineBufferAddress.bo, 0),
1505 };
1506
1507 buf.DeblockingFilterTileLineBufferAddress = (struct anv_address) {
1508 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].mem->bo,
1509 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].offset
1510 };
1511
1512 buf.DeblockingFilterTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1513 .MOCS = anv_mocs(cmd->device, buf.DeblockingFilterTileLineBufferAddress.bo, 0),
1514 };
1515
1516 buf.DeblockingFilterTileColumnBufferAddress = (struct anv_address) {
1517 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].mem->bo,
1518 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].offset
1519 };
1520
1521 buf.DeblockingFilterTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1522 .MOCS = anv_mocs(cmd->device, buf.DeblockingFilterTileColumnBufferAddress.bo, 0),
1523 };
1524
1525 buf.MetadataLineBufferAddress = (struct anv_address) {
1526 vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].mem->bo,
1527 vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].offset
1528 };
1529
1530 buf.MetadataLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1531 .MOCS = anv_mocs(cmd->device, buf.MetadataLineBufferAddress.bo, 0),
1532 };
1533
1534 buf.MetadataTileLineBufferAddress = (struct anv_address) {
1535 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].mem->bo,
1536 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].offset
1537 };
1538
1539 buf.MetadataTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1540 .MOCS = anv_mocs(cmd->device, buf.MetadataTileLineBufferAddress.bo, 0),
1541 };
1542
1543 buf.MetadataTileColumnBufferAddress = (struct anv_address) {
1544 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].mem->bo,
1545 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].offset
1546 };
1547
1548 buf.MetadataTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1549 .MOCS = anv_mocs(cmd->device, buf.MetadataTileColumnBufferAddress.bo, 0),
1550 };
1551
1552 buf.SAOLineBufferAddress = (struct anv_address) {
1553 vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].mem->bo,
1554 vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].offset
1555 };
1556
1557 buf.SAOLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1558 .MOCS = anv_mocs(cmd->device, buf.SAOLineBufferAddress.bo, 0),
1559 };
1560
1561 buf.SAOTileLineBufferAddress = (struct anv_address) {
1562 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].mem->bo,
1563 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].offset
1564 };
1565
1566 buf.SAOTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1567 .MOCS = anv_mocs(cmd->device, buf.SAOTileLineBufferAddress.bo, 0),
1568 };
1569
1570 buf.SAOTileColumnBufferAddress = (struct anv_address) {
1571 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].mem->bo,
1572 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].offset
1573 };
1574
1575 buf.SAOTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1576 .MOCS = anv_mocs(cmd->device, buf.SAOTileColumnBufferAddress.bo, 0),
1577 };
1578
1579 buf.CurrentMVTemporalBufferAddress = anv_image_address(src_img, &src_img->vid_dmv_top_surface);
1580
1581 buf.CurrentMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1582 .MOCS = anv_mocs(cmd->device, buf.CurrentMVTemporalBufferAddress.bo, 0),
1583 };
1584
1585 for (unsigned i = 0; i < enc_info->referenceSlotCount; i++) {
1586 const struct anv_image_view *ref_iv =
1587 anv_image_view_from_handle(enc_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
1588 int slot_idx = enc_info->pReferenceSlots[i].slotIndex;
1589
1590 assert(slot_idx < ANV_VIDEO_H265_MAX_NUM_REF_FRAME);
1591 dpb_idx[slot_idx] = i;
1592
1593 buf.ReferencePictureAddress[i] =
1594 anv_image_address(ref_iv->image, &ref_iv->image->planes[0].primary_surface.memory_range);
1595 }
1596
1597 buf.ReferencePictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1598 .MOCS = anv_mocs(cmd->device, NULL, 0),
1599 };
1600
1601 buf.OriginalUncompressedPictureSourceAddress =
1602 anv_image_address(src_img, &src_img->planes[0].primary_surface.memory_range);
1603 buf.OriginalUncompressedPictureSourceMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1604 .MOCS = anv_mocs(cmd->device, buf.OriginalUncompressedPictureSourceAddress.bo, 0),
1605 };
1606
1607 buf.StreamOutDataDestinationMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1608 .MOCS = anv_mocs(cmd->device, NULL, 0),
1609 };
1610
1611 buf.DecodedPictureStatusBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1612 .MOCS = anv_mocs(cmd->device, NULL, 0),
1613 };
1614
1615 buf.LCUILDBStreamOutBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1616 .MOCS = anv_mocs(cmd->device, NULL, 0),
1617 };
1618
1619 for (unsigned i = 0; i < enc_info->referenceSlotCount; i++) {
1620 const struct anv_image_view *ref_iv =
1621 anv_image_view_from_handle(enc_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
1622
1623 buf.CollocatedMVTemporalBufferAddress[i] =
1624 anv_image_address(ref_iv->image, &ref_iv->image->vid_dmv_top_surface);
1625 }
1626
1627 buf.CollocatedMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1628 .MOCS = anv_mocs(cmd->device, buf.CollocatedMVTemporalBufferAddress[0].bo, 0),
1629 };
1630
1631 buf.VP9ProbabilityBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1632 .MOCS = anv_mocs(cmd->device, NULL, 0),
1633 };
1634
1635 buf.VP9SegmentIDBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1636 .MOCS = anv_mocs(cmd->device, NULL, 0),
1637 };
1638
1639 buf.VP9HVDLineRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1640 .MOCS = anv_mocs(cmd->device, NULL, 0),
1641 };
1642
1643 buf.VP9HVDTileRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1644 .MOCS = anv_mocs(cmd->device, NULL, 0),
1645 };
1646
1647 buf.SAOStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1648 .MOCS = anv_mocs(cmd->device, NULL, 0),
1649 };
1650 buf.FrameStatisticsStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1651 .MOCS = anv_mocs(cmd->device, NULL, 0),
1652 };
1653
1654 buf.SSESourcePixelRowStoreBufferBaseAddress = (struct anv_address) {
1655 vid->vid_mem[ANV_VID_MEM_H265_SSE_SRC_PIX_ROW_STORE].mem->bo,
1656 vid->vid_mem[ANV_VID_MEM_H265_SSE_SRC_PIX_ROW_STORE].offset
1657 };
1658
1659 buf.SSESourcePixelRowStoreBufferMemoryAddressAttributesReadWrite = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1660 .MOCS = anv_mocs(cmd->device, buf.SSESourcePixelRowStoreBufferBaseAddress.bo, 0),
1661 };
1662
1663 buf.HCPScalabilitySliceStateBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1664 .MOCS = anv_mocs(cmd->device, NULL, 0),
1665 };
1666 buf.HCPScalabilityCABACDecodedSyntaxElementsBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1667 .MOCS = anv_mocs(cmd->device, NULL, 0),
1668 };
1669 buf.MVUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1670 .MOCS = anv_mocs(cmd->device, NULL, 0),
1671 };
1672 buf.IntraPredictionUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1673 .MOCS = anv_mocs(cmd->device, NULL, 0),
1674 };
1675 buf.IntraPredictionLeftReconColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1676 .MOCS = anv_mocs(cmd->device, NULL, 0),
1677 };
1678 }
1679
1680 anv_batch_emit(&cmd->batch, GENX(HCP_IND_OBJ_BASE_ADDR_STATE), indirect) {
1681 indirect.HCPIndirectBitstreamObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1682 .MOCS = anv_mocs(cmd->device, NULL, 0),
1683 };
1684
1685 indirect.HCPIndirectCUObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1686 .MOCS = anv_mocs(cmd->device, NULL, 0),
1687 };
1688
1689 indirect.HCPPAKBSEObjectBaseAddress =
1690 anv_address_add(dst_buffer->address, align(enc_info->dstBufferOffset, 4096));
1691 indirect.HCPPAKBSEObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1692 .MOCS = anv_mocs(cmd->device, indirect.HCPPAKBSEObjectBaseAddress.bo, 0),
1693 };
1694
1695 indirect.HCPVP9PAKCompressedHeaderSyntaxStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1696 .MOCS = anv_mocs(cmd->device, NULL, 0),
1697 };
1698 indirect.HCPVP9PAKProbabilityCounterStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1699 .MOCS = anv_mocs(cmd->device, NULL, 0),
1700 };
1701 indirect.HCPVP9PAKProbabilityDeltasStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1702 .MOCS = anv_mocs(cmd->device, NULL, 0),
1703 };
1704 indirect.HCPVP9PAKTileRecordStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1705 .MOCS = anv_mocs(cmd->device, NULL, 0),
1706 };
1707 indirect.HCPVP9PAKCULevelStatisticStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1708 .MOCS = anv_mocs(cmd->device, NULL, 0),
1709 };
1710 }
1711
1712 if (sps->flags.scaling_list_enabled_flag) {
1713 assert(0);
1714 /* FIXME */
1715 if (pps->flags.pps_scaling_list_data_present_flag) {
1716 scaling_list(cmd, pps->pScalingLists);
1717 } else if (sps->flags.sps_scaling_list_data_present_flag) {
1718 scaling_list(cmd, sps->pScalingLists);
1719 }
1720 } else {
1721 for (uint8_t size = 0; size < 4; size++) {
1722 for (uint8_t pred = 0; pred < 2; pred++) {
1723 anv_batch_emit(&cmd->batch, GENX(HCP_FQM_STATE), fqm) {
1724 fqm.SizeID = size;
1725 fqm.IntraInter = pred;
1726 fqm.ColorComponent = 0;
1727 fqm.FQMDCValue = size < 2 ? 0 : 0x1000;
1728
1729 unsigned len = (size == 0) ? 32 : 128;
1730
1731 for (uint8_t q = 0; q < len; q++) {
1732 fqm.QuantizerMatrix8x8[q] = q % 2 == 0 ? 0 : 0x10;
1733 }
1734 }
1735 }
1736 }
1737 }
1738
1739 if (sps->flags.scaling_list_enabled_flag) {
1740 assert(0);
1741 /* FIXME */
1742 if (pps->flags.pps_scaling_list_data_present_flag) {
1743 scaling_list(cmd, pps->pScalingLists);
1744 } else if (sps->flags.sps_scaling_list_data_present_flag) {
1745 scaling_list(cmd, sps->pScalingLists);
1746 }
1747 } else {
1748 for (uint8_t size = 0; size < 4; size++) {
1749 for (uint8_t pred = 0; pred < 2; pred++) {
1750 for (uint8_t color = 0; color < 3; color++) {
1751
1752 if (size == 3 && color > 0)
1753 continue;
1754
1755 anv_batch_emit(&cmd->batch, GENX(HCP_QM_STATE), qm) {
1756 qm.SizeID = size;
1757 qm.PredictionType = pred;
1758 qm.ColorComponent = color;
1759 qm.DCCoefficient = (size > 1) ? 16 : 0;
1760 unsigned len = (size == 0) ? 16 : 64;
1761
1762 for (uint8_t q = 0; q < len; q++)
1763 qm.QuantizerMatrix8x8[q] = 0x10;
1764 }
1765 }
1766 }
1767 }
1768 }
1769
1770
1771 anv_batch_emit(&cmd->batch, GENX(VDENC_PIPE_MODE_SELECT), vdenc_pipe_mode) {
1772 vdenc_pipe_mode.StandardSelect = SS_HEVC;
1773 vdenc_pipe_mode.PAKChromaSubSamplingType = _420;
1774 vdenc_pipe_mode.HMERegionPrefetchEnable = !vdenc_pipe_mode.TLBPrefetchEnable;
1775 vdenc_pipe_mode.TopPrefetchEnableMode = 1;
1776 vdenc_pipe_mode.LeftPrefetchAtWrapAround = true;
1777 vdenc_pipe_mode.HzShift32Minus1 = 3;
1778 vdenc_pipe_mode.NumberofVerticalRequests = 11;
1779 vdenc_pipe_mode.NumberofHorizontalRequests = 2;
1780
1781 vdenc_pipe_mode.SourceLumaPackedDataTLBPrefetchEnable = true;
1782 vdenc_pipe_mode.SourceChromaTLBPrefetchEnable = true;
1783 vdenc_pipe_mode.HzShift32Minus1Src = 3;
1784 vdenc_pipe_mode.PrefetchOffsetforSource = 4;
1785 }
1786
1787 anv_batch_emit(&cmd->batch, GENX(VDENC_SRC_SURFACE_STATE), vdenc_surface) {
1788 vdenc_surface.SurfaceState.Width = src_img->vk.extent.width - 1;
1789 vdenc_surface.SurfaceState.Height = src_img->vk.extent.height - 1;
1790 vdenc_surface.SurfaceState.SurfaceFormat = VDENC_PLANAR_420_8;
1791
1792 vdenc_surface.SurfaceState.TileWalk = TW_YMAJOR;
1793 vdenc_surface.SurfaceState.TiledSurface = src_img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
1794 vdenc_surface.SurfaceState.SurfacePitch = src_img->planes[0].primary_surface.isl.row_pitch_B - 1;
1795 vdenc_surface.SurfaceState.YOffsetforUCb = src_img->planes[1].primary_surface.memory_range.offset /
1796 src_img->planes[0].primary_surface.isl.row_pitch_B;
1797 vdenc_surface.SurfaceState.YOffsetforVCr = src_img->planes[1].primary_surface.memory_range.offset /
1798 src_img->planes[0].primary_surface.isl.row_pitch_B;
1799 }
1800
1801 anv_batch_emit(&cmd->batch, GENX(VDENC_REF_SURFACE_STATE), vdenc_surface) {
1802 vdenc_surface.SurfaceState.Width = base_ref_img->vk.extent.width - 1;
1803 vdenc_surface.SurfaceState.Height = base_ref_img->vk.extent.height - 1;
1804 vdenc_surface.SurfaceState.SurfaceFormat = VDENC_PLANAR_420_8;
1805 vdenc_surface.SurfaceState.SurfacePitch = base_ref_img->planes[0].primary_surface.isl.row_pitch_B - 1;
1806
1807 vdenc_surface.SurfaceState.TileWalk = TW_YMAJOR;
1808 vdenc_surface.SurfaceState.TiledSurface = base_ref_img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
1809 vdenc_surface.SurfaceState.YOffsetforUCb = base_ref_img->planes[1].primary_surface.memory_range.offset /
1810 base_ref_img->planes[0].primary_surface.isl.row_pitch_B;
1811 vdenc_surface.SurfaceState.YOffsetforVCr = base_ref_img->planes[1].primary_surface.memory_range.offset /
1812 base_ref_img->planes[0].primary_surface.isl.row_pitch_B;
1813 }
1814
1815 /* TODO. add a cmd for VDENC_DS_REF_SURFACE_STATE */
1816
1817 anv_batch_emit(&cmd->batch, GENX(VDENC_PIPE_BUF_ADDR_STATE), vdenc_buf) {
1818 /* TODO. add DSFWDREF and FWDREF */
1819 vdenc_buf.DSFWDREF0.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1820 .MOCS = anv_mocs(cmd->device, NULL, 0),
1821 };
1822
1823 vdenc_buf.DSFWDREF1.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1824 .MOCS = anv_mocs(cmd->device, NULL, 0),
1825 };
1826
1827 vdenc_buf.OriginalUncompressedPicture.Address =
1828 anv_image_address(src_img, &src_img->planes[0].primary_surface.memory_range);
1829 vdenc_buf.OriginalUncompressedPicture.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1830 .MOCS = anv_mocs(cmd->device, vdenc_buf.OriginalUncompressedPicture.Address.bo, 0),
1831 };
1832
1833 vdenc_buf.StreamInDataPicture.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1834 .MOCS = anv_mocs(cmd->device, NULL, 0),
1835 };
1836
1837 vdenc_buf.RowStoreScratchBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1838 .MOCS = anv_mocs(cmd->device, NULL, 0),
1839 };
1840
1841 const struct anv_image_view *ref_iv[3] = { 0, };
1842
1843 for (unsigned i = 0; i < enc_info->referenceSlotCount && i < 3; i++)
1844 ref_iv[i] = anv_image_view_from_handle(enc_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
1845
1846 if (ref_iv[0]) {
1847 vdenc_buf.ColocatedMVReadBuffer.Address =
1848 anv_image_address(ref_iv[0]->image, &ref_iv[0]->image->vid_dmv_top_surface);
1849 vdenc_buf.FWDREF0.Address =
1850 anv_image_address(ref_iv[0]->image, &ref_iv[0]->image->planes[0].primary_surface.memory_range);
1851 }
1852
1853 vdenc_buf.ColocatedMVReadBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1854 .MOCS = anv_mocs(cmd->device, vdenc_buf.ColocatedMVReadBuffer.Address.bo, 0),
1855 };
1856
1857 vdenc_buf.FWDREF0.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1858 .MOCS = anv_mocs(cmd->device, vdenc_buf.FWDREF0.Address.bo, 0),
1859 };
1860
1861 if (ref_iv[1])
1862 vdenc_buf.FWDREF1.Address =
1863 anv_image_address(ref_iv[1]->image, &ref_iv[1]->image->planes[0].primary_surface.memory_range);
1864
1865 vdenc_buf.FWDREF1.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1866 .MOCS = anv_mocs(cmd->device, vdenc_buf.FWDREF1.Address.bo, 0),
1867 };
1868
1869 if (ref_iv[2])
1870 vdenc_buf.FWDREF2.Address =
1871 anv_image_address(ref_iv[2]->image, &ref_iv[2]->image->planes[0].primary_surface.memory_range);
1872
1873 vdenc_buf.FWDREF2.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1874 .MOCS = anv_mocs(cmd->device, vdenc_buf.FWDREF2.Address.bo, 0),
1875 };
1876
1877 vdenc_buf.BWDREF0.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1878 .MOCS = anv_mocs(cmd->device, NULL, 0),
1879 };
1880
1881 vdenc_buf.VDEncStatisticsStreamOut.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1882 .MOCS = anv_mocs(cmd->device, NULL, 0),
1883 };
1884
1885 vdenc_buf.DSFWDREF04X.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1886 .MOCS = anv_mocs(cmd->device, NULL, 0),
1887 };
1888 vdenc_buf.DSFWDREF14X.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1889 .MOCS = anv_mocs(cmd->device, NULL, 0),
1890 };
1891 vdenc_buf.VDEncCURecordStreamOutBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1892 .MOCS = anv_mocs(cmd->device, NULL, 0),
1893 };
1894 vdenc_buf.VDEncLCUPAK_OBJ_CMDBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1895 .MOCS = anv_mocs(cmd->device, NULL, 0),
1896 };
1897 vdenc_buf.ScaledReferenceSurface8X.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1898 .MOCS = anv_mocs(cmd->device, NULL, 0),
1899 };
1900 vdenc_buf.ScaledReferenceSurface4X.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1901 .MOCS = anv_mocs(cmd->device, NULL, 0),
1902 };
1903 vdenc_buf.VP9SegmentationMapStreamInBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1904 .MOCS = anv_mocs(cmd->device, NULL, 0),
1905 };
1906 vdenc_buf.VP9SegmentationMapStreamOutBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1907 .MOCS = anv_mocs(cmd->device, NULL, 0),
1908 };
1909 vdenc_buf.VDEncTileRowStoreBuffer.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1910 .MOCS = anv_mocs(cmd->device, NULL, 0),
1911 };
1912 vdenc_buf.VDEncCumulativeCUCountStreamOutSurface.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1913 .MOCS = anv_mocs(cmd->device, NULL, 0),
1914 };
1915 vdenc_buf.VDEncPaletteModeStreamOutSurface.PictureFields = (struct GENX(VDENC_SURFACE_CONTROL_BITS)) {
1916 .MOCS = anv_mocs(cmd->device, NULL, 0),
1917 };
1918 }
1919
1920 anv_batch_emit(&cmd->batch, GENX(VDENC_CMD1), cmd1) {
1921 /* Magic numbers taken from media-driver */
1922 cmd1.Values[0] = 0x5030200;
1923 cmd1.Values[1] = 0xb090806;
1924 cmd1.Values[2] = 0x1c140c04;
1925 cmd1.Values[3] = 0x3c342c24;
1926 cmd1.Values[4] = 0x5c544c44;
1927 cmd1.Values[5] = 0x1c140c04;
1928 cmd1.Values[6] = 0x3c342c24;
1929 cmd1.Values[7] = 0x5c544c44;
1930 cmd1.Values[13] = 0x0;
1931 cmd1.Values[14] = 0x0;
1932 cmd1.Values[15] &= 0xffff0000;
1933
1934 cmd1.Values[18] = (cmd1.Values[18] & 0xff0000ff) | 0x140400;
1935 cmd1.Values[19] = 0x14141414;
1936 cmd1.Values[20] = 0x14141414;
1937
1938 cmd1.Values[21] = 0x10101010;
1939 cmd1.Values[22] = 0x10101010;
1940 cmd1.Values[23] = 0x10101010;
1941 cmd1.Values[24] = 0x10101010;
1942 cmd1.Values[25] = 0x10101010;
1943 cmd1.Values[26] = 0x10101010;
1944 cmd1.Values[27] = 0x10101010;
1945 cmd1.Values[28] = 0x10101010;
1946
1947 if (anv_vdenc_h265_picture_type(frame_info->pStdPictureInfo->pic_type) == 0) {
1948 cmd1.Values[9] = 0x23131f0f;
1949 cmd1.Values[10] = (cmd1.Values[10] & 0xffff0000) | 0x2313;
1950 cmd1.Values[11] = 0x3e5c445c;
1951 cmd1.Values[12] = (cmd1.Values[12] & 0xff00) | 0x1e040044;
1952 cmd1.Values[15] = (cmd1.Values[15] & 0xffff) | 0x70000;
1953 cmd1.Values[16] = 0xd0e1007;
1954 cmd1.Values[17] = (cmd1.Values[17] & 0xffffff00) | 0x32;
1955 /* Handle Number of ROI */
1956 cmd1.Values[17] = (cmd1.Values[17] & 0xffff00ff) | 0x1e00;
1957 cmd1.Values[29] = (cmd1.Values[29] & 0xff000000) | 0x101010;
1958 } else {
1959 cmd1.Values[9] = 0x23131f0f;
1960 cmd1.Values[10] = 0x331b2313;
1961 cmd1.Values[11] = 0x476e4d6e;
1962 cmd1.Values[12] = 0x3604004d;
1963 cmd1.Values[15] = (cmd1.Values[15] & 0xffff) | 0x4150000;
1964 cmd1.Values[16] = 0x23231415;
1965 cmd1.Values[17] = (cmd1.Values[17] & 0xffffff00) | 0x3f;
1966 /* Handle Number of ROI */
1967 cmd1.Values[17] = (cmd1.Values[17] & 0xffff00ff) | 0x4400;
1968 cmd1.Values[29] = (cmd1.Values[29] & 0xff000000) | 0x232323;
1969 }
1970 }
1971
1972 uint32_t frame_width_in_min_cb = sps->pic_width_in_luma_samples >> (sps->log2_min_luma_coding_block_size_minus3 + 3);
1973 uint32_t frame_height_in_min_cb = sps->pic_height_in_luma_samples >> (sps->log2_min_luma_coding_block_size_minus3 + 3);
1974 uint32_t width_in_pix = frame_width_in_min_cb << (sps->log2_min_luma_coding_block_size_minus3 + 3);
1975 uint32_t height_in_pix = frame_height_in_min_cb << (sps->log2_min_luma_coding_block_size_minus3 + 3);
1976
1977 anv_batch_emit(&cmd->batch, GENX(HCP_PIC_STATE), pic) {
1978 pic.FrameWidthInMinimumCodingBlockSize = frame_width_in_min_cb - 1;
1979 pic.FrameHeightInMinimumCodingBlockSize = frame_height_in_min_cb - 1;
1980 pic.TransformSkipEnable = pps->flags.transform_skip_enabled_flag;
1981 pic.TransformSkipEnable = true;
1982
1983 pic.MinCUSize = sps->log2_min_luma_coding_block_size_minus3;
1984 pic.LCUSize = sps->log2_diff_max_min_luma_coding_block_size + sps->log2_min_luma_coding_block_size_minus3;
1985
1986 pic.MinTUSize = sps->log2_min_luma_transform_block_size_minus2;
1987 pic.MaxTUSize = sps->log2_diff_max_min_luma_transform_block_size + sps->log2_min_luma_transform_block_size_minus2;
1988
1989 pic.MinPCMSize = 0;
1990 pic.MaxPCMSize = 0;
1991
1992 pic.ChromaSubsampling = sps->chroma_format_idc;
1993
1994 const StdVideoEncodeH265SliceSegmentHeader *slice_header = NULL;
1995 for (uint32_t slice_id = 0; slice_id < frame_info->naluSliceSegmentEntryCount; slice_id++) {
1996 const VkVideoEncodeH265NaluSliceSegmentInfoKHR *nalu = &frame_info->pNaluSliceSegmentEntries[slice_id];
1997 if (nalu) {
1998 slice_header = nalu->pStdSliceSegmentHeader;
1999 break;
2000 }
2001 }
2002
2003 pic.CollocatedPictureIsISlice = false;
2004 pic.CurrentPictureIsISlice = false;
2005
2006 pic.SampleAdaptiveOffsetEnable = sps->flags.sample_adaptive_offset_enabled_flag ? slice_header->flags.slice_sao_chroma_flag ||
2007 slice_header->flags.slice_sao_luma_flag : 0;
2008 pic.PCMEnable = sps->flags.pcm_enabled_flag;
2009 pic.CUQPDeltaEnable = pps->flags.cu_qp_delta_enabled_flag;
2010 pic.MaxDQPDepth = pps->diff_cu_qp_delta_depth;
2011 pic.PCMLoopFilterDisable = sps->flags.pcm_loop_filter_disabled_flag;
2012 pic.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
2013 pic.TilingEnable = pps->flags.tiles_enabled_flag;
2014 pic.WeightedBiPredicationEnable = pps->flags.weighted_bipred_flag;
2015 pic.WeightedPredicationEnable = pps->flags.weighted_pred_flag;
2016 pic.FieldPic = 0;
2017 pic.TopField = false;
2018 pic.TransformSkipEnable = pps->flags.transform_skip_enabled_flag;
2019 pic.AMPEnable = sps->flags.amp_enabled_flag;
2020 pic.TransquantBypassEnable = pps->flags.transquant_bypass_enabled_flag;
2021 pic.StrongIntraSmoothingEnable = sps->flags.strong_intra_smoothing_enabled_flag;
2022 pic.CUPacketStructure = 0;
2023
2024 pic.PictureCbQPOffset = pps->pps_cb_qp_offset & 0x1f;
2025 pic.PictureCrQPOffset = pps->pps_cr_qp_offset & 0x1f;
2026 pic.IntraMaxTransformHierarchyDepth = 2;
2027 pic.InterMaxTransformHierarchyDepth = 2;
2028 pic.ChromaPCMSampleBitDepth = sps->pcm_sample_bit_depth_chroma_minus1 & 0xf;
2029 pic.LumaPCMSampleBitDepth = sps->pcm_sample_bit_depth_luma_minus1 & 0xf;
2030
2031 pic.ChromaBitDepth = sps->bit_depth_chroma_minus8;
2032 pic.LumaBitDepth = sps->bit_depth_luma_minus8;
2033
2034 pic.LCUMaxBitSizeAllowed = lcu_max_bits_size_allowed(sps);
2035 pic.CbQPOffsetList0 = pps->cb_qp_offset_list[0];
2036 pic.CbQPOffsetList1 = pps->cb_qp_offset_list[1];
2037 pic.CbQPOffsetList2 = pps->cb_qp_offset_list[2];
2038 pic.CbQPOffsetList3 = pps->cb_qp_offset_list[3];
2039 pic.CbQPOffsetList4 = pps->cb_qp_offset_list[4];
2040 pic.CbQPOffsetList5 = pps->cb_qp_offset_list[5];
2041
2042 pic.CrQPOffsetList0 = pps->cr_qp_offset_list[0];
2043 pic.CrQPOffsetList1 = pps->cr_qp_offset_list[1];
2044 pic.CrQPOffsetList2 = pps->cr_qp_offset_list[2];
2045 pic.CrQPOffsetList3 = pps->cr_qp_offset_list[3];
2046 pic.CrQPOffsetList4 = pps->cr_qp_offset_list[4];
2047 pic.CrQPOffsetList5 = pps->cr_qp_offset_list[5];
2048 pic.FirstSliceSegmentInPic = true;
2049 pic.SSEEnable = true;
2050 }
2051
2052 anv_batch_emit(&cmd->batch, GENX(VDENC_CMD2), cmd2) {
2053 /* Magic numbers taken from media-driver */
2054 cmd2.Values9 = (cmd2.Values9 & 0xffff) | 0x43840000;
2055 cmd2.Values12 = 0xffffffff;
2056 cmd2.Values15 = 0x4e201f40;
2057 cmd2.Values16 = (cmd2.Values16 & 0xf0ff0000) | 0xf003300;
2058 cmd2.Values17 = (cmd2.Values17 & 0xfff00000) | 0x2710;
2059 cmd2.Values19 = (cmd2.Values19 & 0x80ffffff) | 0x18000000;
2060 cmd2.Values19 = (cmd2.Values19 & 0x80ffffff) | 0x18000000;
2061 cmd2.Values21 &= 0xfffffff;
2062 cmd2.Values22 = 0x1f001102;
2063 cmd2.Values23 = 0xaaaa1f00;
2064 cmd2.Values27 = (cmd2.Values27 & 0xffff0000) | 0x1a1a;
2065
2066 cmd2.FrameWidthInPixelsMinusOne = width_in_pix - 1;
2067 cmd2.FrameHeightInPixelsMinusOne = height_in_pix - 1;
2068 cmd2.PictureType = anv_vdenc_h265_picture_type(frame_info->pStdPictureInfo->pic_type);
2069 cmd2.TemporalMVPEnableFlag =
2070 anv_vdenc_h265_picture_type(frame_info->pStdPictureInfo->pic_type) == 0 ?
2071 0 : sps->flags.sps_temporal_mvp_enabled_flag;
2072 cmd2.TransformSkip = pps->flags.transform_skip_enabled_flag;
2073
2074 if (anv_vdenc_h265_picture_type(frame_info->pStdPictureInfo->pic_type) != 0) {
2075 cmd2.NumRefIdxL0MinusOne = ref_list_info->num_ref_idx_l0_active_minus1;
2076 cmd2.NumRefIdxL1MinusOne = ref_list_info->num_ref_idx_l1_active_minus1;
2077 }
2078
2079 cmd2.Values5 = (cmd2.Values5 & 0xff83ffff) | 0x400000;
2080 cmd2.Values14 = (cmd2.Values14 & 0xffff) | 0x7d00000;
2081 cmd2.Values15 = 0x4e201f40;
2082 cmd2.Values17 = (cmd2.Values17 & 0xfff00000) | 0x2710;
2083 cmd2.Values18 = (cmd2.Values18 & 0xffff) | 0x600000;
2084 cmd2.Values19 = (cmd2.Values19 & 0xffff0000) | 0xc0;
2085 cmd2.Values20 &= 0xfffeffff;
2086 cmd2.TilingEnable = pps->flags.tiles_enabled_flag;
2087
2088 if (anv_vdenc_h265_picture_type(frame_info->pStdPictureInfo->pic_type) != 0) {
2089 const StdVideoEncodeH265ReferenceListsInfo* ref_lists = frame_info->pStdPictureInfo->pRefLists;
2090
2091 bool long_term = false;
2092 uint8_t ref_slot = ref_lists->RefPicList0[0];
2093 uint8_t cur_poc = frame_info->pStdPictureInfo->PicOrderCntVal;
2094 uint8_t ref_poc = anv_h265_get_ref_poc(enc_info, ref_lists, true, ref_slot, &long_term);
2095 int8_t diff_poc = cur_poc - ref_poc;
2096
2097 cmd2.POCNumberForRefid0InL0 = CLAMP(diff_poc, -16, 16);
2098 cmd2.LongTermReferenceFlagsL0 |= long_term;
2099
2100 ref_slot = ref_lists->RefPicList0[1];
2101 ref_poc = anv_h265_get_ref_poc(enc_info, ref_lists, true, ref_slot, &long_term);
2102 diff_poc = ref_poc == 0xff ? 0 : cur_poc - ref_poc;
2103
2104 cmd2.POCNumberForRefid1InL0 = CLAMP(diff_poc, -16, 16);
2105 cmd2.LongTermReferenceFlagsL0 |= long_term;
2106
2107 ref_slot = ref_lists->RefPicList0[2];
2108 ref_poc = anv_h265_get_ref_poc(enc_info, ref_lists, true, ref_slot, &long_term);
2109 diff_poc = ref_poc == 0xff ? 0 : cur_poc - ref_poc;
2110
2111 cmd2.POCNumberForRefid2InL0 = CLAMP(diff_poc, -16, 16);
2112 cmd2.LongTermReferenceFlagsL0 |= long_term;
2113
2114
2115 ref_slot = ref_lists->RefPicList1[0];
2116 ref_poc = anv_h265_get_ref_poc(enc_info, ref_lists, false, ref_slot, &long_term);
2117 diff_poc = ref_poc == 0xff ? 0 : cur_poc - ref_poc;
2118
2119 cmd2.POCNumberForRefid0InL1 = CLAMP(diff_poc, -16, 16);
2120 cmd2.LongTermReferenceFlagsL1 |= long_term;
2121
2122 cmd2.POCNumberForRefid1InL1 = cmd2.POCNumberForRefid2InL1 = cmd2.POCNumberForRefid0InL1;
2123 cmd2.SubPelMode = 3;
2124 }
2125 }
2126
2127 for (uint32_t slice_id = 0; slice_id < frame_info->naluSliceSegmentEntryCount; slice_id++) {
2128 const VkVideoEncodeH265NaluSliceSegmentInfoKHR *nalu = &frame_info->pNaluSliceSegmentEntries[slice_id];
2129 const StdVideoEncodeH265SliceSegmentHeader *next_slice_header = NULL;
2130 StdVideoEncodeH265SliceSegmentHeader *slice_header =
2131 (StdVideoEncodeH265SliceSegmentHeader *)nalu->pStdSliceSegmentHeader;
2132
2133 bool is_last = (slice_id == frame_info->naluSliceSegmentEntryCount - 1);
2134 uint32_t slice_type = slice_header->slice_type % 5;
2135 uint32_t slice_qp = rc_disable ? nalu->constantQp : pps->init_qp_minus26 + 26;
2136 uint32_t slice_qp_delta = slice_qp - (pps->init_qp_minus26 + 26);
2137
2138 if (slice_type == STD_VIDEO_H265_SLICE_TYPE_P)
2139 slice_header->slice_type = slice_type = STD_VIDEO_H265_SLICE_TYPE_B;
2140
2141 assert(slice_qp >= 10 && slice_qp <= 51);
2142
2143 uint32_t ctb_size = 1 << (sps->log2_diff_max_min_luma_coding_block_size +
2144 sps->log2_min_luma_coding_block_size_minus3 + 3);
2145 uint32_t ctb_w = DIV_ROUND_UP(width_in_pix, ctb_size);
2146 uint32_t ctb_h = DIV_ROUND_UP(height_in_pix, ctb_size);
2147
2148 if (!is_last)
2149 next_slice_header = slice_header + 1;
2150
2151 if (slice_type != STD_VIDEO_H265_SLICE_TYPE_I) {
2152 anv_batch_emit(&cmd->batch, GENX(HCP_REF_IDX_STATE), ref) {
2153 ref.ReferencePictureListSelect = 0;
2154 ref.NumberofReferenceIndexesActive = ref_list_info->num_ref_idx_l0_active_minus1;
2155
2156 for (uint32_t i = 0; i < ref_list_info->num_ref_idx_l0_active_minus1 + 1; i++) {
2157 const VkVideoReferenceSlotInfoKHR ref_slot = enc_info->pReferenceSlots[i];
2158 const VkVideoEncodeH265DpbSlotInfoKHR *dpb =
2159 vk_find_struct_const(ref_slot.pNext, VIDEO_ENCODE_H265_DPB_SLOT_INFO_KHR);
2160
2161 ref.ReferenceListEntry[i].ListEntry = dpb_idx[ref_slot.slotIndex];
2162
2163 unsigned ref_poc = dpb->pStdReferenceInfo->PicOrderCntVal;
2164 int32_t diff_poc = frame_info->pStdPictureInfo->PicOrderCntVal - ref_poc;
2165
2166
2167 ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff;
2168 ref.ReferenceListEntry[i].TopField = true;
2169 }
2170 }
2171 }
2172
2173 if (slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
2174 anv_batch_emit(&cmd->batch, GENX(HCP_REF_IDX_STATE), ref) {
2175 ref.ReferencePictureListSelect = 1;
2176 ref.NumberofReferenceIndexesActive = ref_list_info->num_ref_idx_l1_active_minus1;
2177
2178 for (uint32_t i = 0; i < ref_list_info->num_ref_idx_l1_active_minus1 + 1; i++) {
2179 const VkVideoReferenceSlotInfoKHR ref_slot = enc_info->pReferenceSlots[i];
2180
2181 const VkVideoEncodeH265DpbSlotInfoKHR *dpb =
2182 vk_find_struct_const(ref_slot.pNext, VIDEO_ENCODE_H265_DPB_SLOT_INFO_KHR);
2183
2184 ref.ReferenceListEntry[i].ListEntry = dpb_idx[ref_slot.slotIndex];
2185
2186 unsigned ref_poc = dpb->pStdReferenceInfo->PicOrderCntVal;
2187 int32_t diff_poc = frame_info->pStdPictureInfo->PicOrderCntVal - ref_poc;
2188
2189 ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff;
2190 ref.ReferenceListEntry[i].TopField = true;
2191 }
2192 }
2193 }
2194
2195 uint8_t chroma_log2_weight_denom = 0;
2196
2197 if ((pps->flags.weighted_pred_flag && (slice_type == STD_VIDEO_H265_SLICE_TYPE_P)) ||
2198 (pps->flags.weighted_bipred_flag && (slice_type == STD_VIDEO_H265_SLICE_TYPE_B))) {
2199 assert (slice_header->pWeightTable);
2200
2201 uint16_t chroma_weight, chroma_offset;
2202 const StdVideoEncodeH265WeightTable *w_tbl = slice_header->pWeightTable;
2203 chroma_log2_weight_denom = w_tbl->luma_log2_weight_denom + w_tbl->delta_chroma_log2_weight_denom;
2204
2205 anv_batch_emit(&cmd->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) {
2206 w.ReferencePictureListSelect = 0;
2207
2208 for (unsigned i = 0; i < STD_VIDEO_H265_MAX_NUM_LIST_REF; i++) {
2209
2210 w.LumaOffsets[i].DeltaLumaWeightLX = w_tbl->delta_luma_weight_l0[i] & 0xff;
2211 w.LumaOffsets[i].LumaOffsetLX = w_tbl->luma_offset_l0[i] & 0xff;
2212 w.ChromaOffsets[i].DeltaChromaWeightLX0 = w_tbl->delta_chroma_weight_l0[i][0] & 0xff;
2213 w.ChromaOffsets[i].DeltaChromaWeightLX1 = w_tbl->delta_chroma_weight_l0[i][1] & 0xff;
2214
2215
2216 chroma_weight = (1 << chroma_log2_weight_denom) + w_tbl->delta_chroma_weight_l0[i][0];
2217 chroma_offset = CLAMP(w_tbl->delta_chroma_offset_l0[i][0] -
2218 ((128 * chroma_weight) >> chroma_log2_weight_denom) + 128, -128, 127);
2219 w.ChromaOffsets[i].ChromaOffsetLX0 = chroma_offset & 0xff;
2220
2221 chroma_weight = (1 << chroma_log2_weight_denom) + w_tbl->delta_chroma_weight_l0[i][1];
2222 chroma_offset = CLAMP(w_tbl->delta_chroma_offset_l0[i][1] -
2223 ((128 * chroma_weight) >> chroma_log2_weight_denom) + 128, -128, 127);
2224 w.ChromaOffsets[i].ChromaOffsetLX1 = chroma_offset & 0xff;
2225 }
2226 }
2227
2228 if (slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
2229 anv_batch_emit(&cmd->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) {
2230 w.ReferencePictureListSelect = 1;
2231
2232 for (unsigned i = 0; i < STD_VIDEO_H265_MAX_NUM_LIST_REF; i++) {
2233 w.LumaOffsets[i].DeltaLumaWeightLX = w_tbl->delta_luma_weight_l1[i] & 0xff;
2234 w.LumaOffsets[i].LumaOffsetLX = w_tbl->luma_offset_l1[i] & 0xff;
2235 w.ChromaOffsets[i].DeltaChromaWeightLX0 = w_tbl->delta_chroma_weight_l1[i][0] & 0xff;
2236 w.ChromaOffsets[i].DeltaChromaWeightLX1 = w_tbl->delta_chroma_weight_l1[i][1] & 0xff;
2237
2238 chroma_weight = (1 << chroma_log2_weight_denom) + w_tbl->delta_chroma_weight_l1[i][0];
2239 chroma_offset = CLAMP(w_tbl->delta_chroma_offset_l1[i][0] -
2240 ((128 * chroma_weight) >> chroma_log2_weight_denom) + 128, -128, 127);
2241 w.ChromaOffsets[i].ChromaOffsetLX0 = chroma_offset & 0xff;
2242
2243 chroma_weight = (1 << chroma_log2_weight_denom) + w_tbl->delta_chroma_weight_l1[i][1];
2244 chroma_offset = CLAMP(w_tbl->delta_chroma_offset_l1[i][1] -
2245 ((128 * chroma_weight) >> chroma_log2_weight_denom) + 128, -128, 127);
2246 w.ChromaOffsets[i].ChromaOffsetLX1 = chroma_offset & 0xff;
2247 }
2248 }
2249 }
2250 }
2251
2252 uint8_t slice_header_data[256] = { 0, };
2253 size_t slice_header_data_len_in_bytes = 0;
2254 vk_video_encode_h265_slice_header(frame_info->pStdPictureInfo,
2255 vps,
2256 sps,
2257 pps,
2258 slice_header,
2259 slice_qp_delta,
2260 &slice_header_data_len_in_bytes,
2261 &slice_header_data);
2262 uint32_t slice_header_data_len_in_bits = slice_header_data_len_in_bytes * 8;
2263
2264 anv_batch_emit(&cmd->batch, GENX(HCP_SLICE_STATE), slice) {
2265 slice.SliceHorizontalPosition = slice_header->slice_segment_address % ctb_w;
2266 slice.SliceVerticalPosition = slice_header->slice_segment_address / ctb_w;
2267
2268 if (is_last) {
2269 slice.NextSliceHorizontalPosition = 0;
2270 slice.NextSliceVerticalPosition = 0;
2271 } else {
2272 slice.NextSliceHorizontalPosition = next_slice_header->slice_segment_address % ctb_w;
2273 slice.NextSliceVerticalPosition = next_slice_header->slice_segment_address / ctb_w;
2274 }
2275
2276 slice.SliceType = slice_type;
2277 slice.LastSlice = is_last;
2278 slice.DependentSlice = slice_header->flags.dependent_slice_segment_flag;
2279 slice.SliceTemporalMVPEnable = frame_info->pStdPictureInfo->flags.slice_temporal_mvp_enabled_flag;;
2280 slice.SliceQP = slice_qp;
2281 slice.SliceCbQPOffset = slice_header->slice_cb_qp_offset;
2282 slice.SliceCrQPOffset = slice_header->slice_cr_qp_offset;
2283 slice.SliceHeaderDisableDeblockingFilter = slice_header->flags.slice_deblocking_filter_disabled_flag;
2284 slice.SliceTCOffsetDiv2 = slice_header->slice_tc_offset_div2;
2285 slice.SliceBetaOffsetDiv2 = slice_header->slice_beta_offset_div2;
2286 slice.SliceLoopFilterEnable = slice_header->flags.slice_loop_filter_across_slices_enabled_flag;
2287 slice.SliceSAOChroma = slice_header->flags.slice_sao_chroma_flag;
2288 slice.SliceSAOLuma = slice_header->flags.slice_sao_luma_flag;
2289 slice.MVDL1Zero = slice_header->flags.mvd_l1_zero_flag;
2290 slice.CollocatedFromL0 = slice_header->flags.collocated_from_l0_flag;
2291 /* TODO. Support Low Delay mode */
2292 slice.LowDelay = false;
2293
2294 if (slice_type != STD_VIDEO_H265_SLICE_TYPE_I && slice_header->pWeightTable) {
2295 slice.Log2WeightDenominatorChroma = slice_header->pWeightTable->luma_log2_weight_denom +
2296 (chroma_log2_weight_denom - slice_header->pWeightTable->luma_log2_weight_denom);
2297 slice.Log2WeightDenominatorLuma = slice_header->pWeightTable->luma_log2_weight_denom;
2298 }
2299 slice.CABACInit = slice_header->flags.cabac_init_flag;
2300 slice.MaxMergeIndex = slice_header->MaxNumMergeCand - 1;
2301
2302 slice.CollocatedMVTemporalBufferIndex = dpb_idx[slice_header->collocated_ref_idx];
2303 assert(slice.CollocatedMVTemporalBufferIndex < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
2304
2305 /* For VDEnc mode */
2306 slice.RoundInter = 4;
2307 slice.RoundIntra = 10;
2308
2309 slice.SliceHeaderLength = 0;
2310 slice.CABACZeroWordInsertionEnable = false;
2311 slice.EmulationByteSliceInsertEnable = true;
2312 slice.TailInsertionPresent = false;
2313 slice.SliceDataInsertionPresent = true;
2314 slice.HeaderInsertionPresent = true;
2315
2316 slice.IndirectPAKBSEDataStartOffset = 0;
2317 slice.TransformSkipLambda = 162;
2318 slice.TransformSkipNumberofZeroCoeffsFactor0 = 42;
2319 slice.TransformSkipNumberofNonZeroCoeffsFactor0 = 72;
2320 slice.TransformSkipNumberofZeroCoeffsFactor1 = 32;
2321 slice.TransformSkipNumberofNonZeroCoeffsFactor1 = 77;
2322
2323 slice.OriginalSliceStartCtbX = slice_header->slice_segment_address % ctb_w;
2324 slice.OriginalSliceStartCtbY = slice_header->slice_segment_address / ctb_w;
2325 }
2326
2327 uint32_t *dw;
2328 uint32_t length_in_dw;
2329 uint32_t data_bits_in_last_dw;
2330
2331 length_in_dw = ALIGN(slice_header_data_len_in_bits, 32) >> 5;
2332 data_bits_in_last_dw = slice_header_data_len_in_bits & 0x1f;
2333
2334 dw = anv_batch_emitn(&cmd->batch, length_in_dw + 2, GENX(HCP_PAK_INSERT_OBJECT),
2335 .LastHeader = true,
2336 .EndofSlice = true,
2337 .DataBitsInLastDW = data_bits_in_last_dw > 0 ? data_bits_in_last_dw : 32,
2338 .SliceHeaderIndicator = true,
2339 .HeaderLengthExcludedFromSize = ACCUMULATE);
2340
2341 memcpy(dw + 2, slice_header_data, length_in_dw * 4);
2342
2343 anv_batch_emit(&cmd->batch, GENX(VDENC_WEIGHTSOFFSETS_STATE), vdenc_offsets) {
2344 vdenc_offsets.WeightsForwardReference0 = 1;
2345 vdenc_offsets.WeightsForwardReference1 = 1;
2346 vdenc_offsets.WeightsForwardReference2 = 1;
2347 vdenc_offsets.HEVCVP9WeightsBackwardReference0 = 1;
2348 }
2349
2350 anv_batch_emit(&cmd->batch, GENX(VDENC_WALKER_STATE), vdenc_walker) {
2351 uint32_t slice_block_rows = DIV_ROUND_UP(src_img->vk.extent.height, ANV_MAX_H265_CTB_SIZE);
2352 uint32_t slice_block_cols = DIV_ROUND_UP(src_img->vk.extent.width, ANV_MAX_H265_CTB_SIZE);
2353 uint32_t num_ctu_in_slice = slice_block_cols * slice_block_rows;
2354
2355 vdenc_walker.MBLCUStartYPosition = slice_header->slice_segment_address % ctb_w;
2356 vdenc_walker.NextSliceMBLCUStartXPosition = (slice_header->slice_segment_address + num_ctu_in_slice) / ctb_h;
2357 vdenc_walker.NextSliceMBStartYPosition = (slice_header->slice_segment_address + num_ctu_in_slice) / ctb_w;
2358 vdenc_walker.NextSliceMBLCUStartXPosition = (slice_header->slice_segment_address + num_ctu_in_slice) / ctb_h;
2359 vdenc_walker.TileWidth = width_in_pix - 1;
2360 vdenc_walker.TileHeight = height_in_pix - 1;
2361 }
2362
2363 anv_batch_emit(&cmd->batch, GENX(VD_PIPELINE_FLUSH), flush) {
2364 flush.MFXPipelineDone = true;
2365 flush.VDENCPipelineDone = true;
2366 flush.VDENCPipelineCommandFlush = true;
2367 flush.VDCommandMessageParserDone = true;
2368 }
2369 }
2370
2371 anv_batch_emit(&cmd->batch, GENX(MI_FLUSH_DW), flush) {
2372 flush.VideoPipelineCacheInvalidate = 1;
2373 };
2374
2375 anv_batch_emit(&cmd->batch, GENX(VD_PIPELINE_FLUSH), flush) {
2376 flush.HEVCPipelineDone = true;
2377 flush.HEVCPipelineCommandFlush = true;
2378 flush.VDCommandMessageParserDone = true;
2379 }
2380
2381 anv_batch_emit(&cmd->batch, GENX(MI_FLUSH_DW), flush) {
2382 flush.VideoPipelineCacheInvalidate = 0;
2383 };
2384
2385 #endif // GFX_VER >= 12
2386
2387 }
2388
2389 void
genX(CmdEncodeVideoKHR)2390 genX(CmdEncodeVideoKHR)(VkCommandBuffer commandBuffer,
2391 const VkVideoEncodeInfoKHR *pEncodeInfo)
2392 {
2393 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
2394
2395 switch (cmd_buffer->video.vid->vk.op) {
2396 case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
2397 anv_h264_encode_video(cmd_buffer, pEncodeInfo);
2398 break;
2399 case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
2400 anv_h265_encode_video(cmd_buffer, pEncodeInfo);
2401 break;
2402 default:
2403 assert(0);
2404 }
2405 }
2406