xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/radv_video.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  * Copyright 2021 Red Hat Inc.
4  * All Rights Reserved.
5  *
6  * SPDX-License-Identifier: MIT
7  */
8 
9 #ifndef _WIN32
10 #include "drm-uapi/amdgpu_drm.h"
11 #endif
12 
13 #include "util/vl_zscan_data.h"
14 #include "vk_video/vulkan_video_codecs_common.h"
15 #include "ac_uvd_dec.h"
16 #include "ac_vcn_av1_default.h"
17 #include "ac_vcn_dec.h"
18 
19 #include "radv_buffer.h"
20 #include "radv_cs.h"
21 #include "radv_debug.h"
22 #include "radv_device_memory.h"
23 #include "radv_entrypoints.h"
24 #include "radv_image.h"
25 #include "radv_image_view.h"
26 #include "radv_video.h"
27 
28 #define NUM_H2645_REFS               16
29 #define FB_BUFFER_OFFSET             0x1000
30 #define FB_BUFFER_SIZE               2048
31 #define FB_BUFFER_SIZE_TONGA         (2048 * 64)
32 #define IT_SCALING_TABLE_SIZE        992
33 #define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)
34 
35 /* Not 100% sure this isn't too much but works */
36 #define VID_DEFAULT_ALIGNMENT 256
37 
38 static bool
radv_enable_tier2(struct radv_physical_device * pdev)39 radv_enable_tier2(struct radv_physical_device *pdev)
40 {
41    const struct radv_instance *instance = radv_physical_device_instance(pdev);
42 
43    if (pdev->info.vcn_ip_version >= VCN_3_0_0 && !(instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH))
44       return true;
45    return false;
46 }
47 
48 static uint32_t
radv_video_get_db_alignment(struct radv_physical_device * pdev,int width,bool is_h265_main_10_or_av1)49 radv_video_get_db_alignment(struct radv_physical_device *pdev, int width, bool is_h265_main_10_or_av1)
50 {
51    if (pdev->info.vcn_ip_version >= VCN_2_0_0 && width > 32 && is_h265_main_10_or_av1)
52       return 64;
53    return 32;
54 }
55 
56 static bool
radv_vid_buffer_upload_alloc(struct radv_cmd_buffer * cmd_buffer,unsigned size,unsigned * out_offset,void ** ptr)57 radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr)
58 {
59    return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, out_offset, ptr);
60 }
61 
62 /* vcn unified queue (sq) ib header */
63 void
radv_vcn_sq_header(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq,unsigned type)64 radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type)
65 {
66    /* vcn ib signature */
67    radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
68    radeon_emit(cs, RADEON_VCN_SIGNATURE);
69    sq->ib_checksum = &cs->buf[cs->cdw];
70    radeon_emit(cs, 0);
71    sq->ib_total_size_in_dw = &cs->buf[cs->cdw];
72    radeon_emit(cs, 0);
73 
74    /* vcn ib engine info */
75    radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
76    radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
77    radeon_emit(cs, type);
78    radeon_emit(cs, 0);
79 }
80 
81 void
radv_vcn_sq_tail(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq)82 radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
83 {
84    uint32_t *end;
85    uint32_t size_in_dw;
86    uint32_t checksum = 0;
87 
88    if (sq->ib_checksum == NULL || sq->ib_total_size_in_dw == NULL)
89       return;
90 
91    end = &cs->buf[cs->cdw];
92    size_in_dw = end - sq->ib_total_size_in_dw - 1;
93    *sq->ib_total_size_in_dw = size_in_dw;
94    *(sq->ib_total_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);
95 
96    for (int i = 0; i < size_in_dw; i++)
97       checksum += *(sq->ib_checksum + 2 + i);
98 
99    *sq->ib_checksum = checksum;
100 }
101 
102 void
radv_vcn_write_event(struct radv_cmd_buffer * cmd_buffer,struct radv_event * event,unsigned value)103 radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, unsigned value)
104 {
105    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
106    struct radv_physical_device *pdev = radv_device_physical(device);
107    struct rvcn_sq_var sq;
108    struct radeon_cmdbuf *cs = cmd_buffer->cs;
109 
110    if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED)
111       return;
112 
113    radv_cs_add_buffer(device->ws, cs, event->bo);
114    uint64_t va = radv_buffer_get_va(event->bo);
115 
116    radeon_check_space(device->ws, cs, 256);
117    radv_vcn_sq_header(cs, &sq, RADEON_VCN_ENGINE_TYPE_COMMON);
118    struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->buf[cs->cdw]);
119    ib_header->package_size = sizeof(struct rvcn_cmn_engine_ib_package) + sizeof(struct rvcn_cmn_engine_op_writememory);
120    cs->cdw++;
121    ib_header->package_type = RADEON_VCN_IB_COMMON_OP_WRITEMEMORY;
122    cs->cdw++;
123 
124    struct rvcn_cmn_engine_op_writememory *write_memory = (struct rvcn_cmn_engine_op_writememory *)&(cs->buf[cs->cdw]);
125    write_memory->dest_addr_lo = va & 0xffffffff;
126    write_memory->dest_addr_hi = va >> 32;
127    write_memory->data = value;
128 
129    cs->cdw += sizeof(*write_memory) / 4;
130    radv_vcn_sq_tail(cs, &sq);
131 }
132 
133 static void
radv_vcn_sq_start(struct radv_cmd_buffer * cmd_buffer)134 radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer)
135 {
136    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
137 
138    radeon_check_space(device->ws, cmd_buffer->cs, 256);
139    radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE);
140    rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
141    ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
142    cmd_buffer->cs->cdw++;
143    ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
144    cmd_buffer->cs->cdw++;
145    cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
146    cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
147    memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
148 }
149 
150 /* generate an stream handle */
151 static unsigned
radv_vid_alloc_stream_handle(struct radv_physical_device * pdev)152 radv_vid_alloc_stream_handle(struct radv_physical_device *pdev)
153 {
154    unsigned stream_handle = pdev->stream_handle_base;
155 
156    stream_handle ^= ++pdev->stream_handle_counter;
157    return stream_handle;
158 }
159 
160 static void
init_uvd_decoder(struct radv_physical_device * pdev)161 init_uvd_decoder(struct radv_physical_device *pdev)
162 {
163    if (pdev->info.family >= CHIP_VEGA10) {
164       pdev->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
165       pdev->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
166       pdev->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
167       pdev->vid_dec_reg.cntl = RUVD_ENGINE_CNTL_SOC15;
168    } else {
169       pdev->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0;
170       pdev->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1;
171       pdev->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD;
172       pdev->vid_dec_reg.cntl = RUVD_ENGINE_CNTL;
173    }
174 }
175 
176 static void
init_vcn_decoder(struct radv_physical_device * pdev)177 init_vcn_decoder(struct radv_physical_device *pdev)
178 {
179    switch (pdev->info.vcn_ip_version) {
180    case VCN_1_0_0:
181    case VCN_1_0_1:
182       pdev->vid_dec_reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
183       pdev->vid_dec_reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
184       pdev->vid_dec_reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
185       pdev->vid_dec_reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
186       break;
187    case VCN_2_0_0:
188    case VCN_2_0_2:
189    case VCN_2_0_3:
190    case VCN_2_2_0:
191       pdev->vid_dec_reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
192       pdev->vid_dec_reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
193       pdev->vid_dec_reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
194       pdev->vid_dec_reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
195       break;
196    case VCN_2_5_0:
197    case VCN_2_6_0:
198    case VCN_3_0_0:
199    case VCN_3_0_2:
200    case VCN_3_0_16:
201    case VCN_3_0_33:
202    case VCN_3_1_1:
203    case VCN_3_1_2:
204       pdev->vid_dec_reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
205       pdev->vid_dec_reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
206       pdev->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
207       pdev->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
208       break;
209    case VCN_4_0_3:
210       pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
211       pdev->av1_version = RDECODE_AV1_VER_1;
212       break;
213    case VCN_4_0_0:
214    case VCN_4_0_2:
215    case VCN_4_0_4:
216    case VCN_4_0_5:
217    case VCN_4_0_6:
218       pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
219       pdev->av1_version = RDECODE_AV1_VER_1;
220       break;
221    default:
222       break;
223    }
224 }
225 
226 void
radv_init_physical_device_decoder(struct radv_physical_device * pdev)227 radv_init_physical_device_decoder(struct radv_physical_device *pdev)
228 {
229    if (pdev->info.vcn_ip_version >= VCN_4_0_0)
230       pdev->vid_decode_ip = AMD_IP_VCN_UNIFIED;
231    else if (radv_has_uvd(pdev))
232       pdev->vid_decode_ip = AMD_IP_UVD;
233    else
234       pdev->vid_decode_ip = AMD_IP_VCN_DEC;
235    pdev->av1_version = RDECODE_AV1_VER_0;
236 
237    pdev->stream_handle_counter = 0;
238    pdev->stream_handle_base = 0;
239 
240    pdev->stream_handle_base = util_bitreverse(getpid());
241 
242    pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;
243 
244    if (radv_has_uvd(pdev))
245       init_uvd_decoder(pdev);
246    else
247       init_vcn_decoder(pdev);
248 }
249 
250 void
radv_probe_video_decode(struct radv_physical_device * pdev)251 radv_probe_video_decode(struct radv_physical_device *pdev)
252 {
253    const struct radv_instance *instance = radv_physical_device_instance(pdev);
254 
255    pdev->video_decode_enabled = false;
256 
257    if (pdev->info.vcn_ip_version >= VCN_4_0_0) {
258       if (pdev->info.vcn_enc_major_version > 1)
259          pdev->video_decode_enabled = true;
260       /* VCN 4 FW 1.22 has all the necessary pieces to pass CTS */
261       /* VCN 4 has unified fw so use the enc versions */
262       if (pdev->info.vcn_enc_major_version == 1 && pdev->info.vcn_enc_minor_version >= 22)
263          pdev->video_decode_enabled = true;
264    }
265    if (instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
266       pdev->video_decode_enabled = true;
267    }
268 }
269 
270 static bool
have_it(struct radv_video_session * vid)271 have_it(struct radv_video_session *vid)
272 {
273    return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265;
274 }
275 
276 static bool
have_probs(struct radv_video_session * vid)277 have_probs(struct radv_video_session *vid)
278 {
279    return vid->stream_type == RDECODE_CODEC_AV1;
280 }
281 
282 static unsigned
calc_ctx_size_h264_perf(struct radv_video_session * vid)283 calc_ctx_size_h264_perf(struct radv_video_session *vid)
284 {
285    unsigned width_in_mb, height_in_mb, ctx_size;
286    unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
287    unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
288 
289    unsigned max_references = vid->vk.max_dpb_slots + 1;
290 
291    /* picture width & height in 16 pixel units */
292    width_in_mb = width / VL_MACROBLOCK_WIDTH;
293    height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
294 
295    ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
296 
297    return ctx_size;
298 }
299 
300 static unsigned
calc_ctx_size_h265_main(struct radv_video_session * vid)301 calc_ctx_size_h265_main(struct radv_video_session *vid)
302 {
303    unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
304    unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
305 
306    unsigned max_references = vid->vk.max_dpb_slots + 1;
307 
308    if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
309       max_references = MAX2(max_references, 8);
310    else
311       max_references = MAX2(max_references, 17);
312 
313    width = align(width, 16);
314    height = align(height, 16);
315    return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
316 }
317 
318 static unsigned
calc_ctx_size_h265_main10(struct radv_video_session * vid)319 calc_ctx_size_h265_main10(struct radv_video_session *vid)
320 {
321    unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
322    unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
323    unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
324 
325    unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
326    unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
327    unsigned coeff_10bit = 2;
328 
329    unsigned max_references = vid->vk.max_dpb_slots + 1;
330 
331    if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
332       max_references = MAX2(max_references, 8);
333    else
334       max_references = MAX2(max_references, 17);
335 
336    /* 64x64 is the maximum ctb size. */
337    log2_ctb_size = 6;
338 
339    width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
340    height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
341 
342    num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
343    context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
344    max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
345 
346    cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
347    db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
348 
349    return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
350 }
351 
352 static unsigned
calc_ctx_size_av1(struct radv_device * device,struct radv_video_session * vid)353 calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid)
354 {
355    const struct radv_physical_device *pdev = radv_device_physical(device);
356    return ac_vcn_dec_calc_ctx_size_av1(pdev->av1_version);
357 }
358 
359 static void
radv_video_patch_session_parameters(struct vk_video_session_parameters * params)360 radv_video_patch_session_parameters(struct vk_video_session_parameters *params)
361 {
362    switch (params->op) {
363    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
364    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
365    default:
366       return;
367    case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
368    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
369       radv_video_patch_encode_session_parameters(params);
370       break;
371    }
372 }
373 
374 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionKHR(VkDevice _device,const VkVideoSessionCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionKHR * pVideoSession)375 radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo,
376                            const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
377 {
378    VK_FROM_HANDLE(radv_device, device, _device);
379    struct radv_physical_device *pdev = radv_device_physical(device);
380    const struct radv_instance *instance = radv_physical_device_instance(pdev);
381 
382    struct radv_video_session *vid =
383       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
384    if (!vid)
385       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
386 
387    memset(vid, 0, sizeof(struct radv_video_session));
388 
389    VkResult result = vk_video_session_init(&device->vk, &vid->vk, pCreateInfo);
390    if (result != VK_SUCCESS) {
391       vk_free2(&device->vk.alloc, pAllocator, vid);
392       return result;
393    }
394 
395    vid->interlaced = false;
396    vid->dpb_type = DPB_MAX_RES;
397 
398    switch (vid->vk.op) {
399    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
400       vid->stream_type = RDECODE_CODEC_H264_PERF;
401       if (radv_enable_tier2(pdev))
402          vid->dpb_type = DPB_DYNAMIC_TIER_2;
403       break;
404    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
405       vid->stream_type = RDECODE_CODEC_H265;
406       if (radv_enable_tier2(pdev))
407          vid->dpb_type = DPB_DYNAMIC_TIER_2;
408       break;
409    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
410       vid->stream_type = RDECODE_CODEC_AV1;
411       vid->dpb_type = DPB_DYNAMIC_TIER_2;
412       break;
413    case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
414       vid->encode = true;
415       vid->enc_session.encode_standard = RENCODE_ENCODE_STANDARD_H264;
416       vid->enc_session.aligned_picture_width = align(vid->vk.max_coded.width, 16);
417       vid->enc_session.aligned_picture_height = align(vid->vk.max_coded.height, 16);
418       vid->enc_session.padding_width = vid->enc_session.aligned_picture_width - vid->vk.max_coded.width;
419       vid->enc_session.padding_height = vid->enc_session.aligned_picture_height - vid->vk.max_coded.height;
420       vid->enc_session.display_remote = 0;
421       vid->enc_session.pre_encode_mode = 0;
422       vid->enc_session.pre_encode_chroma_enabled = 0;
423       switch (vid->vk.enc_usage.tuning_mode) {
424       case VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR:
425       default:
426          vid->enc_preset_mode = RENCODE_PRESET_MODE_BALANCE;
427          break;
428       case VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR:
429       case VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR:
430          vid->enc_preset_mode = RENCODE_PRESET_MODE_SPEED;
431          break;
432       case VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR:
433       case VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR:
434          vid->enc_preset_mode = RENCODE_PRESET_MODE_QUALITY;
435          break;
436       }
437       break;
438    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
439       vid->encode = true;
440       vid->enc_session.encode_standard = RENCODE_ENCODE_STANDARD_HEVC;
441       vid->enc_session.aligned_picture_width = align(vid->vk.max_coded.width, 64);
442       vid->enc_session.aligned_picture_height = align(vid->vk.max_coded.height, 64);
443       vid->enc_session.padding_width = vid->enc_session.aligned_picture_width - vid->vk.max_coded.width;
444       vid->enc_session.padding_height = vid->enc_session.aligned_picture_height - vid->vk.max_coded.height;
445       vid->enc_session.display_remote = 0;
446       vid->enc_session.pre_encode_mode = 0;
447       vid->enc_session.pre_encode_chroma_enabled = 0;
448       switch (vid->vk.enc_usage.tuning_mode) {
449       case VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR:
450       default:
451          vid->enc_preset_mode = RENCODE_PRESET_MODE_BALANCE;
452          break;
453       case VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR:
454       case VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR:
455          vid->enc_preset_mode = RENCODE_PRESET_MODE_SPEED;
456          break;
457       case VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR:
458       case VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR:
459          vid->enc_preset_mode = RENCODE_PRESET_MODE_QUALITY;
460          break;
461       }
462       break;
463    default:
464       return VK_ERROR_FEATURE_NOT_PRESENT;
465    }
466 
467    vid->stream_handle = radv_vid_alloc_stream_handle(pdev);
468    vid->dbg_frame_cnt = 0;
469    vid->db_alignment = radv_video_get_db_alignment(
470       pdev, vid->vk.max_coded.width,
471       (vid->stream_type == RDECODE_CODEC_AV1 ||
472        (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)));
473 
474    *pVideoSession = radv_video_session_to_handle(vid);
475    return VK_SUCCESS;
476 }
477 
478 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionKHR(VkDevice _device,VkVideoSessionKHR _session,const VkAllocationCallbacks * pAllocator)479 radv_DestroyVideoSessionKHR(VkDevice _device, VkVideoSessionKHR _session, const VkAllocationCallbacks *pAllocator)
480 {
481    VK_FROM_HANDLE(radv_device, device, _device);
482    VK_FROM_HANDLE(radv_video_session, vid, _session);
483    if (!_session)
484       return;
485 
486    vk_object_base_finish(&vid->vk.base);
487    vk_free2(&device->vk.alloc, pAllocator, vid);
488 }
489 
490 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionParametersKHR(VkDevice _device,const VkVideoSessionParametersCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionParametersKHR * pVideoSessionParameters)491 radv_CreateVideoSessionParametersKHR(VkDevice _device, const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
492                                      const VkAllocationCallbacks *pAllocator,
493                                      VkVideoSessionParametersKHR *pVideoSessionParameters)
494 {
495    VK_FROM_HANDLE(radv_device, device, _device);
496    VK_FROM_HANDLE(radv_video_session, vid, pCreateInfo->videoSession);
497    VK_FROM_HANDLE(radv_video_session_params, templ, pCreateInfo->videoSessionParametersTemplate);
498    const struct radv_physical_device *pdev = radv_device_physical(device);
499    const struct radv_instance *instance = radv_physical_device_instance(pdev);
500    struct radv_video_session_params *params =
501       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*params), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
502    if (!params)
503       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
504 
505    VkResult result =
506       vk_video_session_parameters_init(&device->vk, &params->vk, &vid->vk, templ ? &templ->vk : NULL, pCreateInfo);
507    if (result != VK_SUCCESS) {
508       vk_free2(&device->vk.alloc, pAllocator, params);
509       return result;
510    }
511 
512    radv_video_patch_session_parameters(&params->vk);
513 
514    *pVideoSessionParameters = radv_video_session_params_to_handle(params);
515    return VK_SUCCESS;
516 }
517 
518 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR _params,const VkAllocationCallbacks * pAllocator)519 radv_DestroyVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR _params,
520                                       const VkAllocationCallbacks *pAllocator)
521 {
522    VK_FROM_HANDLE(radv_device, device, _device);
523    VK_FROM_HANDLE(radv_video_session_params, params, _params);
524 
525    vk_video_session_parameters_finish(&device->vk, &params->vk);
526    vk_free2(&device->vk.alloc, pAllocator, params);
527 }
528 
529 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice,const VkVideoProfileInfoKHR * pVideoProfile,VkVideoCapabilitiesKHR * pCapabilities)530 radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, const VkVideoProfileInfoKHR *pVideoProfile,
531                                            VkVideoCapabilitiesKHR *pCapabilities)
532 {
533    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
534    const struct video_codec_cap *cap = NULL;
535    bool is_encode = false;
536 
537    switch (pVideoProfile->videoCodecOperation) {
538 #ifndef _WIN32
539    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
540       cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
541       break;
542    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
543       cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
544       break;
545    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
546       cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1];
547       break;
548    case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
549       cap = &pdev->info.enc_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
550       is_encode = true;
551       break;
552    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
553       cap = &pdev->info.enc_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
554       is_encode = true;
555       break;
556 #endif
557    default:
558       unreachable("unsupported operation");
559    }
560 
561    if (cap && !cap->valid)
562       cap = NULL;
563 
564    pCapabilities->flags = 0;
565    pCapabilities->pictureAccessGranularity.width = VL_MACROBLOCK_WIDTH;
566    pCapabilities->pictureAccessGranularity.height = VL_MACROBLOCK_HEIGHT;
567    pCapabilities->minCodedExtent.width = VL_MACROBLOCK_WIDTH;
568    pCapabilities->minCodedExtent.height = VL_MACROBLOCK_HEIGHT;
569 
570    struct VkVideoDecodeCapabilitiesKHR *dec_caps = NULL;
571    struct VkVideoEncodeCapabilitiesKHR *enc_caps = NULL;
572    if (!is_encode) {
573       dec_caps =
574          (struct VkVideoDecodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
575       if (dec_caps)
576          dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR;
577       pCapabilities->minBitstreamBufferOffsetAlignment = 128;
578       pCapabilities->minBitstreamBufferSizeAlignment = 128;
579    } else {
580       enc_caps =
581          (struct VkVideoEncodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_ENCODE_CAPABILITIES_KHR);
582 
583       if (enc_caps) {
584          enc_caps->flags = 0;
585          enc_caps->rateControlModes = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR |
586                                       VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR |
587                                       VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR;
588          enc_caps->maxRateControlLayers = RADV_ENC_MAX_RATE_LAYER;
589          enc_caps->maxBitrate = 1000000000;
590          enc_caps->maxQualityLevels = 2;
591          enc_caps->encodeInputPictureGranularity.width = 1;
592          enc_caps->encodeInputPictureGranularity.height = 1;
593          enc_caps->supportedEncodeFeedbackFlags = VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BUFFER_OFFSET_BIT_KHR |
594                                                   VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BYTES_WRITTEN_BIT_KHR;
595       }
596       pCapabilities->minBitstreamBufferOffsetAlignment = 16;
597       pCapabilities->minBitstreamBufferSizeAlignment = 16;
598    }
599 
600    switch (pVideoProfile->videoCodecOperation) {
601    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
602       /* H264 allows different luma and chroma bit depths */
603       if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
604          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
605 
606       struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct(
607          pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
608 
609       const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile =
610          vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H264_PROFILE_INFO_KHR);
611 
612       if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
613           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
614           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
615          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
616 
617       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
618          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
619 
620       pCapabilities->maxDpbSlots = NUM_H2645_REFS + 1;
621       pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
622 
623       /* for h264 on navi21+ separate dpb images should work */
624       if (radv_enable_tier2(pdev))
625          pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
626       ext->fieldOffsetGranularity.x = 0;
627       ext->fieldOffsetGranularity.y = 0;
628       ext->maxLevelIdc = STD_VIDEO_H264_LEVEL_IDC_5_1;
629       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME);
630       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION;
631       break;
632    }
633    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
634       /* H265 allows different luma and chroma bit depths */
635       if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
636          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
637 
638       struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct(
639          pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR);
640 
641       const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
642          vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
643 
644       if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
645           h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10 &&
646           h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_STILL_PICTURE)
647          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
648 
649       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
650           pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
651          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
652 
653       pCapabilities->maxDpbSlots = NUM_H2645_REFS + 1;
654       pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
655       /* for h265 on navi21+ separate dpb images should work */
656       if (radv_enable_tier2(pdev))
657          pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
658       ext->maxLevelIdc = STD_VIDEO_H265_LEVEL_IDC_5_1;
659       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME);
660       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION;
661       break;
662    }
663    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
664       const bool have_12bit = pdev->info.vcn_ip_version >= VCN_5_0_0 ||
665                               pdev->info.vcn_ip_version == VCN_4_0_0;
666       /* Monochrome sampling implies an undefined chroma bit depth, and is supported in profile MAIN for AV1. */
667       if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR &&
668           pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
669          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
670       struct VkVideoDecodeAV1CapabilitiesKHR *ext =
671          vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_AV1_CAPABILITIES_KHR);
672 
673       const struct VkVideoDecodeAV1ProfileInfoKHR *av1_profile =
674          vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_AV1_PROFILE_INFO_KHR);
675 
676       if (av1_profile->stdProfile != STD_VIDEO_AV1_PROFILE_MAIN &&
677           (!have_12bit || av1_profile->stdProfile != STD_VIDEO_AV1_PROFILE_PROFESSIONAL))
678          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
679 
680       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
681           pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR &&
682           (!have_12bit || pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR))
683          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
684 
685       pCapabilities->maxDpbSlots = 9;
686       pCapabilities->maxActiveReferencePictures = STD_VIDEO_AV1_NUM_REF_FRAMES;
687       pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
688       ext->maxLevel = STD_VIDEO_AV1_LEVEL_6_1; /* For VCN3/4, the only h/w currently with AV1 decode support */
689       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME);
690       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION;
691       break;
692    }
693    case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: {
694       struct VkVideoEncodeH264CapabilitiesKHR *ext = (struct VkVideoEncodeH264CapabilitiesKHR *)vk_find_struct(
695          pCapabilities->pNext, VIDEO_ENCODE_H264_CAPABILITIES_KHR);
696 
697       const struct VkVideoEncodeH264ProfileInfoKHR *h264_profile =
698          vk_find_struct_const(pVideoProfile->pNext, VIDEO_ENCODE_H264_PROFILE_INFO_KHR);
699 
700       if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
701           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
702           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
703          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
704 
705       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
706          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
707 
708       pCapabilities->maxDpbSlots = NUM_H2645_REFS;
709       pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
710       ext->flags = VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_KHR |
711                    VK_VIDEO_ENCODE_H264_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR;
712       ext->maxLevelIdc = cap ? cap->max_level : 0;
713       ext->maxSliceCount = 128;
714       ext->maxPPictureL0ReferenceCount = 1;
715       ext->maxBPictureL0ReferenceCount = 0;
716       ext->maxL1ReferenceCount = 0;
717       ext->maxTemporalLayerCount = 4;
718       ext->expectDyadicTemporalLayerPattern = false;
719       ext->minQp = 0;
720       ext->maxQp = 51;
721       ext->prefersGopRemainingFrames = false;
722       ext->requiresGopRemainingFrames = false;
723       ext->stdSyntaxFlags = VK_VIDEO_ENCODE_H264_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
724                             VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_UNSET_BIT_KHR |
725                             VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_SET_BIT_KHR;
726       if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3)
727          ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H264_STD_WEIGHTED_BIPRED_IDC_EXPLICIT_BIT_KHR;
728 
729       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME);
730       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION;
731       break;
732    }
733    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: {
734       struct VkVideoEncodeH265CapabilitiesKHR *ext = (struct VkVideoEncodeH265CapabilitiesKHR *)vk_find_struct(
735          pCapabilities->pNext, VIDEO_ENCODE_H265_CAPABILITIES_KHR);
736 
737       const struct VkVideoEncodeH265ProfileInfoKHR *h265_profile =
738          vk_find_struct_const(pVideoProfile->pNext, VIDEO_ENCODE_H265_PROFILE_INFO_KHR);
739 
740       if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
741           (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_2 ||
742            h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10))
743          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
744 
745       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
746           (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_2 ||
747            pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR))
748          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
749 
750       pCapabilities->maxDpbSlots = NUM_H2645_REFS;
751       pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
752       ext->flags = VK_VIDEO_ENCODE_H265_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR;
753       ext->maxLevelIdc = cap ? cap->max_level : 0;
754       ext->maxSliceSegmentCount = 128;
755       ext->maxTiles.width = 1;
756       ext->maxTiles.height = 1;
757       ext->ctbSizes = VK_VIDEO_ENCODE_H265_CTB_SIZE_64_BIT_KHR;
758       ext->transformBlockSizes =
759          VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_4_BIT_KHR | VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_8_BIT_KHR |
760          VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_16_BIT_KHR | VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_32_BIT_KHR;
761       ext->maxPPictureL0ReferenceCount = 1;
762       ext->maxBPictureL0ReferenceCount = 0;
763       ext->maxL1ReferenceCount = 0;
764       ext->maxSubLayerCount = 4;
765       ext->expectDyadicTemporalSubLayerPattern = false;
766       ext->minQp = 0;
767       ext->maxQp = 51;
768       ext->prefersGopRemainingFrames = false;
769       ext->requiresGopRemainingFrames = false;
770       ext->stdSyntaxFlags = VK_VIDEO_ENCODE_H265_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
771                             VK_VIDEO_ENCODE_H265_STD_DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG_SET_BIT_KHR |
772                             VK_VIDEO_ENCODE_H265_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
773                             VK_VIDEO_ENCODE_H265_STD_ENTROPY_CODING_SYNC_ENABLED_FLAG_SET_BIT_KHR;
774 
775       if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_2)
776          ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H265_STD_SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG_SET_BIT_KHR;
777 
778       if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3)
779          ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_SET_BIT_KHR;
780       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME);
781       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION;
782       break;
783    }
784    default:
785       break;
786    }
787 
788    if (cap) {
789       pCapabilities->maxCodedExtent.width = cap->max_width;
790       pCapabilities->maxCodedExtent.height = cap->max_height;
791    } else {
792       switch (pVideoProfile->videoCodecOperation) {
793       case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
794          pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
795          pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
796          break;
797       case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
798          pCapabilities->maxCodedExtent.width =
799             (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
800          pCapabilities->maxCodedExtent.height =
801             (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
802          break;
803       default:
804          break;
805       }
806    }
807 
808    return VK_SUCCESS;
809 }
810 
811 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceVideoFormatInfoKHR * pVideoFormatInfo,uint32_t * pVideoFormatPropertyCount,VkVideoFormatPropertiesKHR * pVideoFormatProperties)812 radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,
813                                                const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo,
814                                                uint32_t *pVideoFormatPropertyCount,
815                                                VkVideoFormatPropertiesKHR *pVideoFormatProperties)
816 {
817    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
818 
819    if ((pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR |
820                                         VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) &&
821        !pdev->video_encode_enabled)
822       return VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR;
823 
824    /* radv requires separate allocates for DPB and decode video. */
825    if ((pVideoFormatInfo->imageUsage &
826         (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) ==
827        (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
828       return VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR;
829 
830    VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount);
831 
832    bool need_8bit = true;
833    bool need_10bit = false;
834    bool need_12bit = false;
835    const struct VkVideoProfileListInfoKHR *prof_list =
836       (struct VkVideoProfileListInfoKHR *)vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
837    if (prof_list) {
838       for (unsigned i = 0; i < prof_list->profileCount; i++) {
839          const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i];
840          if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
841             need_10bit = true;
842          else if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR)
843             need_12bit = true;
844       }
845    }
846 
847    if (need_12bit) {
848       vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
849       {
850          p->format = VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16;
851          p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
852          p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
853          p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
854          p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
855          p->imageCreateFlags = 0;
856          if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
857             p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
858          p->imageType = VK_IMAGE_TYPE_2D;
859          p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
860          p->imageUsageFlags = pVideoFormatInfo->imageUsage;
861       }
862 
863       if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) {
864          need_8bit = false;
865          need_10bit = false;
866       }
867    }
868 
869    if (need_10bit) {
870       vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
871       {
872          p->format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16;
873          p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
874          p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
875          p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
876          p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
877          p->imageCreateFlags = 0;
878          if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
879             p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
880          p->imageType = VK_IMAGE_TYPE_2D;
881          p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
882          p->imageUsageFlags = pVideoFormatInfo->imageUsage;
883       }
884 
885       if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
886          need_8bit = false;
887    }
888 
889    if (need_8bit) {
890       vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
891       {
892          p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
893          p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
894          p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
895          p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
896          p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
897          p->imageCreateFlags = 0;
898          if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
899             p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
900          p->imageType = VK_IMAGE_TYPE_2D;
901          p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
902          p->imageUsageFlags = pVideoFormatInfo->imageUsage;
903       }
904    }
905 
906    return vk_outarray_status(&out);
907 }
908 
909 #define RADV_BIND_SESSION_CTX 0
910 #define RADV_BIND_DECODER_CTX 1
911 
912 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t * pMemoryRequirementsCount,VkVideoSessionMemoryRequirementsKHR * pMemoryRequirements)913 radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession,
914                                           uint32_t *pMemoryRequirementsCount,
915                                           VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements)
916 {
917    VK_FROM_HANDLE(radv_device, device, _device);
918    VK_FROM_HANDLE(radv_video_session, vid, videoSession);
919    const struct radv_physical_device *pdev = radv_device_physical(device);
920 
921    uint32_t memory_type_bits = (1u << pdev->memory_properties.memoryTypeCount) - 1;
922 
923    if (vid->encode) {
924       return radv_video_get_encode_session_memory_requirements(device, vid, pMemoryRequirementsCount,
925                                                                pMemoryRequirements);
926    }
927    VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
928    /* 1 buffer for session context */
929    if (pdev->info.family >= CHIP_POLARIS10) {
930       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
931       {
932          m->memoryBindIndex = RADV_BIND_SESSION_CTX;
933          m->memoryRequirements.size = RDECODE_SESSION_CONTEXT_SIZE;
934          m->memoryRequirements.alignment = 0;
935          m->memoryRequirements.memoryTypeBits = memory_type_bits;
936       }
937    }
938 
939    if (vid->stream_type == RDECODE_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10) {
940       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
941       {
942          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
943          m->memoryRequirements.size = align(calc_ctx_size_h264_perf(vid), 4096);
944          m->memoryRequirements.alignment = 0;
945          m->memoryRequirements.memoryTypeBits = memory_type_bits;
946       }
947    }
948    if (vid->stream_type == RDECODE_CODEC_H265) {
949       uint32_t ctx_size;
950 
951       if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
952          ctx_size = calc_ctx_size_h265_main10(vid);
953       else
954          ctx_size = calc_ctx_size_h265_main(vid);
955       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
956       {
957          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
958          m->memoryRequirements.size = align(ctx_size, 4096);
959          m->memoryRequirements.alignment = 0;
960          m->memoryRequirements.memoryTypeBits = memory_type_bits;
961       }
962    }
963    if (vid->stream_type == RDECODE_CODEC_AV1) {
964       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
965       {
966          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
967          m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096);
968          m->memoryRequirements.alignment = 0;
969          m->memoryRequirements.memoryTypeBits = 0;
970          for (unsigned i = 0; i < pdev->memory_properties.memoryTypeCount; i++)
971             if (pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
972                m->memoryRequirements.memoryTypeBits |= (1 << i);
973       }
974    }
975    return vk_outarray_status(&out);
976 }
977 
978 VKAPI_ATTR VkResult VKAPI_CALL
radv_UpdateVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR videoSessionParameters,const VkVideoSessionParametersUpdateInfoKHR * pUpdateInfo)979 radv_UpdateVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR videoSessionParameters,
980                                      const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo)
981 {
982    VK_FROM_HANDLE(radv_video_session_params, params, videoSessionParameters);
983 
984    VkResult result = vk_video_session_parameters_update(&params->vk, pUpdateInfo);
985    if (result != VK_SUCCESS)
986       return result;
987    radv_video_patch_session_parameters(&params->vk);
988    return result;
989 }
990 
991 static void
copy_bind(struct radv_vid_mem * dst,const VkBindVideoSessionMemoryInfoKHR * src)992 copy_bind(struct radv_vid_mem *dst, const VkBindVideoSessionMemoryInfoKHR *src)
993 {
994    dst->mem = radv_device_memory_from_handle(src->memory);
995    dst->offset = src->memoryOffset;
996    dst->size = src->memorySize;
997 }
998 
999 VKAPI_ATTR VkResult VKAPI_CALL
radv_BindVideoSessionMemoryKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t videoSessionBindMemoryCount,const VkBindVideoSessionMemoryInfoKHR * pBindSessionMemoryInfos)1000 radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t videoSessionBindMemoryCount,
1001                                const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos)
1002 {
1003    VK_FROM_HANDLE(radv_video_session, vid, videoSession);
1004 
1005    for (unsigned i = 0; i < videoSessionBindMemoryCount; i++) {
1006       switch (pBindSessionMemoryInfos[i].memoryBindIndex) {
1007       case RADV_BIND_SESSION_CTX:
1008          copy_bind(&vid->sessionctx, &pBindSessionMemoryInfos[i]);
1009          break;
1010       case RADV_BIND_DECODER_CTX:
1011          copy_bind(&vid->ctx, &pBindSessionMemoryInfos[i]);
1012          break;
1013       default:
1014          assert(0);
1015          break;
1016       }
1017    }
1018    return VK_SUCCESS;
1019 }
1020 
1021 /* add a new set register command to the IB */
1022 static void
set_reg(struct radv_cmd_buffer * cmd_buffer,unsigned reg,uint32_t val)1023 set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
1024 {
1025    struct radeon_cmdbuf *cs = cmd_buffer->cs;
1026    radeon_emit(cs, RDECODE_PKT0(reg >> 2, 0));
1027    radeon_emit(cs, val);
1028 }
1029 
1030 static void
send_cmd(struct radv_cmd_buffer * cmd_buffer,unsigned cmd,struct radeon_winsys_bo * bo,uint32_t offset)1031 send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
1032 {
1033    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1034    const struct radv_physical_device *pdev = radv_device_physical(device);
1035    uint64_t addr;
1036 
1037    radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
1038    addr = radv_buffer_get_va(bo);
1039    addr += offset;
1040 
1041    if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
1042       radeon_check_space(device->ws, cmd_buffer->cs, 6);
1043       set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
1044       set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
1045       set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
1046       return;
1047    }
1048    switch (cmd) {
1049    case RDECODE_CMD_MSG_BUFFER:
1050       cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
1051       cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
1052       cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr);
1053       break;
1054    case RDECODE_CMD_DPB_BUFFER:
1055       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
1056       cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32);
1057       cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr);
1058       break;
1059    case RDECODE_CMD_DECODING_TARGET_BUFFER:
1060       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
1061       cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32);
1062       cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr);
1063       break;
1064    case RDECODE_CMD_FEEDBACK_BUFFER:
1065       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
1066       cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 32);
1067       cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr);
1068       break;
1069    case RDECODE_CMD_PROB_TBL_BUFFER:
1070       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
1071       cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
1072       cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr);
1073       break;
1074    case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
1075       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
1076       cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
1077       cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = (addr);
1078       break;
1079    case RDECODE_CMD_BITSTREAM_BUFFER:
1080       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
1081       cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
1082       cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr);
1083       break;
1084    case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
1085       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
1086       cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
1087       cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = (addr);
1088       break;
1089    case RDECODE_CMD_CONTEXT_BUFFER:
1090       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
1091       cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 32);
1092       cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr);
1093       break;
1094    default:
1095       assert(0);
1096    }
1097 }
1098 
1099 static void
rvcn_dec_message_create(struct radv_video_session * vid,void * ptr,uint32_t size)1100 rvcn_dec_message_create(struct radv_video_session *vid, void *ptr, uint32_t size)
1101 {
1102    rvcn_dec_message_header_t *header = ptr;
1103    rvcn_dec_message_create_t *create = (void *)((char *)ptr + sizeof(rvcn_dec_message_header_t));
1104 
1105    memset(ptr, 0, size);
1106    header->header_size = sizeof(rvcn_dec_message_header_t);
1107    header->total_size = size;
1108    header->num_buffers = 1;
1109    header->msg_type = RDECODE_MSG_CREATE;
1110    header->stream_handle = vid->stream_handle;
1111    header->status_report_feedback_number = 0;
1112 
1113    header->index[0].message_id = RDECODE_MESSAGE_CREATE;
1114    header->index[0].offset = sizeof(rvcn_dec_message_header_t);
1115    header->index[0].size = sizeof(rvcn_dec_message_create_t);
1116    header->index[0].filled = 0;
1117 
1118    create->stream_type = vid->stream_type;
1119    create->session_flags = 0;
1120    create->width_in_samples = vid->vk.max_coded.width;
1121    create->height_in_samples = vid->vk.max_coded.height;
1122 }
1123 
1124 static void
rvcn_dec_message_feedback(void * ptr)1125 rvcn_dec_message_feedback(void *ptr)
1126 {
1127    rvcn_dec_feedback_header_t *header = (void *)ptr;
1128 
1129    header->header_size = sizeof(rvcn_dec_feedback_header_t);
1130    header->total_size = sizeof(rvcn_dec_feedback_header_t);
1131    header->num_buffers = 0;
1132 }
1133 
1134 static const uint8_t h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62};
1135 static uint8_t
get_h264_level(StdVideoH264LevelIdc level)1136 get_h264_level(StdVideoH264LevelIdc level)
1137 {
1138    assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2);
1139    return h264_levels[level];
1140 }
1141 
1142 static void
update_h264_scaling(unsigned char scaling_list_4x4[6][16],unsigned char scaling_list_8x8[2][64],const StdVideoH264ScalingLists * scaling_lists)1143 update_h264_scaling(unsigned char scaling_list_4x4[6][16], unsigned char scaling_list_8x8[2][64],
1144                     const StdVideoH264ScalingLists *scaling_lists)
1145 {
1146    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) {
1147       for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++)
1148          scaling_list_4x4[i][vl_zscan_normal_16[j]] = scaling_lists->ScalingList4x4[i][j];
1149    }
1150 
1151    for (int i = 0; i < 2; i++) {
1152       for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++)
1153          scaling_list_8x8[i][vl_zscan_normal[j]] = scaling_lists->ScalingList8x8[i][j];
1154    }
1155 }
1156 
1157 static rvcn_dec_message_avc_t
get_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)1158 get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
1159              const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
1160              uint32_t *height_in_samples, void *it_ptr)
1161 {
1162    rvcn_dec_message_avc_t result;
1163    const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
1164       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
1165 
1166    *slice_offset = h264_pic_info->pSliceOffsets[0];
1167 
1168    memset(&result, 0, sizeof(result));
1169 
1170    assert(params->vk.h264_dec.h264_sps_count > 0);
1171    const StdVideoH264SequenceParameterSet *sps =
1172       vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
1173    switch (sps->profile_idc) {
1174    case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
1175       result.profile = RDECODE_H264_PROFILE_BASELINE;
1176       break;
1177    case STD_VIDEO_H264_PROFILE_IDC_MAIN:
1178       result.profile = RDECODE_H264_PROFILE_MAIN;
1179       break;
1180    case STD_VIDEO_H264_PROFILE_IDC_HIGH:
1181       result.profile = RDECODE_H264_PROFILE_HIGH;
1182       break;
1183    default:
1184       fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
1185       result.profile = RDECODE_H264_PROFILE_MAIN;
1186       break;
1187    }
1188 
1189    *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
1190    *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
1191    if (!sps->flags.frame_mbs_only_flag)
1192       *height_in_samples *= 2;
1193    result.level = get_h264_level(sps->level_idc);
1194 
1195    result.sps_info_flags = 0;
1196 
1197    result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
1198    result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
1199    result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
1200    result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
1201    if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
1202       result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
1203 
1204    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
1205    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
1206    result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
1207    result.pic_order_cnt_type = sps->pic_order_cnt_type;
1208    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
1209 
1210    result.chroma_format = sps->chroma_format_idc;
1211 
1212    const StdVideoH264PictureParameterSet *pps =
1213       vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
1214    result.pps_info_flags = 0;
1215    result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
1216    result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
1217    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
1218    result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
1219    result.pps_info_flags |= pps->weighted_bipred_idc << 4;
1220    result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
1221    result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
1222    result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
1223 
1224    result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
1225    result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
1226    result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
1227 
1228    StdVideoH264ScalingLists scaling_lists;
1229    vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
1230    update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
1231 
1232    memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
1233    memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
1234    memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
1235 
1236    result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
1237    result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
1238 
1239    result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
1240    result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
1241 
1242    result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
1243 
1244    result.num_ref_frames = sps->max_num_ref_frames;
1245    result.non_existing_frame_flags = 0;
1246    result.used_for_reference_flags = 0;
1247 
1248    memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
1249    memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
1250    for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
1251       int idx = frame_info->pReferenceSlots[i].slotIndex;
1252       const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
1253          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
1254 
1255       result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
1256       result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
1257       result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
1258 
1259       result.ref_frame_list[i] = idx;
1260 
1261       if (dpb_slot->pStdReferenceInfo->flags.top_field_flag)
1262          result.used_for_reference_flags |= (1 << (2 * i));
1263       if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
1264          result.used_for_reference_flags |= (1 << (2 * i + 1));
1265 
1266       if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
1267          result.used_for_reference_flags |= (3 << (2 * i));
1268 
1269       if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
1270          result.ref_frame_list[i] |= 0x80;
1271       if (dpb_slot->pStdReferenceInfo->flags.is_non_existing)
1272          result.non_existing_frame_flags |= 1 << i;
1273    }
1274    result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
1275    result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
1276 
1277    return result;
1278 }
1279 
1280 static void
update_h265_scaling(void * it_ptr,const StdVideoH265ScalingLists * scaling_lists)1281 update_h265_scaling(void *it_ptr, const StdVideoH265ScalingLists *scaling_lists)
1282 {
1283    if (scaling_lists) {
1284       memcpy(it_ptr, scaling_lists->ScalingList4x4,
1285              STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1286       memcpy((char *)it_ptr + 96, scaling_lists->ScalingList8x8,
1287              STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1288       memcpy((char *)it_ptr + 480, scaling_lists->ScalingList16x16,
1289              STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1290       memcpy((char *)it_ptr + 864, scaling_lists->ScalingList32x32,
1291              STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1292    } else {
1293       memset(it_ptr, 0, STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1294       memset((char *)it_ptr + 96, 0,
1295              STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1296       memset((char *)it_ptr + 480, 0,
1297              STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1298       memset((char *)it_ptr + 864, 0,
1299              STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1300    }
1301 }
1302 
1303 static rvcn_dec_message_hevc_t
get_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)1304 get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1305              const struct VkVideoDecodeInfoKHR *frame_info,
1306              uint32_t *width_in_samples,
1307              uint32_t *height_in_samples,
1308              void *it_ptr)
1309 {
1310    const struct radv_physical_device *pdev = radv_device_physical(device);
1311    rvcn_dec_message_hevc_t result;
1312    int i, j;
1313    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
1314       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
1315    memset(&result, 0, sizeof(result));
1316 
1317    const StdVideoH265SequenceParameterSet *sps =
1318       vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
1319    const StdVideoH265PictureParameterSet *pps =
1320       vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
1321 
1322    result.sps_info_flags = 0;
1323    result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
1324    result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
1325    result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
1326    result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
1327    result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
1328    result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
1329    result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
1330    result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
1331    result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
1332 
1333    if (pdev->info.family == CHIP_CARRIZO)
1334       result.sps_info_flags |= 1 << 9;
1335 
1336    if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {
1337       result.sps_info_flags |= 1 << 11;
1338    }
1339    result.st_rps_bits = h265_pic_info->pStdPictureInfo->NumBitsForSTRefPicSetInSlice;
1340 
1341    *width_in_samples = sps->pic_width_in_luma_samples;
1342    *height_in_samples = sps->pic_height_in_luma_samples;
1343    result.chroma_format = sps->chroma_format_idc;
1344    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
1345    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
1346    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
1347    result.sps_max_dec_pic_buffering_minus1 =
1348       sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
1349    result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
1350    result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
1351    result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
1352    result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
1353    result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
1354    result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
1355    if (sps->flags.pcm_enabled_flag) {
1356       result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
1357       result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
1358       result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
1359       result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
1360    }
1361    result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
1362 
1363    result.pps_info_flags = 0;
1364    result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
1365    result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
1366    result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
1367    result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
1368    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
1369    result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
1370    result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
1371    result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
1372    result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
1373    result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
1374    result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
1375    result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
1376    result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
1377    result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
1378    result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
1379    result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
1380    result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
1381    result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
1382    result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
1383    result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
1384 
1385    result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
1386    result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
1387    result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
1388    result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
1389    result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
1390    result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
1391    result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
1392    result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
1393    result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
1394    result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
1395    result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
1396    result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
1397    result.init_qp_minus26 = pps->init_qp_minus26;
1398 
1399    for (i = 0; i < 19; ++i)
1400       result.column_width_minus1[i] = pps->column_width_minus1[i];
1401 
1402    for (i = 0; i < 21; ++i)
1403       result.row_height_minus1[i] = pps->row_height_minus1[i];
1404 
1405    result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
1406    result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
1407 
1408    uint8_t idxs[16];
1409    memset(result.poc_list, 0, 16 * sizeof(int));
1410    memset(result.ref_pic_list, 0x7f, 16);
1411    memset(idxs, 0xff, 16);
1412    for (i = 0; i < frame_info->referenceSlotCount; i++) {
1413       const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
1414          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
1415       int idx = frame_info->pReferenceSlots[i].slotIndex;
1416       result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
1417       result.ref_pic_list[i] = idx;
1418       idxs[idx] = i;
1419    }
1420    result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
1421 
1422 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
1423    for (i = 0; i < 8; ++i)
1424       result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
1425 
1426    for (i = 0; i < 8; ++i)
1427       result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
1428 
1429    for (i = 0; i < 8; ++i)
1430       result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
1431 
1432    const StdVideoH265ScalingLists *scaling_lists = NULL;
1433    if (pps->flags.pps_scaling_list_data_present_flag)
1434       scaling_lists = pps->pScalingLists;
1435    else if (sps->flags.sps_scaling_list_data_present_flag)
1436       scaling_lists = sps->pScalingLists;
1437 
1438    update_h265_scaling(it_ptr, scaling_lists);
1439 
1440    if (scaling_lists) {
1441       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
1442          result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
1443 
1444       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
1445          result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
1446    }
1447 
1448    for (i = 0; i < 2; i++) {
1449       for (j = 0; j < 15; j++)
1450          result.direct_reflist[i][j] = 0xff;
1451    }
1452 
1453    if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
1454       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
1455          result.p010_mode = 1;
1456          result.msb_mode = 1;
1457       } else {
1458          result.p010_mode = 0;
1459          result.luma_10to8 = 5;
1460          result.chroma_10to8 = 5;
1461          result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
1462          result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
1463       }
1464    }
1465 
1466    return result;
1467 }
1468 
/* AV1 loop-restoration type identifiers. */
enum {
   AV1_RESTORE_NONE = 0,
   AV1_RESTORE_WIENER = 1,
   AV1_RESTORE_SGRPROJ = 2,
   AV1_RESTORE_SWITCHABLE = 3,
};
1475 
/* AV1 super-resolution scaling: the numerator is fixed, and the denominator
 * used by get_av1_msg() is coded_denom + AV1_SUPERRES_DENOM_MIN. */
#define AV1_SUPERRES_NUM       8
#define AV1_SUPERRES_DENOM_MIN 9
1478 
1479 static rvcn_dec_message_av1_t
get_av1_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * probs_ptr,int * update_reference_slot)1480 get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1481             const struct VkVideoDecodeInfoKHR *frame_info, void *probs_ptr, int *update_reference_slot)
1482 {
1483    rvcn_dec_message_av1_t result;
1484    unsigned i, j;
1485    const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
1486       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
1487    const StdVideoDecodeAV1PictureInfo *pi = av1_pic_info->pStdPictureInfo;
1488    const StdVideoAV1SequenceHeader *seq_hdr = &params->vk.av1_dec.seq_hdr.base;
1489    memset(&result, 0, sizeof(result));
1490 
1491    const int intra_only_decoding = vid->vk.max_dpb_slots == 0;
1492    if (intra_only_decoding)
1493       assert(frame_info->pSetupReferenceSlot == NULL);
1494 
1495    *update_reference_slot = !(intra_only_decoding || pi->refresh_frame_flags == 0);
1496 
1497    result.frame_header_flags = (1 /*av1_pic_info->frame_header->flags.show_frame*/
1498                                 << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) &
1499                                RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK;
1500 
1501    result.frame_header_flags |= (pi->flags.disable_cdf_update << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) &
1502                                 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK;
1503 
1504    result.frame_header_flags |=
1505       ((!pi->flags.disable_frame_end_update_cdf) << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) &
1506       RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK;
1507 
1508    result.frame_header_flags |=
1509       ((pi->frame_type == STD_VIDEO_AV1_FRAME_TYPE_INTRA_ONLY) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) &
1510       RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK;
1511 
1512    result.frame_header_flags |= (pi->flags.allow_intrabc << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) &
1513                                 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK;
1514 
1515    result.frame_header_flags |=
1516       (pi->flags.allow_high_precision_mv << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
1517       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
1518 
1519    result.frame_header_flags |=
1520       (seq_hdr->pColorConfig->flags.mono_chrome << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
1521       RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
1522 
1523    result.frame_header_flags |= (pi->flags.skip_mode_present << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
1524                                 RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
1525 
1526    result.frame_header_flags |=
1527       (pi->pQuantization->flags.using_qmatrix << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
1528       RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
1529 
1530    result.frame_header_flags |=
1531       (seq_hdr->flags.enable_filter_intra << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
1532       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK;
1533 
1534    result.frame_header_flags |=
1535       (seq_hdr->flags.enable_intra_edge_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) &
1536       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK;
1537 
1538    result.frame_header_flags |=
1539       (seq_hdr->flags.enable_interintra_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) &
1540       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK;
1541 
1542    result.frame_header_flags |=
1543       (seq_hdr->flags.enable_masked_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) &
1544       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK;
1545 
1546    result.frame_header_flags |=
1547       (pi->flags.allow_warped_motion << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) &
1548       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK;
1549 
1550    result.frame_header_flags |=
1551       (seq_hdr->flags.enable_dual_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) &
1552       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK;
1553 
1554    result.frame_header_flags |=
1555       (seq_hdr->flags.enable_order_hint << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) &
1556       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK;
1557 
1558    result.frame_header_flags |= (seq_hdr->flags.enable_jnt_comp << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) &
1559                                 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK;
1560 
1561    result.frame_header_flags |= (pi->flags.use_ref_frame_mvs << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) &
1562                                 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK;
1563 
1564    result.frame_header_flags |=
1565       (pi->flags.allow_screen_content_tools << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) &
1566       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK;
1567 
1568    result.frame_header_flags |=
1569       (pi->flags.force_integer_mv << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
1570       RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
1571 
1572    result.frame_header_flags |=
1573       (pi->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
1574       RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
1575 
1576    result.frame_header_flags |=
1577       (pi->pLoopFilter->flags.loop_filter_delta_update << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
1578       RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
1579 
1580    result.frame_header_flags |= (pi->flags.delta_q_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
1581                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
1582 
1583    result.frame_header_flags |= (pi->flags.delta_lf_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) &
1584                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK;
1585 
1586    result.frame_header_flags |= (pi->flags.reduced_tx_set << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) &
1587                                 RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK;
1588 
1589    result.frame_header_flags |=
1590       (pi->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) &
1591       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK;
1592 
1593    result.frame_header_flags |=
1594       (pi->flags.segmentation_update_map << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) &
1595       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK;
1596 
1597    result.frame_header_flags |=
1598       (pi->flags.segmentation_temporal_update << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
1599       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK;
1600 
1601    result.frame_header_flags |= (pi->flags.delta_lf_multi << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) &
1602                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK;
1603 
1604    result.frame_header_flags |=
1605       (pi->flags.is_motion_mode_switchable << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) &
1606       RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK;
1607 
1608    result.frame_header_flags |= ((!intra_only_decoding ? !(pi->refresh_frame_flags) : 1)
1609                                  << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) &
1610                                 RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK;
1611 
1612    result.frame_header_flags |=
1613       ((!seq_hdr->flags.enable_ref_frame_mvs) << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) &
1614       RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK;
1615 
1616    result.current_frame_id = pi->current_frame_id;
1617    result.frame_offset = pi->OrderHint;
1618    result.profile = seq_hdr->seq_profile;
1619    result.is_annexb = 0;
1620 
1621    result.frame_type = pi->frame_type;
1622    result.primary_ref_frame = pi->primary_ref_frame;
1623 
1624    const struct VkVideoDecodeAV1DpbSlotInfoKHR *setup_dpb_slot =
1625       intra_only_decoding
1626          ? NULL
1627          : vk_find_struct_const(frame_info->pSetupReferenceSlot->pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1628 
1629    /* The AMD FW interface does not need this information, since it's
1630     * redundant with the information derivable from the current frame header,
1631     * which the FW is parsing and tracking.
1632     */
1633    (void)setup_dpb_slot;
1634    result.curr_pic_idx = intra_only_decoding ? 0 : frame_info->pSetupReferenceSlot->slotIndex;
1635 
1636    result.sb_size = seq_hdr->flags.use_128x128_superblock;
1637    result.interp_filter = pi->interpolation_filter;
1638    for (i = 0; i < 2; ++i)
1639       result.filter_level[i] = pi->pLoopFilter->loop_filter_level[i];
1640    result.filter_level_u = pi->pLoopFilter->loop_filter_level[2];
1641    result.filter_level_v = pi->pLoopFilter->loop_filter_level[3];
1642    result.sharpness_level = pi->pLoopFilter->loop_filter_sharpness;
1643    for (i = 0; i < 8; ++i)
1644       result.ref_deltas[i] = pi->pLoopFilter->loop_filter_ref_deltas[i];
1645    for (i = 0; i < 2; ++i)
1646       result.mode_deltas[i] = pi->pLoopFilter->loop_filter_mode_deltas[i];
1647    result.base_qindex = pi->pQuantization->base_q_idx;
1648    result.y_dc_delta_q = pi->pQuantization->DeltaQYDc;
1649    result.u_dc_delta_q = pi->pQuantization->DeltaQUDc;
1650    result.v_dc_delta_q = pi->pQuantization->DeltaQVDc;
1651    result.u_ac_delta_q = pi->pQuantization->DeltaQUAc;
1652    result.v_ac_delta_q = pi->pQuantization->DeltaQVAc;
1653 
1654    if (pi->pQuantization->flags.using_qmatrix) {
1655       result.qm_y = pi->pQuantization->qm_y | 0xf0;
1656       result.qm_u = pi->pQuantization->qm_u | 0xf0;
1657       result.qm_v = pi->pQuantization->qm_v | 0xf0;
1658    } else {
1659       result.qm_y = 0xff;
1660       result.qm_u = 0xff;
1661       result.qm_v = 0xff;
1662    }
1663    result.delta_q_res = (1 << pi->delta_q_res);
1664    result.delta_lf_res = (1 << pi->delta_lf_res);
1665    result.tile_cols = pi->pTileInfo->TileCols;
1666    result.tile_rows = pi->pTileInfo->TileRows;
1667 
1668    result.tx_mode = pi->TxMode;
1669    result.reference_mode = (pi->flags.reference_select == 1) ? 2 : 0;
1670    result.chroma_format = seq_hdr->pColorConfig->flags.mono_chrome ? 0 : 1;
1671    result.tile_size_bytes = pi->pTileInfo->tile_size_bytes_minus_1;
1672    result.context_update_tile_id = pi->pTileInfo->context_update_tile_id;
1673 
1674    for (i = 0; i < result.tile_cols; i++)
1675       result.tile_col_start_sb[i] = pi->pTileInfo->pMiColStarts[i];
1676    result.tile_col_start_sb[result.tile_cols] =
1677       result.tile_col_start_sb[result.tile_cols - 1] + pi->pTileInfo->pWidthInSbsMinus1[result.tile_cols - 1] + 1;
1678    for (i = 0; i < pi->pTileInfo->TileRows; i++)
1679       result.tile_row_start_sb[i] = pi->pTileInfo->pMiRowStarts[i];
1680    result.tile_row_start_sb[result.tile_rows] =
1681       result.tile_row_start_sb[result.tile_rows - 1] + pi->pTileInfo->pHeightInSbsMinus1[result.tile_rows - 1] + 1;
1682 
1683    result.max_width = seq_hdr->max_frame_width_minus_1 + 1;
1684    result.max_height = seq_hdr->max_frame_height_minus_1 + 1;
1685    VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
1686    result.superres_scale_denominator =
1687       pi->flags.use_superres ? pi->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM;
1688    if (pi->flags.use_superres) {
1689       result.width =
1690          (frameExtent.width * 8 + result.superres_scale_denominator / 2) / result.superres_scale_denominator;
1691    } else {
1692       result.width = frameExtent.width;
1693    }
1694    result.height = frameExtent.height;
1695 
1696    result.superres_upscaled_width = frameExtent.width;
1697 
1698    result.order_hint_bits = seq_hdr->order_hint_bits_minus_1 + 1;
1699 
1700    /* The VCN FW will evict references that aren't specified in
1701     * ref_frame_map, even if they are still valid. To prevent this we will
1702     * specify every possible reference in ref_frame_map.
1703     */
1704    uint16_t used_slots = (1 << result.curr_pic_idx);
1705    for (i = 0; i < frame_info->referenceSlotCount; i++) {
1706       const struct VkVideoDecodeAV1DpbSlotInfoKHR *ref_dpb_slot =
1707          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1708       (void)ref_dpb_slot; /* Again, the FW is tracking this information for us, so no need for it. */
1709       (void)ref_dpb_slot; /* the FW is tracking this information for us, so no need for it. */
1710       int32_t slotIndex = frame_info->pReferenceSlots[i].slotIndex;
1711       result.ref_frame_map[i] = slotIndex;
1712       used_slots |= 1 << slotIndex;
1713    }
1714    /* Go through all the slots and fill in the ones that haven't been used. */
1715    for (j = 0; j < STD_VIDEO_AV1_NUM_REF_FRAMES + 1; j++) {
1716       if ((used_slots & (1 << j)) == 0) {
1717          result.ref_frame_map[i] = j;
1718          used_slots |= 1 << j;
1719          i++;
1720       }
1721    }
1722 
1723    assert(used_slots == 0x1ff && i == STD_VIDEO_AV1_NUM_REF_FRAMES);
1724 
1725    for (i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; ++i) {
1726       result.frame_refs[i] =
1727          av1_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : av1_pic_info->referenceNameSlotIndices[i];
1728    }
1729 
1730    result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = seq_hdr->pColorConfig->BitDepth - 8;
1731 
1732    int16_t *feature_data = (int16_t *)probs_ptr;
1733    int fd_idx = 0;
1734    for (i = 0; i < 8; ++i) {
1735       result.feature_mask[i] = pi->pSegmentation->FeatureEnabled[i];
1736       for (j = 0; j < 8; ++j) {
1737          result.feature_data[i][j] = pi->pSegmentation->FeatureData[i][j];
1738          feature_data[fd_idx++] = result.feature_data[i][j];
1739       }
1740    }
1741 
1742    memcpy(((char *)probs_ptr + 128), result.feature_mask, 8);
1743    result.cdef_damping = pi->pCDEF->cdef_damping_minus_3 + 3;
1744    result.cdef_bits = pi->pCDEF->cdef_bits;
1745    for (i = 0; i < 8; ++i) {
1746       result.cdef_strengths[i] = (pi->pCDEF->cdef_y_pri_strength[i] << 2) + pi->pCDEF->cdef_y_sec_strength[i];
1747       result.cdef_uv_strengths[i] = (pi->pCDEF->cdef_uv_pri_strength[i] << 2) + pi->pCDEF->cdef_uv_sec_strength[i];
1748    }
1749 
1750    if (pi->flags.UsesLr) {
1751       for (int plane = 0; plane < STD_VIDEO_AV1_MAX_NUM_PLANES; plane++) {
1752          result.frame_restoration_type[plane] = pi->pLoopRestoration->FrameRestorationType[plane];
1753          result.log2_restoration_unit_size_minus5[plane] = pi->pLoopRestoration->LoopRestorationSize[plane];
1754       }
1755    }
1756 
1757    if (seq_hdr->pColorConfig->BitDepth > 8) {
1758       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 ||
1759           vid->vk.picture_format == VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) {
1760          result.p010_mode = 1;
1761          result.msb_mode = 1;
1762       } else {
1763          result.luma_10to8 = 1;
1764          result.chroma_10to8 = 1;
1765       }
1766    }
1767 
1768    result.preskip_segid = 0;
1769    result.last_active_segid = 0;
1770    for (i = 0; i < 8; i++) {
1771       for (j = 0; j < 8; j++) {
1772          if (result.feature_mask[i] & (1 << j)) {
1773             result.last_active_segid = i;
1774             if (j >= 5)
1775                result.preskip_segid = 1;
1776          }
1777       }
1778    }
1779    result.seg_lossless_flag = 0;
1780    for (i = 0; i < 8; ++i) {
1781       int av1_get_qindex, qindex;
1782       int segfeature_active = result.feature_mask[i] & (1 << 0);
1783       if (segfeature_active) {
1784          int seg_qindex = result.base_qindex + result.feature_data[i][0];
1785          av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex);
1786       } else {
1787          av1_get_qindex = result.base_qindex;
1788       }
1789       qindex = pi->flags.segmentation_enabled ? av1_get_qindex : result.base_qindex;
1790       result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 && result.u_dc_delta_q == 0 &&
1791                                     result.v_dc_delta_q == 0 && result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0)
1792                                    << i);
1793    }
1794 
1795    rvcn_dec_film_grain_params_t *fg_params = &result.film_grain;
1796    fg_params->apply_grain = pi->flags.apply_grain;
1797    if (fg_params->apply_grain) {
1798       rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)((char *)probs_ptr + 256);
1799       fg_params->random_seed = pi->pFilmGrain->grain_seed;
1800       fg_params->grain_scale_shift = pi->pFilmGrain->grain_scale_shift;
1801       fg_params->scaling_shift = pi->pFilmGrain->grain_scaling_minus_8 + 8;
1802       fg_params->chroma_scaling_from_luma = pi->pFilmGrain->flags.chroma_scaling_from_luma;
1803       fg_params->num_y_points = pi->pFilmGrain->num_y_points;
1804       fg_params->num_cb_points = pi->pFilmGrain->num_cb_points;
1805       fg_params->num_cr_points = pi->pFilmGrain->num_cr_points;
1806       fg_params->cb_mult = pi->pFilmGrain->cb_mult;
1807       fg_params->cb_luma_mult = pi->pFilmGrain->cb_luma_mult;
1808       fg_params->cb_offset = pi->pFilmGrain->cb_offset;
1809       fg_params->cr_mult = pi->pFilmGrain->cr_mult;
1810       fg_params->cr_luma_mult = pi->pFilmGrain->cr_luma_mult;
1811       fg_params->cr_offset = pi->pFilmGrain->cr_offset;
1812       fg_params->bit_depth_minus_8 = result.bit_depth_luma_minus8;
1813       for (i = 0; i < fg_params->num_y_points; ++i) {
1814          fg_params->scaling_points_y[i][0] = pi->pFilmGrain->point_y_value[i];
1815          fg_params->scaling_points_y[i][1] = pi->pFilmGrain->point_y_scaling[i];
1816       }
1817       for (i = 0; i < fg_params->num_cb_points; ++i) {
1818          fg_params->scaling_points_cb[i][0] = pi->pFilmGrain->point_cb_value[i];
1819          fg_params->scaling_points_cb[i][1] = pi->pFilmGrain->point_cb_scaling[i];
1820       }
1821       for (i = 0; i < fg_params->num_cr_points; ++i) {
1822          fg_params->scaling_points_cr[i][0] = pi->pFilmGrain->point_cr_value[i];
1823          fg_params->scaling_points_cr[i][1] = pi->pFilmGrain->point_cr_scaling[i];
1824       }
1825 
1826       fg_params->ar_coeff_lag = pi->pFilmGrain->ar_coeff_lag;
1827       fg_params->ar_coeff_shift = pi->pFilmGrain->ar_coeff_shift_minus_6 + 6;
1828 
1829       for (i = 0; i < 24; ++i)
1830          fg_params->ar_coeffs_y[i] = pi->pFilmGrain->ar_coeffs_y_plus_128[i] - 128;
1831 
1832       for (i = 0; i < 25; ++i) {
1833          fg_params->ar_coeffs_cb[i] = pi->pFilmGrain->ar_coeffs_cb_plus_128[i] - 128;
1834          fg_params->ar_coeffs_cr[i] = pi->pFilmGrain->ar_coeffs_cr_plus_128[i] - 128;
1835       }
1836 
1837       fg_params->overlap_flag = pi->pFilmGrain->flags.overlap_flag;
1838       fg_params->clip_to_restricted_range = pi->pFilmGrain->flags.clip_to_restricted_range;
1839       ac_vcn_av1_init_film_grain_buffer(fg_params, fg_buf);
1840    }
1841 
1842    result.uncompressed_header_size = 0;
1843    for (i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; ++i) {
1844       result.global_motion[i].wmtype = pi->pGlobalMotion->GmType[i];
1845       for (j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; ++j)
1846          result.global_motion[i].wmmat[j] = pi->pGlobalMotion->gm_params[i][j];
1847    }
1848    for (i = 0; i < av1_pic_info->tileCount && i < 256; ++i) {
1849       result.tile_info[i].offset = av1_pic_info->pTileOffsets[i];
1850       result.tile_info[i].size = av1_pic_info->pTileSizes[i];
1851    }
1852 
1853    return result;
1854 }
1855 
1856 
1857 static bool
rvcn_dec_message_decode(struct radv_cmd_buffer * cmd_buffer,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_probs_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)1858 rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid,
1859                         struct radv_video_session_params *params, void *ptr, void *it_probs_ptr, uint32_t *slice_offset,
1860                         const struct VkVideoDecodeInfoKHR *frame_info)
1861 {
1862    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1863    const struct radv_physical_device *pdev = radv_device_physical(device);
1864    rvcn_dec_message_header_t *header;
1865    rvcn_dec_message_index_t *index_codec;
1866    rvcn_dec_message_decode_t *decode;
1867    rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
1868    rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
1869    void *codec;
1870    unsigned sizes = 0, offset_decode, offset_codec, offset_dynamic_dpb;
1871    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
1872    struct radv_image *img = dst_iv->image;
1873    struct radv_image_plane *luma = &img->planes[0];
1874    struct radv_image_plane *chroma = &img->planes[1];
1875 
1876    header = ptr;
1877    sizes += sizeof(rvcn_dec_message_header_t);
1878 
1879    index_codec = (void *)((char *)header + sizes);
1880    sizes += sizeof(rvcn_dec_message_index_t);
1881 
1882    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1883       index_dynamic_dpb = (void *)((char *)header + sizes);
1884       sizes += sizeof(rvcn_dec_message_index_t);
1885    }
1886 
1887    offset_decode = sizes;
1888    decode = (void *)((char *)header + sizes);
1889    sizes += sizeof(rvcn_dec_message_decode_t);
1890 
1891    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1892       offset_dynamic_dpb = sizes;
1893       dynamic_dpb_t2 = (void *)((char *)header + sizes);
1894       sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1895    }
1896 
1897    offset_codec = sizes;
1898    codec = (void *)((char *)header + sizes);
1899 
1900    memset(ptr, 0, sizes);
1901 
1902    header->header_size = sizeof(rvcn_dec_message_header_t);
1903    header->total_size = sizes;
1904    header->msg_type = RDECODE_MSG_DECODE;
1905    header->stream_handle = vid->stream_handle;
1906    header->status_report_feedback_number = vid->dbg_frame_cnt++;
1907 
1908    header->index[0].message_id = RDECODE_MESSAGE_DECODE;
1909    header->index[0].offset = offset_decode;
1910    header->index[0].size = sizeof(rvcn_dec_message_decode_t);
1911    header->index[0].filled = 0;
1912    header->num_buffers = 1;
1913 
1914    index_codec->offset = offset_codec;
1915    index_codec->filled = 0;
1916    ++header->num_buffers;
1917 
1918    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1919       index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
1920       index_dynamic_dpb->offset = offset_dynamic_dpb;
1921       index_dynamic_dpb->filled = 0;
1922       ++header->num_buffers;
1923       index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1924    }
1925 
1926    decode->stream_type = vid->stream_type;
1927    decode->decode_flags = 0;
1928    decode->width_in_samples = frame_info->dstPictureResource.codedExtent.width;
1929    decode->height_in_samples = frame_info->dstPictureResource.codedExtent.height;
1930 
1931    decode->bsd_size = frame_info->srcBufferRange;
1932 
1933    decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size;
1934    decode->sct_size = 0;
1935    decode->sc_coeff_size = 0;
1936 
1937    decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
1938 
1939    decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
1940    decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
1941 
1942    if (luma->surface.meta_offset) {
1943       fprintf(stderr, "DCC SURFACES NOT SUPPORTED.\n");
1944       return false;
1945    }
1946 
1947    decode->dt_tiling_mode = 0;
1948    decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
1949    decode->dt_array_mode = pdev->vid_addr_gfx_mode;
1950    decode->dt_field_mode = vid->interlaced ? 1 : 0;
1951    decode->dt_surf_tile_config = 0;
1952    decode->dt_uv_surf_tile_config = 0;
1953 
1954    decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
1955    decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
1956 
1957    if (decode->dt_field_mode) {
1958       decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
1959       decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
1960    } else {
1961       decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
1962       decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
1963    }
1964    if (vid->stream_type == RDECODE_CODEC_AV1)
1965       decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
1966 
1967    *slice_offset = 0;
1968 
1969    /* Intra-only decoding will only work without a setup slot for AV1
1970     * currently, other codecs require the application to pass a
1971     * setup slot for this use-case, since the FW is not able to skip write-out
1972     * for H26X.  In order to fix that properly, additional scratch space will
1973     * be needed in the video session just for intra-only DPB targets.
1974     */
1975    int dpb_update_required = 1;
1976 
1977    switch (vid->vk.op) {
1978    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
1979       index_codec->size = sizeof(rvcn_dec_message_avc_t);
1980       rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples,
1981                                                 &decode->height_in_samples, it_probs_ptr);
1982       memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
1983       index_codec->message_id = RDECODE_MESSAGE_AVC;
1984       break;
1985    }
1986    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
1987       index_codec->size = sizeof(rvcn_dec_message_hevc_t);
1988       rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info,
1989                                                   &decode->width_in_samples,
1990                                                   &decode->height_in_samples,
1991                                                   it_probs_ptr);
1992       memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
1993       index_codec->message_id = RDECODE_MESSAGE_HEVC;
1994       break;
1995    }
1996    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
1997       index_codec->size = sizeof(rvcn_dec_message_av1_t);
1998       rvcn_dec_message_av1_t av1 = get_av1_msg(device, vid, params, frame_info, it_probs_ptr, &dpb_update_required);
1999       memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
2000       index_codec->message_id = RDECODE_MESSAGE_AV1;
2001       assert(frame_info->referenceSlotCount < 9);
2002       break;
2003    }
2004    default:
2005       unreachable("unknown operation");
2006    }
2007 
2008    if (dpb_update_required)
2009       assert(frame_info->pSetupReferenceSlot != NULL);
2010 
2011    struct radv_image_view *dpb_iv =
2012       dpb_update_required
2013          ? radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding)
2014          : NULL;
2015    struct radv_image *dpb = dpb_update_required ? dpb_iv->image : img;
2016 
2017    int dpb_array_idx = 0;
2018    if (dpb_update_required) {
2019       if (dpb_iv->vk.view_type == VK_IMAGE_VIEW_TYPE_2D_ARRAY)
2020          dpb_array_idx = frame_info->pSetupReferenceSlot->pPictureResource->baseArrayLayer;
2021    }
2022 
2023    decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2024    decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
2025    decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
2026    decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
2027    decode->db_array_mode = pdev->vid_addr_gfx_mode;
2028 
2029    decode->hw_ctxt_size = vid->ctx.size;
2030 
2031    if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
2032       return true;
2033 
2034    uint64_t addr;
2035    radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2036    addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2037 
2038    addr += dpb_array_idx * (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size);
2039    dynamic_dpb_t2->dpbCurrLo = addr;
2040    dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2041 
2042    if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
2043       /* The following loop will fill in the references for the current frame,
2044        * this ensures all DPB addresses are "valid" (pointing at the current
2045        * decode target), so that the firmware doesn't evict things it should not.
2046        * It will not perform any actual writes to these dummy slots.
2047        */
2048       for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
2049          dynamic_dpb_t2->dpbAddrHi[i] = addr;
2050          dynamic_dpb_t2->dpbAddrLo[i] = addr >> 32;
2051       }
2052    }
2053 
2054    for (int i = 0; i < frame_info->referenceSlotCount; i++) {
2055       int32_t slot_idx = frame_info->pReferenceSlots[i].slotIndex;
2056       assert(slot_idx >= 0 && slot_idx < 16);
2057       struct radv_image_view *f_dpb_iv =
2058          radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
2059       assert(f_dpb_iv != NULL);
2060       struct radv_image *dpb_img = f_dpb_iv->image;
2061       int f_dpb_array_idx = 0;
2062       if (f_dpb_iv->vk.view_type == VK_IMAGE_VIEW_TYPE_2D_ARRAY)
2063          f_dpb_array_idx = frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer;
2064 
2065       radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
2066       addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + dpb_img->bindings[0].offset;
2067       addr += f_dpb_array_idx * (dpb_img->planes[0].surface.u.gfx9.surf_slice_size + dpb_img->planes[1].surface.u.gfx9.surf_slice_size);
2068       dynamic_dpb_t2->dpbAddrLo[i] = addr;
2069       dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
2070 
2071       ++dynamic_dpb_t2->dpbArraySize;
2072    }
2073 
2074    radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2075    addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2076    addr += dpb_array_idx * (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size);
2077    dynamic_dpb_t2->dpbCurrLo = addr;
2078    dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2079 
2080    decode->decode_flags = 1;
2081    dynamic_dpb_t2->dpbConfigFlags = 0;
2082    dynamic_dpb_t2->dpbLumaPitch = luma->surface.u.gfx9.surf_pitch;
2083    dynamic_dpb_t2->dpbLumaAlignedHeight = luma->surface.u.gfx9.surf_height;
2084    dynamic_dpb_t2->dpbLumaAlignedSize = luma->surface.u.gfx9.surf_slice_size;
2085 
2086    dynamic_dpb_t2->dpbChromaPitch = chroma->surface.u.gfx9.surf_pitch;
2087    dynamic_dpb_t2->dpbChromaAlignedHeight = chroma->surface.u.gfx9.surf_height;
2088    dynamic_dpb_t2->dpbChromaAlignedSize = chroma->surface.u.gfx9.surf_slice_size;
2089 
2090    return true;
2091 }
2092 
2093 static struct ruvd_h264
get_uvd_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)2094 get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
2095                  const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
2096                  uint32_t *height_in_samples, void *it_ptr)
2097 {
2098    struct ruvd_h264 result;
2099    const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
2100       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
2101 
2102    *slice_offset = h264_pic_info->pSliceOffsets[0];
2103 
2104    memset(&result, 0, sizeof(result));
2105 
2106    const StdVideoH264SequenceParameterSet *sps =
2107       vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
2108    switch (sps->profile_idc) {
2109    case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
2110       result.profile = RUVD_H264_PROFILE_BASELINE;
2111       break;
2112    case STD_VIDEO_H264_PROFILE_IDC_MAIN:
2113       result.profile = RUVD_H264_PROFILE_MAIN;
2114       break;
2115    case STD_VIDEO_H264_PROFILE_IDC_HIGH:
2116       result.profile = RUVD_H264_PROFILE_HIGH;
2117       break;
2118    default:
2119       fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
2120       result.profile = RUVD_H264_PROFILE_MAIN;
2121       break;
2122    }
2123 
2124    *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
2125    *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
2126    if (!sps->flags.frame_mbs_only_flag)
2127       *height_in_samples *= 2;
2128    result.level = get_h264_level(sps->level_idc);
2129 
2130    result.sps_info_flags = 0;
2131 
2132    result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
2133    result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
2134    result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
2135    result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
2136    result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
2137 
2138    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2139    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2140    result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
2141    result.pic_order_cnt_type = sps->pic_order_cnt_type;
2142    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2143 
2144    result.chroma_format = sps->chroma_format_idc;
2145 
2146    const StdVideoH264PictureParameterSet *pps =
2147       vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
2148    result.pps_info_flags = 0;
2149    result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
2150    result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
2151    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
2152    result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
2153    result.pps_info_flags |= pps->weighted_bipred_idc << 4;
2154    result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
2155    result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
2156    result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
2157 
2158    result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
2159    result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
2160    result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
2161 
2162    StdVideoH264ScalingLists scaling_lists;
2163    vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
2164    update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
2165 
2166    memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
2167    memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
2168    memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
2169 
2170    result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2171    result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2172 
2173    result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
2174    result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
2175 
2176    result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
2177 
2178    result.num_ref_frames = sps->max_num_ref_frames;
2179    memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
2180    memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
2181    for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
2182       int idx = frame_info->pReferenceSlots[i].slotIndex;
2183       const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
2184          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
2185 
2186       result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
2187       result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
2188       result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
2189 
2190       result.ref_frame_list[i] = idx;
2191 
2192       if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
2193          result.ref_frame_list[i] |= 0x80;
2194    }
2195    result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
2196    result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
2197 
2198    return result;
2199 }
2200 
2201 static struct ruvd_h265
get_uvd_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)2202 get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
2203                  const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples,
2204                  uint32_t *height_in_samples, void *it_ptr)
2205 {
2206    const struct radv_physical_device *pdev = radv_device_physical(device);
2207    struct ruvd_h265 result;
2208    int i, j;
2209    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
2210       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
2211 
2212    memset(&result, 0, sizeof(result));
2213 
2214    const StdVideoH265SequenceParameterSet *sps =
2215       vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
2216    const StdVideoH265PictureParameterSet *pps =
2217       vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
2218 
2219    result.sps_info_flags = 0;
2220    result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
2221    result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
2222    result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
2223    result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
2224    result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
2225    result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
2226    result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
2227    result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
2228    result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
2229 
2230    if (pdev->info.family == CHIP_CARRIZO)
2231       result.sps_info_flags |= 1 << 9;
2232 
2233    *width_in_samples = sps->pic_width_in_luma_samples;
2234    *height_in_samples = sps->pic_height_in_luma_samples;
2235    result.chroma_format = sps->chroma_format_idc;
2236    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2237    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2238    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2239    result.sps_max_dec_pic_buffering_minus1 =
2240       sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
2241    result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
2242    result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
2243    result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
2244    result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
2245    result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
2246    result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
2247    if (sps->flags.pcm_enabled_flag) {
2248       result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
2249       result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
2250       result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
2251       result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
2252    }
2253    result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
2254 
2255    result.pps_info_flags = 0;
2256    result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
2257    result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
2258    result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
2259    result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
2260    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
2261    result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
2262    result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
2263    result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
2264    result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
2265    result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
2266    result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
2267    result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
2268    result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
2269    result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
2270    result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
2271    result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
2272    result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
2273    result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
2274    result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
2275    result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
2276 
2277    result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
2278    result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
2279    result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2280    result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2281    result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
2282    result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
2283    result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
2284    result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
2285    result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
2286    result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
2287    result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
2288    result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
2289    result.init_qp_minus26 = pps->init_qp_minus26;
2290 
2291    for (i = 0; i < 19; ++i)
2292       result.column_width_minus1[i] = pps->column_width_minus1[i];
2293 
2294    for (i = 0; i < 21; ++i)
2295       result.row_height_minus1[i] = pps->row_height_minus1[i];
2296 
2297    result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
2298    result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
2299 
2300    uint8_t idxs[16];
2301    memset(result.poc_list, 0, 16 * sizeof(int));
2302    memset(result.ref_pic_list, 0x7f, 16);
2303    memset(idxs, 0xff, 16);
2304    for (i = 0; i < frame_info->referenceSlotCount; i++) {
2305       const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
2306          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
2307       int idx = frame_info->pReferenceSlots[i].slotIndex;
2308       result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
2309       result.ref_pic_list[i] = idx;
2310       idxs[idx] = i;
2311    }
2312    result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
2313 
2314 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
2315    for (i = 0; i < 8; ++i)
2316       result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
2317 
2318    for (i = 0; i < 8; ++i)
2319       result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
2320 
2321    for (i = 0; i < 8; ++i)
2322       result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
2323 
2324    const StdVideoH265ScalingLists *scaling_lists = NULL;
2325    if (pps->flags.pps_scaling_list_data_present_flag)
2326       scaling_lists = pps->pScalingLists;
2327    else if (sps->flags.sps_scaling_list_data_present_flag)
2328       scaling_lists = sps->pScalingLists;
2329 
2330    update_h265_scaling(it_ptr, scaling_lists);
2331    if (scaling_lists) {
2332       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
2333          result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
2334 
2335       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
2336          result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
2337    }
2338 
2339    for (i = 0; i < 2; i++) {
2340       for (j = 0; j < 15; j++)
2341          result.direct_reflist[i][j] = 0xff;
2342    }
2343 
2344    if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
2345       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
2346          result.p010_mode = 1;
2347          result.msb_mode = 1;
2348       } else {
2349          result.p010_mode = 0;
2350          result.luma_10to8 = 5;
2351          result.chroma_10to8 = 5;
2352          result.sclr_luma10to8 = 4;
2353          result.sclr_chroma10to8 = 4;
2354       }
2355    }
2356 
2357    return result;
2358 }
2359 
2360 static unsigned
texture_offset_legacy(struct radeon_surf * surface,unsigned layer)2361 texture_offset_legacy(struct radeon_surf *surface, unsigned layer)
2362 {
2363    return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
2364           layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
2365 }
2366 
2367 static bool
ruvd_dec_message_decode(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)2368 ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *vid,
2369                         struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
2370                         const struct VkVideoDecodeInfoKHR *frame_info)
2371 {
2372    const struct radv_physical_device *pdev = radv_device_physical(device);
2373    struct ruvd_msg *msg = ptr;
2374    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2375    struct radv_image *img = dst_iv->image;
2376    struct radv_image_plane *luma = &img->planes[0];
2377    struct radv_image_plane *chroma = &img->planes[1];
2378    struct radv_image_view *dpb_iv =
2379       radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2380    struct radv_image *dpb = dpb_iv->image;
2381 
2382    memset(msg, 0, sizeof(struct ruvd_msg));
2383    msg->size = sizeof(*msg);
2384    msg->msg_type = RUVD_MSG_DECODE;
2385    msg->stream_handle = vid->stream_handle;
2386    msg->status_report_feedback_number = vid->dbg_frame_cnt++;
2387 
2388    msg->body.decode.stream_type = vid->stream_type;
2389    msg->body.decode.decode_flags = 0x1;
2390    msg->body.decode.width_in_samples = frame_info->dstPictureResource.codedExtent.width;
2391    msg->body.decode.height_in_samples = frame_info->dstPictureResource.codedExtent.height;
2392 
2393    msg->body.decode.dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2394    msg->body.decode.bsd_size = frame_info->srcBufferRange;
2395    msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);
2396 
2397    if (vid->stream_type == RUVD_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10)
2398       msg->body.decode.dpb_reserved = vid->ctx.size;
2399 
2400    *slice_offset = 0;
2401    switch (vid->vk.op) {
2402    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2403       msg->body.decode.codec.h264 =
2404          get_uvd_h264_msg(vid, params, frame_info, slice_offset, &msg->body.decode.width_in_samples,
2405                           &msg->body.decode.height_in_samples, it_ptr);
2406       break;
2407    }
2408    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2409       msg->body.decode.codec.h265 = get_uvd_h265_msg(device, vid, params, frame_info,
2410                                                      &msg->body.decode.width_in_samples,
2411                                                      &msg->body.decode.height_in_samples,
2412                                                      it_ptr);
2413 
2414       if (vid->ctx.mem)
2415          msg->body.decode.dpb_reserved = vid->ctx.size;
2416       break;
2417    }
2418    default:
2419       return false;
2420    }
2421 
2422    msg->body.decode.dt_field_mode = false;
2423 
2424    if (pdev->info.gfx_level >= GFX9) {
2425       msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2426       msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2427       msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2428       msg->body.decode.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
2429       msg->body.decode.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
2430       if (msg->body.decode.dt_field_mode) {
2431          msg->body.decode.dt_luma_bottom_offset =
2432             luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
2433          msg->body.decode.dt_chroma_bottom_offset =
2434             chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
2435       } else {
2436          msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2437          msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2438       }
2439       msg->body.decode.dt_surf_tile_config = 0;
2440    } else {
2441       msg->body.decode.dt_pitch = luma->surface.u.legacy.level[0].nblk_x * luma->surface.blk_w;
2442       switch (luma->surface.u.legacy.level[0].mode) {
2443       case RADEON_SURF_MODE_LINEAR_ALIGNED:
2444          msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2445          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2446          break;
2447       case RADEON_SURF_MODE_1D:
2448          msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2449          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
2450          break;
2451       case RADEON_SURF_MODE_2D:
2452          msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2453          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
2454          break;
2455       default:
2456          assert(0);
2457          break;
2458       }
2459 
2460       msg->body.decode.dt_luma_top_offset = texture_offset_legacy(&luma->surface, 0);
2461       if (chroma)
2462          msg->body.decode.dt_chroma_top_offset = texture_offset_legacy(&chroma->surface, 0);
2463       if (msg->body.decode.dt_field_mode) {
2464          msg->body.decode.dt_luma_bottom_offset = texture_offset_legacy(&luma->surface, 1);
2465          if (chroma)
2466             msg->body.decode.dt_chroma_bottom_offset = texture_offset_legacy(&chroma->surface, 1);
2467       } else {
2468          msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2469          msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2470       }
2471 
2472       if (chroma) {
2473          assert(luma->surface.u.legacy.bankw == chroma->surface.u.legacy.bankw);
2474          assert(luma->surface.u.legacy.bankh == chroma->surface.u.legacy.bankh);
2475          assert(luma->surface.u.legacy.mtilea == chroma->surface.u.legacy.mtilea);
2476       }
2477 
2478       msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(util_logbase2(luma->surface.u.legacy.bankw));
2479       msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(util_logbase2(luma->surface.u.legacy.bankh));
2480       msg->body.decode.dt_surf_tile_config |=
2481          RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
2482    }
2483 
2484    if (pdev->info.family >= CHIP_STONEY)
2485       msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;
2486 
2487    msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;
2488    msg->body.decode.extension_support = 0x1;
2489 
2490    return true;
2491 }
2492 
2493 static void
ruvd_dec_message_create(struct radv_video_session * vid,void * ptr)2494 ruvd_dec_message_create(struct radv_video_session *vid, void *ptr)
2495 {
2496    struct ruvd_msg *msg = ptr;
2497 
2498    memset(ptr, 0, sizeof(*msg));
2499    msg->size = sizeof(*msg);
2500    msg->msg_type = RUVD_MSG_CREATE;
2501    msg->stream_handle = vid->stream_handle;
2502    msg->body.create.stream_type = vid->stream_type;
2503    msg->body.create.width_in_samples = vid->vk.max_coded.width;
2504    msg->body.create.height_in_samples = vid->vk.max_coded.height;
2505 }
2506 
2507 VKAPI_ATTR void VKAPI_CALL
radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoBeginCodingInfoKHR * pBeginInfo)2508 radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCodingInfoKHR *pBeginInfo)
2509 {
2510    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2511    VK_FROM_HANDLE(radv_video_session, vid, pBeginInfo->videoSession);
2512    VK_FROM_HANDLE(radv_video_session_params, params, pBeginInfo->videoSessionParameters);
2513 
2514    cmd_buffer->video.vid = vid;
2515    cmd_buffer->video.params = params;
2516 
2517    if (vid->encode)
2518       radv_video_enc_begin_coding(cmd_buffer);
2519 }
2520 
2521 static void
radv_vcn_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2522 radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2523 {
2524    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2525    const struct radv_physical_device *pdev = radv_device_physical(device);
2526    struct radv_video_session *vid = cmd_buffer->video.vid;
2527    uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
2528 
2529    void *ptr;
2530    uint32_t out_offset;
2531 
2532    if (vid->stream_type == RDECODE_CODEC_AV1) {
2533       uint8_t *ctxptr = radv_buffer_map(device->ws, vid->ctx.mem->bo);
2534       ctxptr += vid->ctx.offset;
2535       ac_vcn_av1_init_probs(pdev->av1_version, ctxptr);
2536       device->ws->buffer_unmap(device->ws, vid->ctx.mem->bo, false);
2537    }
2538    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2539 
2540    if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2541       radv_vcn_sq_start(cmd_buffer);
2542 
2543    rvcn_dec_message_create(vid, ptr, size);
2544    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2545    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2546    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2547 
2548    if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2549       radeon_check_space(device->ws, cmd_buffer->cs, 8);
2550       for (unsigned i = 0; i < 8; i++)
2551          radeon_emit(cmd_buffer->cs, 0x81ff);
2552    } else
2553       radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2554 }
2555 
2556 static void
radv_uvd_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2557 radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2558 {
2559    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2560    struct radv_video_session *vid = cmd_buffer->video.vid;
2561    uint32_t size = sizeof(struct ruvd_msg);
2562    void *ptr;
2563    uint32_t out_offset;
2564    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2565 
2566    ruvd_dec_message_create(vid, ptr);
2567    if (vid->sessionctx.mem)
2568       send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2569    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2570 
2571    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2572    int padsize = vid->sessionctx.mem ? 4 : 6;
2573    radeon_check_space(device->ws, cmd_buffer->cs, padsize);
2574    for (unsigned i = 0; i < padsize; i++)
2575       radeon_emit(cmd_buffer->cs, PKT2_NOP_PAD);
2576 }
2577 
2578 VKAPI_ATTR void VKAPI_CALL
radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoCodingControlInfoKHR * pCodingControlInfo)2579 radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
2580 {
2581    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2582    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2583    struct radv_physical_device *pdev = radv_device_physical(device);
2584 
2585    if (cmd_buffer->video.vid->encode) {
2586       radv_video_enc_control_video_coding(cmd_buffer, pCodingControlInfo);
2587       return;
2588    }
2589    if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
2590       if (radv_has_uvd(pdev))
2591          radv_uvd_cmd_reset(cmd_buffer);
2592       else
2593          radv_vcn_cmd_reset(cmd_buffer);
2594    }
2595 }
2596 
2597 VKAPI_ATTR void VKAPI_CALL
radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoEndCodingInfoKHR * pEndCodingInfo)2598 radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
2599 {
2600    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2601 
2602    if (cmd_buffer->video.vid->encode) {
2603       radv_video_enc_end_coding(cmd_buffer);
2604       return;
2605    }
2606 }
2607 
2608 static void
radv_uvd_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2609 radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2610 {
2611    VK_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2612    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2613    const struct radv_physical_device *pdev = radv_device_physical(device);
2614    struct radv_video_session *vid = cmd_buffer->video.vid;
2615    struct radv_video_session_params *params = cmd_buffer->video.params;
2616    unsigned size = sizeof(struct ruvd_msg);
2617    void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2618    uint32_t out_offset, fb_offset, it_probs_offset = 0;
2619    struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2620    unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
2621 
2622    radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
2623    fb_bo = cmd_buffer->upload.upload_bo;
2624    if (have_it(vid)) {
2625       radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2626       it_probs_bo = cmd_buffer->upload.upload_bo;
2627    }
2628 
2629    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2630    msg_bo = cmd_buffer->upload.upload_bo;
2631 
2632    uint32_t slice_offset;
2633    ruvd_dec_message_decode(device, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2634    rvcn_dec_message_feedback(fb_ptr);
2635    if (vid->sessionctx.mem)
2636       send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2637    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2638 
2639    if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2640       struct radv_image_view *dpb_iv =
2641          radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2642       struct radv_image *dpb = dpb_iv->image;
2643       send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2644    }
2645 
2646    if (vid->ctx.mem)
2647       send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2648 
2649    send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2650             src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2651 
2652    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2653    struct radv_image *img = dst_iv->image;
2654    send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2655    send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2656    if (have_it(vid))
2657       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2658 
2659    radeon_check_space(device->ws, cmd_buffer->cs, 2);
2660    set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
2661 }
2662 
2663 static void
radv_vcn_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2664 radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2665 {
2666    VK_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2667    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2668    const struct radv_physical_device *pdev = radv_device_physical(device);
2669    struct radv_video_session *vid = cmd_buffer->video.vid;
2670    struct radv_video_session_params *params = cmd_buffer->video.params;
2671    unsigned size = 0;
2672    void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2673    uint32_t out_offset, fb_offset, it_probs_offset = 0;
2674    struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2675 
2676    size += sizeof(rvcn_dec_message_header_t); /* header */
2677    size += sizeof(rvcn_dec_message_index_t);  /* codec */
2678    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2679       size += sizeof(rvcn_dec_message_index_t);
2680       size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2681    }
2682    size += sizeof(rvcn_dec_message_decode_t); /* decode */
2683    switch (vid->vk.op) {
2684    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
2685       size += sizeof(rvcn_dec_message_avc_t);
2686       break;
2687    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
2688       size += sizeof(rvcn_dec_message_hevc_t);
2689       break;
2690    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
2691       size += sizeof(rvcn_dec_message_av1_t);
2692       break;
2693    default:
2694       unreachable("unsupported codec.");
2695    }
2696 
2697    radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, &fb_ptr);
2698    fb_bo = cmd_buffer->upload.upload_bo;
2699    if (have_it(vid)) {
2700       radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2701       it_probs_bo = cmd_buffer->upload.upload_bo;
2702    } else if (have_probs(vid)) {
2703       radv_vid_buffer_upload_alloc(cmd_buffer, sizeof(rvcn_dec_av1_segment_fg_t), &it_probs_offset, &it_probs_ptr);
2704       it_probs_bo = cmd_buffer->upload.upload_bo;
2705    }
2706 
2707    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2708    msg_bo = cmd_buffer->upload.upload_bo;
2709 
2710    if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2711       radv_vcn_sq_start(cmd_buffer);
2712 
2713    uint32_t slice_offset;
2714    rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2715    rvcn_dec_message_feedback(fb_ptr);
2716    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2717    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2718 
2719    if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2720       struct radv_image_view *dpb_iv =
2721          radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2722       struct radv_image *dpb = dpb_iv->image;
2723       send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2724    }
2725 
2726    if (vid->ctx.mem)
2727       send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2728 
2729    send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2730             src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2731 
2732    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2733    struct radv_image *img = dst_iv->image;
2734    send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2735    send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2736    if (have_it(vid))
2737       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2738    else if (have_probs(vid))
2739       send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);
2740 
2741    if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2742       radeon_check_space(device->ws, cmd_buffer->cs, 2);
2743       set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
2744    } else
2745       radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2746 }
2747 
2748 VKAPI_ATTR void VKAPI_CALL
radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer,const VkVideoDecodeInfoKHR * frame_info)2749 radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
2750 {
2751    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2752    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2753    struct radv_physical_device *pdev = radv_device_physical(device);
2754 
2755    if (radv_has_uvd(pdev))
2756       radv_uvd_decode_video(cmd_buffer, frame_info);
2757    else
2758       radv_vcn_decode_video(cmd_buffer, frame_info);
2759 }
2760 
2761 void
radv_video_get_profile_alignments(struct radv_physical_device * pdev,const VkVideoProfileListInfoKHR * profile_list,uint32_t * width_align_out,uint32_t * height_align_out)2762 radv_video_get_profile_alignments(struct radv_physical_device *pdev, const VkVideoProfileListInfoKHR *profile_list,
2763                                   uint32_t *width_align_out, uint32_t *height_align_out)
2764 {
2765    vk_video_get_profile_alignments(profile_list, width_align_out, height_align_out);
2766    bool is_h265_main_10 = false;
2767 
2768    if (profile_list) {
2769       for (unsigned i = 0; i < profile_list->profileCount; i++) {
2770          if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) {
2771             const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
2772                vk_find_struct_const(profile_list->pProfiles[i].pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
2773             if (h265_profile->stdProfileIdc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
2774                is_h265_main_10 = true;
2775          }
2776       }
2777    } else
2778       is_h265_main_10 = true;
2779 
2780    uint32_t db_alignment = radv_video_get_db_alignment(pdev, 64, is_h265_main_10);
2781    *width_align_out = MAX2(*width_align_out, db_alignment);
2782    *height_align_out = MAX2(*height_align_out, db_alignment);
2783 }
2784