xref: /aosp_15_r20/external/libdrm/tests/amdgpu/vcn_tests.c (revision 7688df22e49036ff52a766b7101da3a49edadb8c)
1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
24 #include <stdio.h>
25 #include <string.h>
26 #include <inttypes.h>
27 #include <unistd.h>
28 
29 #include "CUnit/Basic.h"
30 
31 #include <unistd.h>
32 #include "util_math.h"
33 
34 #include "amdgpu_test.h"
35 #include "amdgpu_drm.h"
36 #include "amdgpu_internal.h"
37 #include "decode_messages.h"
38 #include "frame.h"
39 
/* Byte size of the indirect buffer allocated by suite_vcn_tests_init(). */
#define IB_SIZE		4096
/* Capacity of the resources[] array passed to amdgpu_bo_list_create(). */
#define MAX_RESOURCES	16

/* Buffer selectors understood by vcn_dec_cmd(). */
#define DECODE_CMD_MSG_BUFFER				0x000
#define DECODE_CMD_DPB_BUFFER				0x001
#define DECODE_CMD_DECODING_TARGET_BUFFER		0x002
#define DECODE_CMD_FEEDBACK_BUFFER			0x003
#define DECODE_CMD_PROB_TBL_BUFFER			0x004
#define DECODE_CMD_SESSION_CONTEXT_BUFFER		0x005
#define DECODE_CMD_BITSTREAM_BUFFER			0x100
#define DECODE_CMD_IT_SCALING_TABLE_BUFFER		0x204
#define DECODE_CMD_CONTEXT_BUFFER			0x206

/* package_type value written into rvcn_decode_ib_package_t by vcn_dec_cmd(). */
#define DECODE_IB_PARAM_DECODE_BUFFER			0x1

/* Bits OR-ed into rvcn_decode_buffer_t.valid_buf_flag, one per buffer kind. */
#define DECODE_CMDBUF_FLAGS_MSG_BUFFER			0x000001
#define DECODE_CMDBUF_FLAGS_DPB_BUFFER			0x000002
#define DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER		0x000004
#define DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER	0x000008
#define DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER		0x000010
#define DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER		0x000200
#define DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER		0x000800
#define DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER		0x001000
#define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER	0x100000

/*
 * Ring selection, decided in suite_vcn_tests_enable(): both flags are set
 * when the VCN IP major version is 4 or newer.
 */
static bool vcn_dec_sw_ring = false;
static bool vcn_unified_ring = false;

/* H.264 NAL unit type codes. */
#define H264_NAL_TYPE_NON_IDR_SLICE	1
#define H264_NAL_TYPE_DP_A_SLICE	2
#define H264_NAL_TYPE_DP_B_SLICE	3
#define H264_NAL_TYPE_DP_C_SLICE	4
#define H264_NAL_TYPE_IDR_SLICE		5
#define H264_NAL_TYPE_SEI		6
#define H264_NAL_TYPE_SEQ_PARAM		7
#define H264_NAL_TYPE_PIC_PARAM		8
#define H264_NAL_TYPE_ACCESS_UNIT	9
#define H264_NAL_TYPE_END_OF_SEQ	10
#define H264_NAL_TYPE_END_OF_STREAM	11
#define H264_NAL_TYPE_FILLER_DATA	12
#define H264_NAL_TYPE_SEQ_EXTENSION	13

/* Three-byte start code value 00 00 01. */
#define H264_START_CODE			0x000001
83 
84 struct amdgpu_vcn_bo {
85 	amdgpu_bo_handle handle;
86 	amdgpu_va_handle va_handle;
87 	uint64_t addr;
88 	uint64_t size;
89 	uint8_t *ptr;
90 };
91 
/*
 * Decode buffer package written into the IB by vcn_dec_cmd() on the
 * software/unified ring path.  valid_buf_flag carries one
 * DECODE_CMDBUF_FLAGS_* bit per populated address; every address is split
 * into its upper 32 bits (_hi) followed by its lower 32 bits (_lo).
 * Member order defines the in-memory layout and must not change.
 */
typedef struct rvcn_decode_buffer_s {
	unsigned int valid_buf_flag;
	unsigned int msg_buffer_address_hi, msg_buffer_address_lo;
	unsigned int dpb_buffer_address_hi, dpb_buffer_address_lo;
	unsigned int target_buffer_address_hi, target_buffer_address_lo;
	unsigned int session_contex_buffer_address_hi, session_contex_buffer_address_lo;
	unsigned int bitstream_buffer_address_hi, bitstream_buffer_address_lo;
	unsigned int context_buffer_address_hi, context_buffer_address_lo;
	unsigned int feedback_buffer_address_hi, feedback_buffer_address_lo;
	unsigned int luma_hist_buffer_address_hi, luma_hist_buffer_address_lo;
	unsigned int prob_tbl_buffer_address_hi, prob_tbl_buffer_address_lo;
	unsigned int sclr_coeff_buffer_address_hi, sclr_coeff_buffer_address_lo;
	unsigned int it_sclr_table_buffer_address_hi, it_sclr_table_buffer_address_lo;
	unsigned int sclr_target_buffer_address_hi, sclr_target_buffer_address_lo;
	unsigned int cenc_size_info_buffer_address_hi, cenc_size_info_buffer_address_lo;
	unsigned int mpeg2_pic_param_buffer_address_hi, mpeg2_pic_param_buffer_address_lo;
	unsigned int mpeg2_mb_control_buffer_address_hi, mpeg2_mb_control_buffer_address_lo;
	unsigned int mpeg2_idct_coeff_buffer_address_hi, mpeg2_idct_coeff_buffer_address_lo;
} rvcn_decode_buffer_t;
127 
/* Two-dword header that vcn_dec_cmd() places in front of the decode buffer. */
typedef struct rvcn_decode_ib_package_s {
	unsigned int package_size;	/* bytes: this header plus the payload */
	unsigned int package_type;	/* e.g. DECODE_IB_PARAM_DECODE_BUFFER */
} rvcn_decode_ib_package_t;
132 
133 
/*
 * Register offsets used when building register-write style decode IBs.
 * Member order matches the positional initializers of the reg[] table.
 */
struct amdgpu_vcn_reg {
	uint32_t data0;	/* receives the low 32 address bits */
	uint32_t data1;	/* receives the high 32 address bits */
	uint32_t cmd;	/* receives the command value */
	uint32_t nop;	/* used to pad the IB */
	uint32_t cntl;	/* written with 1 at the end of a decode IB */
};
141 
/* Cursor state for the bit reader (bs_read_u1(), bs_eof() and friends). */
typedef struct BufferInfo_t {
	uint32_t numOfBitsInBuffer;	/* bits of decData not consumed yet */
	const uint8_t *decBuffer;	/* byte currently being read */
	uint8_t decData;		/* copy of the byte at decBuffer */
	uint32_t decBufferSize;		/* presumably the input length in bytes - TODO confirm */
	const uint8_t *end;		/* bs_eof() reports EOF once decBuffer >= end */
} bufferInfo;
149 
/* Fields collected by the H.264 bitstream parsing helpers in this file. */
typedef struct h264_decode_t {
	uint8_t profile;
	uint8_t level_idc;
	uint8_t nal_ref_idc;
	uint8_t nal_unit_type;	/* one of H264_NAL_TYPE_* */
	uint32_t pic_width;
	uint32_t pic_height;
	uint32_t slice_type;
} h264_decode;
158 
159 static amdgpu_device_handle device_handle;
160 static uint32_t major_version;
161 static uint32_t minor_version;
162 static uint32_t family_id;
163 static uint32_t chip_rev;
164 static uint32_t chip_id;
165 static uint32_t asic_id;
166 static uint32_t chip_rev;
167 static struct amdgpu_vcn_bo enc_buf;
168 static struct amdgpu_vcn_bo cpb_buf;
169 static uint32_t enc_task_id;
170 
171 static amdgpu_context_handle context_handle;
172 static amdgpu_bo_handle ib_handle;
173 static amdgpu_va_handle ib_va_handle;
174 static uint64_t ib_mc_address;
175 static uint32_t *ib_cpu;
176 static uint32_t *ib_checksum;
177 static uint32_t *ib_size_in_dw;
178 
179 static rvcn_decode_buffer_t *decode_buffer;
180 struct amdgpu_vcn_bo session_ctx_buf;
181 
182 static amdgpu_bo_handle resources[MAX_RESOURCES];
183 static unsigned num_resources;
184 
185 static uint8_t vcn_reg_index;
186 static struct amdgpu_vcn_reg reg[] = {
187 	{0x81c4, 0x81c5, 0x81c3, 0x81ff, 0x81c6},
188 	{0x504, 0x505, 0x503, 0x53f, 0x506},
189 	{0x10, 0x11, 0xf, 0x29, 0x26d},
190 };
191 
192 uint32_t gWidth, gHeight, gSliceType;
193 static uint32_t vcn_ip_version_major;
194 static uint32_t vcn_ip_version_minor;
195 static void amdgpu_cs_vcn_dec_create(void);
196 static void amdgpu_cs_vcn_dec_decode(void);
197 static void amdgpu_cs_vcn_dec_destroy(void);
198 
199 static void amdgpu_cs_vcn_enc_create(void);
200 static void amdgpu_cs_vcn_enc_encode(void);
201 static void amdgpu_cs_vcn_enc_destroy(void);
202 
203 static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc);
204 static void amdgpu_cs_sq_ib_tail(uint32_t *end);
205 static void h264_check_0s (bufferInfo * bufInfo, int count);
206 static int32_t h264_se (bufferInfo * bufInfo);
207 static inline uint32_t bs_read_u1(bufferInfo *bufinfo);
208 static inline int bs_eof(bufferInfo *bufinfo);
209 static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n);
210 static inline uint32_t bs_read_ue(bufferInfo* bufinfo);
211 static uint32_t remove_03 (uint8_t *bptr, uint32_t len);
212 static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo);
213 static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo *bufInfo);
214 static void h264_slice_header (h264_decode *dec, bufferInfo *bufInfo);
215 static uint8_t h264_parse_nal (h264_decode *dec, bufferInfo *bufInfo);
216 static uint32_t h264_find_next_start_code (uint8_t *pBuf, uint32_t bufLen);
217 static int verify_checksum(uint8_t *buffer, uint32_t buffer_size);
218 
219 CU_TestInfo vcn_tests[] = {
220 
221 	{ "VCN DEC create",  amdgpu_cs_vcn_dec_create },
222 	{ "VCN DEC decode",  amdgpu_cs_vcn_dec_decode },
223 	{ "VCN DEC destroy",  amdgpu_cs_vcn_dec_destroy },
224 
225 	{ "VCN ENC create",  amdgpu_cs_vcn_enc_create },
226 	{ "VCN ENC encode",  amdgpu_cs_vcn_enc_encode },
227 	{ "VCN ENC destroy",  amdgpu_cs_vcn_enc_destroy },
228 	CU_TEST_INFO_NULL,
229 };
230 
suite_vcn_tests_enable(void)231 CU_BOOL suite_vcn_tests_enable(void)
232 {
233 	struct drm_amdgpu_info_hw_ip info;
234 	bool enc_ring, dec_ring;
235 	int r;
236 
237 	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
238 				   &minor_version, &device_handle))
239 		return CU_FALSE;
240 
241 	family_id = device_handle->info.family_id;
242 	asic_id = device_handle->info.asic_id;
243 	chip_rev = device_handle->info.chip_rev;
244 	chip_id = device_handle->info.chip_external_rev;
245 
246 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info);
247 	if (!r) {
248 		vcn_ip_version_major = info.hw_ip_version_major;
249 		vcn_ip_version_minor = info.hw_ip_version_minor;
250 		enc_ring = !!info.available_rings;
251 		/* in vcn 4.0 it re-uses encoding queue as unified queue */
252 		if (vcn_ip_version_major >= 4) {
253 			vcn_unified_ring = true;
254 			vcn_dec_sw_ring = true;
255 			dec_ring = enc_ring;
256 		} else {
257 			r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info);
258 			dec_ring = !!info.available_rings;
259 		}
260 	}
261 
262 	if (amdgpu_device_deinitialize(device_handle))
263 		return CU_FALSE;
264 
265 	if (r) {
266 		printf("\n\nASIC query hw info failed\n");
267 		return CU_FALSE;
268 	}
269 
270 	if (!(dec_ring || enc_ring) ||
271 	    (family_id < AMDGPU_FAMILY_RV &&
272 	     (family_id == AMDGPU_FAMILY_AI &&
273 	      (chip_id - chip_rev) < 0x32))) {  /* Arcturus */
274 		printf("\n\nThe ASIC NOT support VCN, suite disabled\n");
275 		return CU_FALSE;
276 	}
277 
278 	if (!dec_ring) {
279 		amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE);
280 		amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE);
281 		amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE);
282 	}
283 
284 	if (family_id == AMDGPU_FAMILY_AI || !enc_ring) {
285 		amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE);
286 		amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE);
287 		amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE);
288 	}
289 
290 	if (vcn_ip_version_major == 1)
291 		vcn_reg_index = 0;
292 	else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0)
293 		vcn_reg_index = 1;
294 	else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) ||
295 				vcn_ip_version_major == 3)
296 		vcn_reg_index = 2;
297 
298 	return CU_TRUE;
299 }
300 
suite_vcn_tests_init(void)301 int suite_vcn_tests_init(void)
302 {
303 	int r;
304 
305 	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
306 				     &minor_version, &device_handle);
307 	if (r)
308 		return CUE_SINIT_FAILED;
309 
310 	family_id = device_handle->info.family_id;
311 
312 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
313 	if (r)
314 		return CUE_SINIT_FAILED;
315 
316 	r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
317 				    AMDGPU_GEM_DOMAIN_GTT, 0,
318 				    &ib_handle, (void**)&ib_cpu,
319 				    &ib_mc_address, &ib_va_handle);
320 	if (r)
321 		return CUE_SINIT_FAILED;
322 
323 	return CUE_SUCCESS;
324 }
325 
suite_vcn_tests_clean(void)326 int suite_vcn_tests_clean(void)
327 {
328 	int r;
329 
330 	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
331 			     ib_mc_address, IB_SIZE);
332 	if (r)
333 		return CUE_SCLEAN_FAILED;
334 
335 	r = amdgpu_cs_ctx_free(context_handle);
336 	if (r)
337 		return CUE_SCLEAN_FAILED;
338 
339 	r = amdgpu_device_deinitialize(device_handle);
340 	if (r)
341 		return CUE_SCLEAN_FAILED;
342 
343 	return CUE_SUCCESS;
344 }
345 
amdgpu_cs_sq_head(uint32_t * base,int * offset,bool enc)346 static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc)
347 {
348 	/* signature */
349 	*(base + (*offset)++) = 0x00000010;
350 	*(base + (*offset)++) = 0x30000002;
351 	ib_checksum = base + (*offset)++;
352 	ib_size_in_dw = base + (*offset)++;
353 
354 	/* engine info */
355 	*(base + (*offset)++) = 0x00000010;
356 	*(base + (*offset)++) = 0x30000001;
357 	*(base + (*offset)++) = enc ? 2 : 3;
358 	*(base + (*offset)++) = 0x00000000;
359 }
360 
amdgpu_cs_sq_ib_tail(uint32_t * end)361 static void amdgpu_cs_sq_ib_tail(uint32_t *end)
362 {
363 	uint32_t size_in_dw;
364 	uint32_t checksum = 0;
365 
366 	/* if the pointers are invalid, no need to process */
367 	if (ib_checksum == NULL || ib_size_in_dw == NULL)
368 		return;
369 
370 	size_in_dw = end - ib_size_in_dw - 1;
371 	*ib_size_in_dw = size_in_dw;
372 	*(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);
373 
374 	for (int i = 0; i < size_in_dw; i++)
375 		checksum += *(ib_checksum + 2 + i);
376 
377 	*ib_checksum = checksum;
378 
379 	ib_checksum = NULL;
380 	ib_size_in_dw = NULL;
381 }
382 
submit(unsigned ndw,unsigned ip)383 static int submit(unsigned ndw, unsigned ip)
384 {
385 	struct amdgpu_cs_request ibs_request = {0};
386 	struct amdgpu_cs_ib_info ib_info = {0};
387 	struct amdgpu_cs_fence fence_status = {0};
388 	uint32_t expired;
389 	int r;
390 
391 	ib_info.ib_mc_address = ib_mc_address;
392 	ib_info.size = ndw;
393 
394 	ibs_request.ip_type = ip;
395 
396 	r = amdgpu_bo_list_create(device_handle, num_resources, resources,
397 				  NULL, &ibs_request.resources);
398 	if (r)
399 		return r;
400 
401 	ibs_request.number_of_ibs = 1;
402 	ibs_request.ibs = &ib_info;
403 	ibs_request.fence_info.handle = NULL;
404 
405 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
406 	if (r)
407 		return r;
408 
409 	r = amdgpu_bo_list_destroy(ibs_request.resources);
410 	if (r)
411 		return r;
412 
413 	fence_status.context = context_handle;
414 	fence_status.ip_type = ip;
415 	fence_status.fence = ibs_request.seq_no;
416 
417 	r = amdgpu_cs_query_fence_status(&fence_status,
418 					 AMDGPU_TIMEOUT_INFINITE,
419 					 0, &expired);
420 	if (r)
421 		return r;
422 
423 	return 0;
424 }
425 
alloc_resource(struct amdgpu_vcn_bo * vcn_bo,unsigned size,unsigned domain)426 static void alloc_resource(struct amdgpu_vcn_bo *vcn_bo,
427 			unsigned size, unsigned domain)
428 {
429 	struct amdgpu_bo_alloc_request req = {0};
430 	amdgpu_bo_handle buf_handle;
431 	amdgpu_va_handle va_handle;
432 	uint64_t va = 0;
433 	int r;
434 
435 	req.alloc_size = ALIGN(size, 4096);
436 	req.preferred_heap = domain;
437 	r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
438 	CU_ASSERT_EQUAL(r, 0);
439 	r = amdgpu_va_range_alloc(device_handle,
440 				  amdgpu_gpu_va_range_general,
441 				  req.alloc_size, 1, 0, &va,
442 				  &va_handle, 0);
443 	CU_ASSERT_EQUAL(r, 0);
444 	r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0,
445 			    AMDGPU_VA_OP_MAP);
446 	CU_ASSERT_EQUAL(r, 0);
447 	vcn_bo->addr = va;
448 	vcn_bo->handle = buf_handle;
449 	vcn_bo->size = req.alloc_size;
450 	vcn_bo->va_handle = va_handle;
451 	r = amdgpu_bo_cpu_map(vcn_bo->handle, (void **)&vcn_bo->ptr);
452 	CU_ASSERT_EQUAL(r, 0);
453 	memset(vcn_bo->ptr, 0, size);
454 	r = amdgpu_bo_cpu_unmap(vcn_bo->handle);
455 	CU_ASSERT_EQUAL(r, 0);
456 }
457 
free_resource(struct amdgpu_vcn_bo * vcn_bo)458 static void free_resource(struct amdgpu_vcn_bo *vcn_bo)
459 {
460 	int r;
461 
462 	r = amdgpu_bo_va_op(vcn_bo->handle, 0, vcn_bo->size,
463 			    vcn_bo->addr, 0, AMDGPU_VA_OP_UNMAP);
464 	CU_ASSERT_EQUAL(r, 0);
465 
466 	r = amdgpu_va_range_free(vcn_bo->va_handle);
467 	CU_ASSERT_EQUAL(r, 0);
468 
469 	r = amdgpu_bo_free(vcn_bo->handle);
470 	CU_ASSERT_EQUAL(r, 0);
471 	memset(vcn_bo, 0, sizeof(*vcn_bo));
472 }
473 
vcn_dec_cmd(uint64_t addr,unsigned cmd,int * idx)474 static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx)
475 {
476 	if (vcn_dec_sw_ring == false) {
477 		ib_cpu[(*idx)++] = reg[vcn_reg_index].data0;
478 		ib_cpu[(*idx)++] = addr;
479 		ib_cpu[(*idx)++] = reg[vcn_reg_index].data1;
480 		ib_cpu[(*idx)++] = addr >> 32;
481 		ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd;
482 		ib_cpu[(*idx)++] = cmd << 1;
483 		return;
484 	}
485 
486 	/* Support decode software ring message */
487 	if (!(*idx)) {
488 		rvcn_decode_ib_package_t *ib_header;
489 
490 		if (vcn_unified_ring)
491 			amdgpu_cs_sq_head(ib_cpu, idx, false);
492 
493 		ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx];
494 		ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
495 			sizeof(struct rvcn_decode_ib_package_s);
496 
497 		(*idx)++;
498 		ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER);
499 		(*idx)++;
500 
501 		decode_buffer = (rvcn_decode_buffer_t *)&(ib_cpu[*idx]);
502 		*idx += sizeof(struct rvcn_decode_buffer_s) / 4;
503 		memset(decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
504 	}
505 
506 	switch(cmd) {
507 		case DECODE_CMD_MSG_BUFFER:
508 			decode_buffer->valid_buf_flag |= DECODE_CMDBUF_FLAGS_MSG_BUFFER;
509 			decode_buffer->msg_buffer_address_hi = (addr >> 32);
510 			decode_buffer->msg_buffer_address_lo = (addr);
511 		break;
512 		case DECODE_CMD_DPB_BUFFER:
513 			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DPB_BUFFER);
514 			decode_buffer->dpb_buffer_address_hi = (addr >> 32);
515 			decode_buffer->dpb_buffer_address_lo = (addr);
516 		break;
517 		case DECODE_CMD_DECODING_TARGET_BUFFER:
518 			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
519 			decode_buffer->target_buffer_address_hi = (addr >> 32);
520 			decode_buffer->target_buffer_address_lo = (addr);
521 		break;
522 		case DECODE_CMD_FEEDBACK_BUFFER:
523 			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
524 			decode_buffer->feedback_buffer_address_hi = (addr >> 32);
525 			decode_buffer->feedback_buffer_address_lo = (addr);
526 		break;
527 		case DECODE_CMD_PROB_TBL_BUFFER:
528 			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
529 			decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
530 			decode_buffer->prob_tbl_buffer_address_lo = (addr);
531 		break;
532 		case DECODE_CMD_SESSION_CONTEXT_BUFFER:
533 			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
534 			decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
535 			decode_buffer->session_contex_buffer_address_lo = (addr);
536 		break;
537 		case DECODE_CMD_BITSTREAM_BUFFER:
538 			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
539 			decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
540 			decode_buffer->bitstream_buffer_address_lo = (addr);
541 		break;
542 		case DECODE_CMD_IT_SCALING_TABLE_BUFFER:
543 			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
544 			decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
545 			decode_buffer->it_sclr_table_buffer_address_lo = (addr);
546 		break;
547 		case DECODE_CMD_CONTEXT_BUFFER:
548 			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
549 			decode_buffer->context_buffer_address_hi = (addr >> 32);
550 			decode_buffer->context_buffer_address_lo = (addr);
551 		break;
552 		default:
553 			printf("Not Support!\n");
554 	}
555 }
556 
amdgpu_cs_vcn_dec_create(void)557 static void amdgpu_cs_vcn_dec_create(void)
558 {
559 	struct amdgpu_vcn_bo msg_buf;
560 	unsigned ip;
561 	int len, r;
562 
563 	num_resources  = 0;
564 	alloc_resource(&msg_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
565 	alloc_resource(&session_ctx_buf, 32 * 4096, AMDGPU_GEM_DOMAIN_VRAM);
566 	resources[num_resources++] = msg_buf.handle;
567 	resources[num_resources++] = session_ctx_buf.handle;
568 	resources[num_resources++] = ib_handle;
569 
570 	r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
571 	CU_ASSERT_EQUAL(r, 0);
572 
573 	memset(msg_buf.ptr, 0, 4096);
574 	memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg));
575 
576 	len = 0;
577 
578 	vcn_dec_cmd(session_ctx_buf.addr, 5, &len);
579 	if (vcn_dec_sw_ring == true) {
580 		vcn_dec_cmd(msg_buf.addr, 0, &len);
581 	} else {
582 		ib_cpu[len++] = reg[vcn_reg_index].data0;
583 		ib_cpu[len++] = msg_buf.addr;
584 		ib_cpu[len++] = reg[vcn_reg_index].data1;
585 		ib_cpu[len++] = msg_buf.addr >> 32;
586 		ib_cpu[len++] = reg[vcn_reg_index].cmd;
587 		ib_cpu[len++] = 0;
588 		for (; len % 16; ) {
589 			ib_cpu[len++] = reg[vcn_reg_index].nop;
590 			ib_cpu[len++] = 0;
591 		}
592 	}
593 
594 	if (vcn_unified_ring) {
595 		amdgpu_cs_sq_ib_tail(ib_cpu + len);
596 		ip = AMDGPU_HW_IP_VCN_ENC;
597 	} else
598 		ip = AMDGPU_HW_IP_VCN_DEC;
599 
600 	r = submit(len, ip);
601 
602 	CU_ASSERT_EQUAL(r, 0);
603 
604 	free_resource(&msg_buf);
605 }
606 
amdgpu_cs_vcn_dec_decode(void)607 static void amdgpu_cs_vcn_dec_decode(void)
608 {
609 	const unsigned dpb_size = 15923584, dt_size = 737280;
610 	uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum;
611 	struct amdgpu_vcn_bo dec_buf;
612 	int size, len, i, r;
613 	unsigned ip;
614 	uint8_t *dec;
615 
616 	size = 4*1024; /* msg */
617 	size += 4*1024; /* fb */
618 	size += 4096; /*it_scaling_table*/
619 	size += ALIGN(sizeof(uvd_bitstream), 4*1024);
620 	size += ALIGN(dpb_size, 4*1024);
621 	size += ALIGN(dt_size, 4*1024);
622 
623 	num_resources = 0;
624 	alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT);
625 	resources[num_resources++] = dec_buf.handle;
626 	resources[num_resources++] = ib_handle;
627 
628 	r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr);
629 	dec = dec_buf.ptr;
630 
631 	CU_ASSERT_EQUAL(r, 0);
632 	memset(dec_buf.ptr, 0, size);
633 	memcpy(dec_buf.ptr, vcn_dec_decode_msg, sizeof(vcn_dec_decode_msg));
634 	memcpy(dec_buf.ptr + sizeof(vcn_dec_decode_msg),
635 			avc_decode_msg, sizeof(avc_decode_msg));
636 
637 	dec += 4*1024;
638 	memcpy(dec, feedback_msg, sizeof(feedback_msg));
639 	dec += 4*1024;
640 	memcpy(dec, uvd_it_scaling_table, sizeof(uvd_it_scaling_table));
641 
642 	dec += 4*1024;
643 	memcpy(dec, uvd_bitstream, sizeof(uvd_bitstream));
644 
645 	dec += ALIGN(sizeof(uvd_bitstream), 4*1024);
646 
647 	dec += ALIGN(dpb_size, 4*1024);
648 
649 	msg_addr = dec_buf.addr;
650 	fb_addr = msg_addr + 4*1024;
651 	it_addr = fb_addr + 4*1024;
652 	bs_addr = it_addr + 4*1024;
653 	dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024);
654 	ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
655 	dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024);
656 
657 	len = 0;
658 	vcn_dec_cmd(session_ctx_buf.addr, 0x5, &len);
659 	vcn_dec_cmd(msg_addr, 0x0, &len);
660 	vcn_dec_cmd(dpb_addr, 0x1, &len);
661 	vcn_dec_cmd(dt_addr, 0x2, &len);
662 	vcn_dec_cmd(fb_addr, 0x3, &len);
663 	vcn_dec_cmd(bs_addr, 0x100, &len);
664 	vcn_dec_cmd(it_addr, 0x204, &len);
665 	vcn_dec_cmd(ctx_addr, 0x206, &len);
666 
667 	if (vcn_dec_sw_ring == false) {
668 		ib_cpu[len++] = reg[vcn_reg_index].cntl;
669 		ib_cpu[len++] = 0x1;
670 		for (; len % 16; ) {
671 			ib_cpu[len++] = reg[vcn_reg_index].nop;
672 			ib_cpu[len++] = 0;
673 		}
674 	}
675 
676 	if (vcn_unified_ring) {
677 		amdgpu_cs_sq_ib_tail(ib_cpu + len);
678 		ip = AMDGPU_HW_IP_VCN_ENC;
679 	} else
680 		ip = AMDGPU_HW_IP_VCN_DEC;
681 
682 	r = submit(len, ip);
683 	CU_ASSERT_EQUAL(r, 0);
684 
685 	for (i = 0, sum = 0; i < dt_size; ++i)
686 		sum += dec[i];
687 
688 	CU_ASSERT_EQUAL(sum, SUM_DECODE);
689 
690 	free_resource(&dec_buf);
691 }
692 
amdgpu_cs_vcn_dec_destroy(void)693 static void amdgpu_cs_vcn_dec_destroy(void)
694 {
695 	struct amdgpu_vcn_bo msg_buf;
696 	unsigned ip;
697 	int len, r;
698 
699 	num_resources = 0;
700 	alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT);
701 	resources[num_resources++] = msg_buf.handle;
702 	resources[num_resources++] = ib_handle;
703 
704 	r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
705 	CU_ASSERT_EQUAL(r, 0);
706 
707 	memset(msg_buf.ptr, 0, 1024);
708 	memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg));
709 
710 	len = 0;
711 	vcn_dec_cmd(session_ctx_buf.addr, 5, &len);
712 	if (vcn_dec_sw_ring == true) {
713 		vcn_dec_cmd(msg_buf.addr, 0, &len);
714 	} else {
715 		ib_cpu[len++] = reg[vcn_reg_index].data0;
716 		ib_cpu[len++] = msg_buf.addr;
717 		ib_cpu[len++] = reg[vcn_reg_index].data1;
718 		ib_cpu[len++] = msg_buf.addr >> 32;
719 		ib_cpu[len++] = reg[vcn_reg_index].cmd;
720 		ib_cpu[len++] = 0;
721 		for (; len % 16; ) {
722 			ib_cpu[len++] = reg[vcn_reg_index].nop;
723 			ib_cpu[len++] = 0;
724 		}
725 	}
726 
727 	if (vcn_unified_ring) {
728 		amdgpu_cs_sq_ib_tail(ib_cpu + len);
729 		ip = AMDGPU_HW_IP_VCN_ENC;
730 	} else
731 		ip = AMDGPU_HW_IP_VCN_DEC;
732 
733 	r = submit(len, ip);
734 	CU_ASSERT_EQUAL(r, 0);
735 
736 	free_resource(&msg_buf);
737 	free_resource(&session_ctx_buf);
738 }
739 
amdgpu_cs_vcn_enc_create(void)740 static void amdgpu_cs_vcn_enc_create(void)
741 {
742 	int len, r;
743 	uint32_t *p_task_size = NULL;
744 	uint32_t task_offset = 0, st_offset;
745 	uint32_t *st_size = NULL;
746 	unsigned width = 160, height = 128, buf_size;
747 	uint32_t fw_maj = 1, fw_min = 9;
748 
749 	if (vcn_ip_version_major == 2) {
750 		fw_maj = 1;
751 		fw_min = 1;
752 	} else if (vcn_ip_version_major == 3) {
753 		fw_maj = 1;
754 		fw_min = 0;
755 	}
756 
757 	gWidth = width;
758 	gHeight = height;
759 	buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
760 	enc_task_id = 1;
761 
762 	num_resources = 0;
763 	alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT);
764 	alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT);
765 	resources[num_resources++] = enc_buf.handle;
766 	resources[num_resources++] = cpb_buf.handle;
767 	resources[num_resources++] = ib_handle;
768 
769 	r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr);
770 	memset(enc_buf.ptr, 0, 128 * 1024);
771 	r = amdgpu_bo_cpu_unmap(enc_buf.handle);
772 
773 	r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&enc_buf.ptr);
774 	memset(enc_buf.ptr, 0, buf_size * 2);
775 	r = amdgpu_bo_cpu_unmap(cpb_buf.handle);
776 
777 	len = 0;
778 
779 	if (vcn_unified_ring)
780 		amdgpu_cs_sq_head(ib_cpu, &len, true);
781 
782 	/* session info */
783 	st_offset = len;
784 	st_size = &ib_cpu[len++];	/* size */
785 	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
786 	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
787 	ib_cpu[len++] = enc_buf.addr >> 32;
788 	ib_cpu[len++] = enc_buf.addr;
789 	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE; */
790 	*st_size = (len - st_offset) * 4;
791 
792 	/* task info */
793 	task_offset = len;
794 	st_offset = len;
795 	st_size = &ib_cpu[len++];	/* size */
796 	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
797 	p_task_size = &ib_cpu[len++];
798 	ib_cpu[len++] = enc_task_id++;	/* task_id */
799 	ib_cpu[len++] = 0;	/* feedback */
800 	*st_size = (len - st_offset) * 4;
801 
802 	/* op init */
803 	st_offset = len;
804 	st_size = &ib_cpu[len++];	/* size */
805 	ib_cpu[len++] = 0x01000001;	/* RENCODE_IB_OP_INITIALIZE */
806 	*st_size = (len - st_offset) * 4;
807 
808 	/* session_init */
809 	st_offset = len;
810 	st_size = &ib_cpu[len++];	/* size */
811 	ib_cpu[len++] = 0x00000003;	/* RENCODE_IB_PARAM_SESSION_INIT */
812 	ib_cpu[len++] = 1;	/* RENCODE_ENCODE_STANDARD_H264 */
813 	ib_cpu[len++] = width;
814 	ib_cpu[len++] = height;
815 	ib_cpu[len++] = 0;
816 	ib_cpu[len++] = 0;
817 	ib_cpu[len++] = 0;	/* pre encode mode */
818 	ib_cpu[len++] = 0;	/* chroma enabled : false */
819 	ib_cpu[len++] = 0;
820 	ib_cpu[len++] = 0;
821 	*st_size = (len - st_offset) * 4;
822 
823 	/* slice control */
824 	st_offset = len;
825 	st_size = &ib_cpu[len++];	/* size */
826 	ib_cpu[len++] = 0x00200001;	/* RENCODE_H264_IB_PARAM_SLICE_CONTROL */
827 	ib_cpu[len++] = 0;	/* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */
828 	ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16;
829 	*st_size = (len - st_offset) * 4;
830 
831 	/* enc spec misc */
832 	st_offset = len;
833 	st_size = &ib_cpu[len++];	/* size */
834 	ib_cpu[len++] = 0x00200002;	/* RENCODE_H264_IB_PARAM_SPEC_MISC */
835 	ib_cpu[len++] = 0;	/* constrained intra pred flag */
836 	ib_cpu[len++] = 0;	/* cabac enable */
837 	ib_cpu[len++] = 0;	/* cabac init idc */
838 	ib_cpu[len++] = 1;	/* half pel enabled */
839 	ib_cpu[len++] = 1;	/* quarter pel enabled */
840 	ib_cpu[len++] = 100;	/* BASELINE profile */
841 	ib_cpu[len++] = 11;	/* level */
842 	if (vcn_ip_version_major >= 3) {
843 		ib_cpu[len++] = 0;	/* b_picture_enabled */
844 		ib_cpu[len++] = 0;	/* weighted_bipred_idc */
845 	}
846 	*st_size = (len - st_offset) * 4;
847 
848 	/* deblocking filter */
849 	st_offset = len;
850 	st_size = &ib_cpu[len++];	/* size */
851 	ib_cpu[len++] = 0x00200004;	/* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */
852 	ib_cpu[len++] = 0;	/* disable deblocking filter idc */
853 	ib_cpu[len++] = 0;	/* alpha c0 offset */
854 	ib_cpu[len++] = 0;	/* tc offset */
855 	ib_cpu[len++] = 0;	/* cb offset */
856 	ib_cpu[len++] = 0;	/* cr offset */
857 	*st_size = (len - st_offset) * 4;
858 
859 	/* layer control */
860 	st_offset = len;
861 	st_size = &ib_cpu[len++];	/* size */
862 	ib_cpu[len++] = 0x00000004;	/* RENCODE_IB_PARAM_LAYER_CONTROL */
863 	ib_cpu[len++] = 1;	/* max temporal layer */
864 	ib_cpu[len++] = 1;	/* no of temporal layer */
865 	*st_size = (len - st_offset) * 4;
866 
867 	/* rc_session init */
868 	st_offset = len;
869 	st_size = &ib_cpu[len++];	/* size */
870 	ib_cpu[len++] = 0x00000006;	/* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */
871 	ib_cpu[len++] = 0;	/* rate control */
872 	ib_cpu[len++] = 48;	/* vbv buffer level */
873 	*st_size = (len - st_offset) * 4;
874 
875 	/* quality params */
876 	st_offset = len;
877 	st_size = &ib_cpu[len++];	/* size */
878 	ib_cpu[len++] = 0x00000009;	/* RENCODE_IB_PARAM_QUALITY_PARAMS */
879 	ib_cpu[len++] = 0;	/* vbaq mode */
880 	ib_cpu[len++] = 0;	/* scene change sensitivity */
881 	ib_cpu[len++] = 0;	/* scene change min idr interval */
882 	ib_cpu[len++] = 0;
883 	if (vcn_ip_version_major >= 3)
884 		ib_cpu[len++] = 0;
885 	*st_size = (len - st_offset) * 4;
886 
887 	/* layer select */
888 	st_offset = len;
889 	st_size = &ib_cpu[len++];	/* size */
890 	ib_cpu[len++] = 0x00000005;	/* RENCODE_IB_PARAM_LAYER_SELECT */
891 	ib_cpu[len++] = 0;	/* temporal layer */
892 	*st_size = (len - st_offset) * 4;
893 
894 	/* rc layer init */
895 	st_offset = len;
896 	st_size = &ib_cpu[len++];	/* size */
897 	ib_cpu[len++] = 0x00000007;	/* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */
898 	ib_cpu[len++] = 0;
899 	ib_cpu[len++] = 0;
900 	ib_cpu[len++] = 25;
901 	ib_cpu[len++] = 1;
902 	ib_cpu[len++] = 0x01312d00;
903 	ib_cpu[len++] = 0;
904 	ib_cpu[len++] = 0;
905 	ib_cpu[len++] = 0;
906 	*st_size = (len - st_offset) * 4;
907 
908 	/* layer select */
909 	st_offset = len;
910 	st_size = &ib_cpu[len++];	/* size */
911 	ib_cpu[len++] = 0x00000005;	/* RENCODE_IB_PARAM_LAYER_SELECT */
912 	ib_cpu[len++] = 0;	/* temporal layer */
913 	*st_size = (len - st_offset) * 4;
914 
915 	/* rc per pic */
916 	st_offset = len;
917 	st_size = &ib_cpu[len++];	/* size */
918 	ib_cpu[len++] = 0x00000008;	/* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */
919 	ib_cpu[len++] = 20;
920 	ib_cpu[len++] = 0;
921 	ib_cpu[len++] = 51;
922 	ib_cpu[len++] = 0;
923 	ib_cpu[len++] = 1;
924 	ib_cpu[len++] = 0;
925 	ib_cpu[len++] = 1;
926 	ib_cpu[len++] = 0;
927 	*st_size = (len - st_offset) * 4;
928 
929 	/* op init rc */
930 	st_offset = len;
931 	st_size = &ib_cpu[len++];	/* size */
932 	ib_cpu[len++] = 0x01000004;	/* RENCODE_IB_OP_INIT_RC */
933 	*st_size = (len - st_offset) * 4;
934 
935 	/* op init rc vbv */
936 	st_offset = len;
937 	st_size = &ib_cpu[len++];	/* size */
938 	ib_cpu[len++] = 0x01000005;	/* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */
939 	*st_size = (len - st_offset) * 4;
940 
941 	*p_task_size = (len - task_offset) * 4;
942 
943 	if (vcn_unified_ring)
944 		amdgpu_cs_sq_ib_tail(ib_cpu + len);
945 
946 	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
947 	CU_ASSERT_EQUAL(r, 0);
948 }
949 
/*
 * Read one signed Exp-Golomb value from the bitstream.
 *
 * The unsigned code number is fetched with bs_read_ue(); odd code numbers
 * map to positive results ((k + 1) / 2) and even code numbers map to
 * non-positive results (-(k / 2)).
 */
static int32_t h264_se (bufferInfo * bufInfo)
{
	uint32_t code_num = bs_read_ue (bufInfo);

	/* Odd code number: positive value. */
	if (code_num & 0x1)
		return (code_num + 1) >> 1;

	/* Even code number: negate half of it (unsigned arithmetic, then narrowed). */
	int32_t negated = 0 - (code_num >> 1);
	return negated;
}
963 
/*
 * Consume `count` bits from the bitstream and report on stdout when they
 * are not all zero. The bits are consumed either way; nothing is returned.
 */
static void h264_check_0s (bufferInfo * bufInfo, int count)
{
	uint32_t bits = bs_read_u (bufInfo, count);

	if (bits == 0)
		return;

	printf ("field error - %d bits should be 0 is %x\n", count, bits);
}
973 
/*
 * Return 1 when the read cursor has reached (or passed) the end pointer of
 * the bit reader, 0 otherwise.
 */
static inline int bs_eof(bufferInfo * bufinfo)
{
	return bufinfo->decBuffer >= bufinfo->end;
}
981 
/*
 * Read a single bit (MSB first) from the bit reader.
 *
 * Returns the bit value, or 0 once the reader is at or past its end
 * pointer. At end-of-data the reader state is left untouched, so repeated
 * calls neither advance the cursor beyond `end` nor dereference it.
 */
static inline uint32_t bs_read_u1(bufferInfo *bufinfo)
{
	uint32_t bit;

	/* Nothing left: report a zero bit without moving the cursor. */
	if (bs_eof(bufinfo))
		return 0;

	bufinfo->numOfBitsInBuffer--;
	bit = (bufinfo->decData >> bufinfo->numOfBitsInBuffer) & 0x01;

	if (bufinfo->numOfBitsInBuffer == 0) {
		/* Current byte exhausted: step to the next one. */
		bufinfo->decBuffer++;
		/* Only fetch the next byte if it is still inside the buffer. */
		if (!bs_eof(bufinfo))
			bufinfo->decData = *bufinfo->decBuffer;
		bufinfo->numOfBitsInBuffer = 8;
	}

	return bit;
}
1001 
/*
 * Read `n` bits from the bit reader and return them as an unsigned value,
 * first bit read ending up in the most significant of the `n` positions.
 * Returns 0 when `n` is not positive.
 */
static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n)
{
	uint32_t value = 0;
	int remaining = n;

	while (remaining > 0) {
		/* Place each new bit at the position matching the bits still to come. */
		remaining--;
		value |= bs_read_u1(bufinfo) << remaining;
	}

	return value;
}
1013 
/*
 * Read one unsigned Exp-Golomb code number from the bit reader.
 *
 * Counts leading zero bits (at most 32, stopping early at end-of-data),
 * then reads that many suffix bits and returns
 * (2^leading_zeros - 1) + suffix.
 *
 * All arithmetic is unsigned so that 31 or 32 leading zeros, which the
 * loop permits, do not shift a signed int out of range; the result wraps
 * modulo 2^32 in that case.
 */
static inline uint32_t bs_read_ue(bufferInfo* bufinfo)
{
	uint32_t code_num;
	int leading_zeros = 0;

	while ((bs_read_u1(bufinfo) == 0) && (leading_zeros < 32) && (!bs_eof(bufinfo))) {
		leading_zeros++;
	}

	code_num = bs_read_u(bufinfo, leading_zeros);
	/* 64-bit shift keeps leading_zeros == 32 well defined. */
	code_num += (uint32_t)((((uint64_t)1) << leading_zeros) - 1);
	return code_num;
}
1026 
/*
 * Strip emulation-prevention bytes in place.
 *
 * Every 0x00 0x00 0x03 sequence inside the first `len` bytes of `bptr` has
 * its 0x03 removed and the following bytes shifted down by one.
 * Returns the length after removal. Bytes beyond the returned length are
 * left with whatever the shifting left behind.
 */
static uint32_t remove_03 (uint8_t * bptr, uint32_t len)
{
	uint32_t pos = 0;

	while (pos + 2 < len) {
		const uint8_t *window = bptr + pos;

		if (window[0] != 0 || window[1] != 0 || window[2] != 3) {
			pos++;
			continue;
		}

		/* Keep the two zero bytes, drop the 0x03 that follows them. */
		pos += 2;
		len--;
		memmove (bptr + pos, bptr + pos + 1, len - pos);
	}

	return len;
}
1043 
/*
 * Skip over one scaling list in the bitstream.
 *
 * Reads delta values with h264_se() for up to `sizeOfScalingList` entries,
 * stopping the reads once the running "next scale" becomes zero. The
 * decoded scale values themselves are discarded; only the bit position
 * matters to the caller. `ix` is not used.
 */
static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo)
{
	uint32_t prev_scale = 8, next_scale = 8;
	uint32_t remaining;
	int delta;

	for (remaining = sizeOfScalingList; remaining > 0; remaining--) {
		if (next_scale != 0) {
			delta = h264_se (bufInfo);
			next_scale = (prev_scale + delta + 256) % 256;
		}
		/* A zero next_scale keeps the previous scale for the rest of the list. */
		if (next_scale != 0)
			prev_scale = next_scale;
	}
}
1062 
/*
 * Parse a sequence parameter set and record profile, level and picture
 * dimensions in `dec`.
 *
 * Only dec->profile, dec->level_idc, dec->pic_width and dec->pic_height
 * are stored; every other syntax element is read solely to keep the bit
 * position correct. Frame cropping offsets are read but not applied, so
 * the reported size is in whole macroblocks. VUI contents are not parsed.
 */
static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo)
{
	uint32_t temp;

	dec->profile = bs_read_u (bufInfo, 8);
	bs_read_u (bufInfo, 1);		/* constraint_set0_flag */
	bs_read_u (bufInfo, 1);		/* constraint_set1_flag */
	bs_read_u (bufInfo, 1);		/* constraint_set2_flag */
	bs_read_u (bufInfo, 1);		/* constraint_set3_flag */
	bs_read_u (bufInfo, 1);		/* constraint_set4_flag */
	bs_read_u (bufInfo, 1);		/* constraint_set5_flag */


	h264_check_0s (bufInfo, 2);	/* reserved zero bits */
	dec->level_idc = bs_read_u (bufInfo, 8);
	bs_read_ue (bufInfo);	/* SPS id */

	/* NOTE(review): only these profile values take the extended branch — confirm the list is complete for the streams under test. */
	if (dec->profile == 100 || dec->profile == 110 ||
		dec->profile == 122 || dec->profile == 144) {
		uint32_t chroma_format_idc = bs_read_ue (bufInfo);
		if (chroma_format_idc == 3) {
			bs_read_u (bufInfo, 1);	/* residual_colour_transform_flag */
		}
		bs_read_ue (bufInfo);	/* bit_depth_luma_minus8 */
		bs_read_ue (bufInfo);	/* bit_depth_chroma_minus8 */
		bs_read_u (bufInfo, 1);	/* qpprime_y_zero_transform_bypass_flag */
		uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1);

		if (seq_scaling_matrix_present_flag) {
			for (uint32_t ix = 0; ix < 8; ix++) {
				temp = bs_read_u (bufInfo, 1);	/* list present flag */
				if (temp) {
					/* first six lists have 16 entries, the rest 64 */
					scaling_list (ix, ix < 6 ? 16 : 64, bufInfo);
				}
			}
		}
	}

	bs_read_ue (bufInfo);	/* log2_max_frame_num_minus4 */
	uint32_t pic_order_cnt_type = bs_read_ue (bufInfo);

	if (pic_order_cnt_type == 0) {
		bs_read_ue (bufInfo);	/* log2_max_pic_order_cnt_lsb_minus4 */
	} else if (pic_order_cnt_type == 1) {
		bs_read_u (bufInfo, 1);	/* delta_pic_order_always_zero_flag */
		h264_se (bufInfo);	/* offset_for_non_ref_pic */
		h264_se (bufInfo);	/* offset_for_top_to_bottom_field */
		temp = bs_read_ue (bufInfo);	/* number of offset_for_ref_frame entries */
		/*
		 * The count comes straight from the stream; stop at end-of-data so
		 * a corrupt value cannot spin for billions of iterations. Reads
		 * past the end only yield zeros, so nothing is lost by stopping.
		 */
		for (uint32_t ix = 0; ix < temp && !bs_eof (bufInfo); ix++) {
			 h264_se (bufInfo);	/* offset_for_ref_frame[index] */
		}
	}
	bs_read_ue (bufInfo);	/* num_ref_frames */
	bs_read_u (bufInfo, 1);	/* gaps_in_frame_num_flag */
	uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1;

	dec->pic_width = PicWidthInMbs * 16;
	uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1;

	uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1);
	/*
	 * A map unit is one macroblock row for frame-only streams and a pair of
	 * rows otherwise, so the frame height doubles when the flag is clear.
	 */
	dec->pic_height = PicHeightInMapUnits * 16 * (frame_mbs_only_flag ? 1 : 2);
	if (!frame_mbs_only_flag) {
		bs_read_u (bufInfo, 1);	/* mb_adaptive_frame_field_flag */
	}
	bs_read_u (bufInfo, 1);	/* direct_8x8_inference_flag */
	temp = bs_read_u (bufInfo, 1);	/* frame_cropping_flag */
	if (temp) {
		bs_read_ue (bufInfo);	/* frame_crop_left_offset */
		bs_read_ue (bufInfo);	/* frame_crop_right_offset */
		bs_read_ue (bufInfo);	/* frame_crop_top_offset */
		bs_read_ue (bufInfo);	/* frame_crop_bottom_offset */
	}
	bs_read_u (bufInfo, 1);	/* VUI parameters present flag; VUI itself is not parsed */
}
1137 
/*
 * Parse the start of a slice header and store the slice type in `dec`.
 *
 * slice_type values 5..9 denote the same coding types as 0..4, so they are
 * folded down by 5. The comparison is inclusive: a value of 5 must become
 * 0, not stay 5.
 */
static void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo)
{
	uint32_t temp;

	bs_read_ue (bufInfo);	/* first_mb_in_slice */
	temp = bs_read_ue (bufInfo);	/* slice_type */
	dec->slice_type = ((temp >= 5) ? (temp - 5) : temp);
}
1146 
/*
 * Parse one NAL unit header and dispatch on its type.
 *
 * Stores nal_ref_idc and nal_unit_type in `dec`. Slice NALs go to
 * h264_slice_header(), sequence parameter sets to
 * h264_parse_sequence_parameter_set(); PPS, SEI, access-unit and
 * sequence-extension NALs are accepted without further parsing. Any other
 * type is reported on stdout. Returns the NAL unit type.
 */
static uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo)
{
	uint8_t nal_type;

	h264_check_0s (bufInfo, 1);	/* leading bit is expected to be zero */
	dec->nal_ref_idc = bs_read_u (bufInfo, 2);
	nal_type = bs_read_u (bufInfo, 5);
	dec->nal_unit_type = nal_type;

	if (nal_type == H264_NAL_TYPE_NON_IDR_SLICE ||
	    nal_type == H264_NAL_TYPE_IDR_SLICE) {
		h264_slice_header (dec, bufInfo);
	} else if (nal_type == H264_NAL_TYPE_SEQ_PARAM) {
		h264_parse_sequence_parameter_set (dec, bufInfo);
	} else if (nal_type == H264_NAL_TYPE_PIC_PARAM ||
		   nal_type == H264_NAL_TYPE_SEI ||
		   nal_type == H264_NAL_TYPE_ACCESS_UNIT ||
		   nal_type == H264_NAL_TYPE_SEQ_EXTENSION) {
		/* Recognised but nothing to extract. */
	} else {
		printf ("Nal type unknown %d \n ", nal_type);
	}

	return nal_type;
}
1175 
h264_find_next_start_code(uint8_t * pBuf,uint32_t bufLen)1176 static uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen)
1177 {
1178 	uint32_t val;
1179 	uint32_t offset, startBytes;
1180 
1181 	offset = startBytes = 0;
1182 	if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) {
1183 		pBuf += 4;
1184 		offset = 4;
1185 		startBytes = 1;
1186 	} else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) {
1187 		pBuf += 3;
1188 		offset = 3;
1189 		startBytes = 1;
1190 	}
1191 	val = 0xffffffff;
1192 	while (offset < bufLen - 3) {
1193 		val <<= 8;
1194 		val |= *pBuf++;
1195 		offset++;
1196 		if (val == H264_START_CODE)
1197 			return offset - 4;
1198 
1199 		if ((val & 0x00ffffff) == H264_START_CODE)
1200 			return offset - 3;
1201 	}
1202 	if (bufLen - offset <= 3 && startBytes == 0) {
1203 		startBytes = 0;
1204 		return 0;
1205 	}
1206 
1207 	return offset;
1208 }
1209 
verify_checksum(uint8_t * buffer,uint32_t buffer_size)1210 static int verify_checksum(uint8_t *buffer, uint32_t buffer_size)
1211 {
1212 	uint32_t buffer_pos = 0;
1213 	int done = 0;
1214 	h264_decode dec;
1215 
1216 	memset(&dec, 0, sizeof(h264_decode));
1217 	do {
1218 		uint32_t ret;
1219 
1220 		ret = h264_find_next_start_code (buffer + buffer_pos,
1221 				 buffer_size - buffer_pos);
1222 		if (ret == 0) {
1223 			done = 1;
1224 			if (buffer_pos == 0) {
1225 				fprintf (stderr,
1226 				 "couldn't find start code in buffer from 0\n");
1227 			}
1228 		} else {
1229 		/* have a complete NAL from buffer_pos to end */
1230 			if (ret > 3) {
1231 				uint32_t nal_len;
1232 				bufferInfo bufinfo;
1233 
1234 				nal_len = remove_03 (buffer + buffer_pos, ret);
1235 				bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4);
1236 				bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8;
1237 				bufinfo.end = buffer + buffer_pos + nal_len;
1238 				bufinfo.numOfBitsInBuffer = 8;
1239 				bufinfo.decData = *bufinfo.decBuffer;
1240 				h264_parse_nal (&dec, &bufinfo);
1241 			}
1242 			buffer_pos += ret;	/*  buffer_pos points to next code */
1243 		}
1244 	} while (done == 0);
1245 
1246 	if ((dec.pic_width == gWidth) &&
1247 		(dec.pic_height == gHeight) &&
1248 		(dec.slice_type == gSliceType))
1249 	    return 0;
1250 	else
1251 		return -1;
1252 }
1253 
/*
 * Validate the output of an encode job.
 *
 * Maps the feedback buffer, takes dword 6 as the byte length of the
 * produced bitstream, then maps the bitstream buffer and runs
 * verify_checksum() over that many bytes. All failures are reported
 * through CUnit assertions. `frame_type` is not used.
 *
 * CU_ASSERT_EQUAL does not abort the test, so a failed mapping returns
 * early instead of dereferencing a pointer that was never mapped.
 *
 * NOTE(review): the length taken from the feedback buffer is not checked
 * against the size of the bitstream buffer — confirm it cannot exceed it.
 */
static void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type)
{
	uint32_t *fb_ptr;
	uint8_t *bs_ptr;
	uint32_t size;
	int r;

	(void)frame_type;

	r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;
	fb_ptr = (uint32_t*)fb_buf.ptr;
	size = fb_ptr[6];
	r = amdgpu_bo_cpu_unmap(fb_buf.handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;

	bs_ptr = (uint8_t*)bs_buf.ptr;
	r = verify_checksum(bs_ptr, size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_cpu_unmap(bs_buf.handle);

	CU_ASSERT_EQUAL(r, 0);
}
1278 
amdgpu_cs_vcn_ib_zero_count(int * len,int num)1279 static void amdgpu_cs_vcn_ib_zero_count(int *len, int num)
1280 {
1281 	for (int i = 0; i < num; i++)
1282 		ib_cpu[(*len)++] = 0;
1283 }
1284 
amdgpu_cs_vcn_enc_encode_frame(int frame_type)1285 static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
1286 {
1287 	struct amdgpu_vcn_bo bs_buf, fb_buf, input_buf;
1288 	int len, r;
1289 	unsigned width = 160, height = 128, buf_size;
1290 	uint32_t *p_task_size = NULL;
1291 	uint32_t task_offset = 0, st_offset;
1292 	uint32_t *st_size = NULL;
1293 	uint32_t fw_maj = 1, fw_min = 9;
1294 
1295 	if (vcn_ip_version_major == 2) {
1296 		fw_maj = 1;
1297 		fw_min = 1;
1298 	} else if (vcn_ip_version_major == 3) {
1299 		fw_maj = 1;
1300 		fw_min = 0;
1301 	}
1302 	gSliceType = frame_type;
1303 	buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
1304 
1305 	num_resources = 0;
1306 	alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
1307 	alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
1308 	alloc_resource(&input_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT);
1309 	resources[num_resources++] = enc_buf.handle;
1310 	resources[num_resources++] = cpb_buf.handle;
1311 	resources[num_resources++] = bs_buf.handle;
1312 	resources[num_resources++] = fb_buf.handle;
1313 	resources[num_resources++] = input_buf.handle;
1314 	resources[num_resources++] = ib_handle;
1315 
1316 
1317 	r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr);
1318 	memset(bs_buf.ptr, 0, 4096);
1319 	r = amdgpu_bo_cpu_unmap(bs_buf.handle);
1320 
1321 	r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr);
1322 	memset(fb_buf.ptr, 0, 4096);
1323 	r = amdgpu_bo_cpu_unmap(fb_buf.handle);
1324 
1325 	r = amdgpu_bo_cpu_map(input_buf.handle, (void **)&input_buf.ptr);
1326 	CU_ASSERT_EQUAL(r, 0);
1327 
1328 	for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++)
1329 		memcpy(input_buf.ptr + i * ALIGN(width, 256), frame + i * width, width);
1330 
1331 	r = amdgpu_bo_cpu_unmap(input_buf.handle);
1332 	CU_ASSERT_EQUAL(r, 0);
1333 
1334 	len = 0;
1335 
1336 	if (vcn_unified_ring)
1337 		amdgpu_cs_sq_head(ib_cpu, &len, true);
1338 
1339 	/* session info */
1340 	st_offset = len;
1341 	st_size = &ib_cpu[len++];	/* size */
1342 	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
1343 	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
1344 	ib_cpu[len++] = enc_buf.addr >> 32;
1345 	ib_cpu[len++] = enc_buf.addr;
1346 	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE */;
1347 	*st_size = (len - st_offset) * 4;
1348 
1349 	/* task info */
1350 	task_offset = len;
1351 	st_offset = len;
1352 	st_size = &ib_cpu[len++];	/* size */
1353 	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
1354 	p_task_size = &ib_cpu[len++];
1355 	ib_cpu[len++] = enc_task_id++;	/* task_id */
1356 	ib_cpu[len++] = 1;	/* feedback */
1357 	*st_size = (len - st_offset) * 4;
1358 
1359 	if (frame_type == 2) {
1360 		/* sps */
1361 		st_offset = len;
1362 		st_size = &ib_cpu[len++];	/* size */
1363 		if(vcn_ip_version_major == 1)
1364 			ib_cpu[len++] = 0x00000020;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */
1365 		else
1366 			ib_cpu[len++] = 0x0000000a;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn */
1367 		ib_cpu[len++] = 0x00000002;	/* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */
1368 		ib_cpu[len++] = 0x00000011;	/* sps len */
1369 		ib_cpu[len++] = 0x00000001;	/* start code */
1370 		ib_cpu[len++] = 0x6764440b;
1371 		ib_cpu[len++] = 0xac54c284;
1372 		ib_cpu[len++] = 0x68078442;
1373 		ib_cpu[len++] = 0x37000000;
1374 		*st_size = (len - st_offset) * 4;
1375 
1376 		/* pps */
1377 		st_offset = len;
1378 		st_size = &ib_cpu[len++];	/* size */
1379 		if(vcn_ip_version_major == 1)
1380 			ib_cpu[len++] = 0x00000020;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/
1381 		else
1382 			ib_cpu[len++] = 0x0000000a;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn*/
1383 		ib_cpu[len++] = 0x00000003;	/* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */
1384 		ib_cpu[len++] = 0x00000008;	/* pps len */
1385 		ib_cpu[len++] = 0x00000001;	/* start code */
1386 		ib_cpu[len++] = 0x68ce3c80;
1387 		*st_size = (len - st_offset) * 4;
1388 	}
1389 
1390 	/* slice header */
1391 	st_offset = len;
1392 	st_size = &ib_cpu[len++];	/* size */
1393 	if(vcn_ip_version_major == 1)
1394 		ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */
1395 	else
1396 		ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER other vcn */
1397 	if (frame_type == 2) {
1398 		ib_cpu[len++] = 0x65000000;
1399 		ib_cpu[len++] = 0x11040000;
1400 	} else {
1401 		ib_cpu[len++] = 0x41000000;
1402 		ib_cpu[len++] = 0x34210000;
1403 	}
1404 	ib_cpu[len++] = 0xe0000000;
1405 	amdgpu_cs_vcn_ib_zero_count(&len, 13);
1406 
1407 	ib_cpu[len++] = 0x00000001;
1408 	ib_cpu[len++] = 0x00000008;
1409 	ib_cpu[len++] = 0x00020000;
1410 	ib_cpu[len++] = 0x00000000;
1411 	ib_cpu[len++] = 0x00000001;
1412 	ib_cpu[len++] = 0x00000015;
1413 	ib_cpu[len++] = 0x00020001;
1414 	ib_cpu[len++] = 0x00000000;
1415 	ib_cpu[len++] = 0x00000001;
1416 	ib_cpu[len++] = 0x00000003;
1417 	amdgpu_cs_vcn_ib_zero_count(&len, 22);
1418 	*st_size = (len - st_offset) * 4;
1419 
1420 	/* encode params */
1421 	st_offset = len;
1422 	st_size = &ib_cpu[len++];	/* size */
1423 	if(vcn_ip_version_major == 1)
1424 		ib_cpu[len++] = 0x0000000b;	/* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1 */
1425 	else
1426 		ib_cpu[len++] = 0x0000000f;	/* RENCODE_IB_PARAM_ENCODE_PARAMS other vcn */
1427 	ib_cpu[len++] = frame_type;
1428 	ib_cpu[len++] = 0x0001f000;
1429 	ib_cpu[len++] = input_buf.addr >> 32;
1430 	ib_cpu[len++] = input_buf.addr;
1431 	ib_cpu[len++] = (input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32;
1432 	ib_cpu[len++] = input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32);
1433 	ib_cpu[len++] = 0x00000100;
1434 	ib_cpu[len++] = 0x00000080;
1435 	ib_cpu[len++] = 0x00000000;
1436 	ib_cpu[len++] = 0xffffffff;
1437 	ib_cpu[len++] = 0x00000000;
1438 	*st_size = (len - st_offset) * 4;
1439 
1440 	/* encode params h264 */
1441 	st_offset = len;
1442 	st_size = &ib_cpu[len++];	/* size */
1443 	ib_cpu[len++] = 0x00200003;	/* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */
1444 	if (vcn_ip_version_major <= 2) {
1445 		ib_cpu[len++] = 0x00000000;
1446 		ib_cpu[len++] = 0x00000000;
1447 		ib_cpu[len++] = 0x00000000;
1448 		ib_cpu[len++] = 0xffffffff;
1449 	} else {
1450 		ib_cpu[len++] = 0x00000000;
1451 		ib_cpu[len++] = 0x00000000;
1452 		ib_cpu[len++] = 0x00000000;
1453 		ib_cpu[len++] = 0x00000000;
1454 		ib_cpu[len++] = 0x00000000;
1455 		ib_cpu[len++] = 0x00000000;
1456 		ib_cpu[len++] = 0x00000000;
1457 		ib_cpu[len++] = 0xffffffff;
1458 		ib_cpu[len++] = 0x00000000;
1459 		ib_cpu[len++] = 0x00000000;
1460 		ib_cpu[len++] = 0x00000000;
1461 		ib_cpu[len++] = 0x00000000;
1462 		ib_cpu[len++] = 0xffffffff;
1463 		ib_cpu[len++] = 0x00000000;
1464 		ib_cpu[len++] = 0x00000000;
1465 		ib_cpu[len++] = 0x00000000;
1466 		ib_cpu[len++] = 0x00000000;
1467 		ib_cpu[len++] = 0x00000001;
1468 	}
1469 	*st_size = (len - st_offset) * 4;
1470 
1471 	/* encode context */
1472 	st_offset = len;
1473 	st_size = &ib_cpu[len++];	/* size */
1474 	if(vcn_ip_version_major == 1)
1475 		ib_cpu[len++] = 0x0000000d;	/* ENCODE_CONTEXT_BUFFER  vcn 1 */
1476 	else
1477 		ib_cpu[len++] = 0x00000011;	/* ENCODE_CONTEXT_BUFFER  other vcn */
1478 	ib_cpu[len++] = cpb_buf.addr >> 32;
1479 	ib_cpu[len++] = cpb_buf.addr;
1480 	ib_cpu[len++] = 0x00000000;	/* swizzle mode */
1481 	ib_cpu[len++] = 0x00000100;	/* luma pitch */
1482 	ib_cpu[len++] = 0x00000100;	/* chroma pitch */
1483 	ib_cpu[len++] = 0x00000002; /* no reconstructed picture */
1484 	ib_cpu[len++] = 0x00000000;	/* reconstructed pic 1 luma offset */
1485 	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32);	/* pic1 chroma offset */
1486 	if(vcn_ip_version_major == 4)
1487 		amdgpu_cs_vcn_ib_zero_count(&len, 2);
1488 	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;	/* pic2 luma offset */
1489 	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2;	/* pic2 chroma offset */
1490 
1491 	amdgpu_cs_vcn_ib_zero_count(&len, 280);
1492 	*st_size = (len - st_offset) * 4;
1493 
1494 	/* bitstream buffer */
1495 	st_offset = len;
1496 	st_size = &ib_cpu[len++];	/* size */
1497 	if(vcn_ip_version_major == 1)
1498 		ib_cpu[len++] = 0x0000000e;	/* VIDEO_BITSTREAM_BUFFER vcn 1 */
1499 	else
1500 		ib_cpu[len++] = 0x00000012;	/* VIDEO_BITSTREAM_BUFFER other vcn */
1501 
1502 	ib_cpu[len++] = 0x00000000;	/* mode */
1503 	ib_cpu[len++] = bs_buf.addr >> 32;
1504 	ib_cpu[len++] = bs_buf.addr;
1505 	ib_cpu[len++] = 0x0001f000;
1506 	ib_cpu[len++] = 0x00000000;
1507 	*st_size = (len - st_offset) * 4;
1508 
1509 	/* feedback */
1510 	st_offset = len;
1511 	st_size = &ib_cpu[len++];	/* size */
1512 	if(vcn_ip_version_major == 1)
1513 		ib_cpu[len++] = 0x00000010;	/* FEEDBACK_BUFFER vcn 1 */
1514 	else
1515 		ib_cpu[len++] = 0x00000015;	/* FEEDBACK_BUFFER vcn 2,3 */
1516 	ib_cpu[len++] = 0x00000000;
1517 	ib_cpu[len++] = fb_buf.addr >> 32;
1518 	ib_cpu[len++] = fb_buf.addr;
1519 	ib_cpu[len++] = 0x00000010;
1520 	ib_cpu[len++] = 0x00000028;
1521 	*st_size = (len - st_offset) * 4;
1522 
1523 	/* intra refresh */
1524 	st_offset = len;
1525 	st_size = &ib_cpu[len++];
1526 	if(vcn_ip_version_major == 1)
1527 		ib_cpu[len++] = 0x0000000c;	/* INTRA_REFRESH vcn 1 */
1528 	else
1529 		ib_cpu[len++] = 0x00000010;	/* INTRA_REFRESH vcn 2,3 */
1530 	ib_cpu[len++] = 0x00000000;
1531 	ib_cpu[len++] = 0x00000000;
1532 	ib_cpu[len++] = 0x00000000;
1533 	*st_size = (len - st_offset) * 4;
1534 
1535 	if(vcn_ip_version_major != 1) {
1536 		/* Input Format */
1537 		st_offset = len;
1538 		st_size = &ib_cpu[len++];
1539 		ib_cpu[len++] = 0x0000000c;
1540 		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_VOLUME_G22_BT709 */
1541 		ib_cpu[len++] = 0x00000000;
1542 		ib_cpu[len++] = 0x00000000;
1543 		ib_cpu[len++] = 0x00000000;
1544 		ib_cpu[len++] = 0x00000000;
1545 		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_BIT_DEPTH_8_BIT */
1546 		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_PACKING_FORMAT_NV12 */
1547 		*st_size = (len - st_offset) * 4;
1548 
1549 		/* Output Format */
1550 		st_offset = len;
1551 		st_size = &ib_cpu[len++];
1552 		ib_cpu[len++] = 0x0000000d;
1553 		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_VOLUME_G22_BT709 */
1554 		ib_cpu[len++] = 0x00000000;
1555 		ib_cpu[len++] = 0x00000000;
1556 		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_BIT_DEPTH_8_BIT */
1557 		*st_size = (len - st_offset) * 4;
1558 	}
1559 	/* op_speed */
1560 	st_offset = len;
1561 	st_size = &ib_cpu[len++];
1562 	ib_cpu[len++] = 0x01000006;	/* SPEED_ENCODING_MODE */
1563 	*st_size = (len - st_offset) * 4;
1564 
1565 	/* op_enc */
1566 	st_offset = len;
1567 	st_size = &ib_cpu[len++];
1568 	ib_cpu[len++] = 0x01000003;
1569 	*st_size = (len - st_offset) * 4;
1570 
1571 	*p_task_size = (len - task_offset) * 4;
1572 
1573 	if (vcn_unified_ring)
1574 		amdgpu_cs_sq_ib_tail(ib_cpu + len);
1575 
1576 	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
1577 	CU_ASSERT_EQUAL(r, 0);
1578 
1579 	/* check result */
1580 	check_result(fb_buf, bs_buf, frame_type);
1581 
1582 	free_resource(&fb_buf);
1583 	free_resource(&bs_buf);
1584 	free_resource(&input_buf);
1585 }
1586 
/*
 * Encode test entry point: encodes a single frame with frame type 2,
 * the value amdgpu_cs_vcn_enc_encode_frame() treats as an IDR frame.
 */
static void amdgpu_cs_vcn_enc_encode(void)
{
	const int idr_frame_type = 2;

	amdgpu_cs_vcn_enc_encode_frame(idr_frame_type);
}
1591 
amdgpu_cs_vcn_enc_destroy(void)1592 static void amdgpu_cs_vcn_enc_destroy(void)
1593 {
1594 	int len = 0, r;
1595 	uint32_t *p_task_size = NULL;
1596 	uint32_t task_offset = 0, st_offset;
1597 	uint32_t *st_size = NULL;
1598 	uint32_t fw_maj = 1, fw_min = 9;
1599 
1600 	if (vcn_ip_version_major == 2) {
1601 		fw_maj = 1;
1602 		fw_min = 1;
1603 	} else if (vcn_ip_version_major == 3) {
1604 		fw_maj = 1;
1605 		fw_min = 0;
1606 	}
1607 
1608 	num_resources = 0;
1609 /* 	alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */
1610 	resources[num_resources++] = enc_buf.handle;
1611 	resources[num_resources++] = ib_handle;
1612 
1613 	if (vcn_unified_ring)
1614 		amdgpu_cs_sq_head(ib_cpu, &len, true);
1615 
1616 	/* session info */
1617 	st_offset = len;
1618 	st_size = &ib_cpu[len++];	/* size */
1619 	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
1620 	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
1621 	ib_cpu[len++] = enc_buf.addr >> 32;
1622 	ib_cpu[len++] = enc_buf.addr;
1623 	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE; */
1624 	*st_size = (len - st_offset) * 4;
1625 
1626 	/* task info */
1627 	task_offset = len;
1628 	st_offset = len;
1629 	st_size = &ib_cpu[len++];	/* size */
1630 	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
1631 	p_task_size = &ib_cpu[len++];
1632 	ib_cpu[len++] = enc_task_id++;	/* task_id */
1633 	ib_cpu[len++] = 0;	/* feedback */
1634 	*st_size = (len - st_offset) * 4;
1635 
1636 	/*  op close */
1637 	st_offset = len;
1638 	st_size = &ib_cpu[len++];
1639 	ib_cpu[len++] = 0x01000002;	/* RENCODE_IB_OP_CLOSE_SESSION */
1640 	*st_size = (len - st_offset) * 4;
1641 
1642 	*p_task_size = (len - task_offset) * 4;
1643 
1644 	if (vcn_unified_ring)
1645 		amdgpu_cs_sq_ib_tail(ib_cpu + len);
1646 
1647 	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
1648 	CU_ASSERT_EQUAL(r, 0);
1649 
1650 	free_resource(&cpb_buf);
1651 	free_resource(&enc_buf);
1652 }
1653