1 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2 /*
3 * Wave5 series multi-standard codec IP - decoder interface
4 *
5 * Copyright (C) 2021-2023 CHIPS&MEDIA INC
6 */
7
8 #include <linux/pm_runtime.h>
9 #include "wave5-helper.h"
10
11 #define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
12 #define VPU_DEC_DRV_NAME "wave5-dec"
13
14 static const struct v4l2_frmsize_stepwise dec_hevc_frmsize = {
15 .min_width = W5_MIN_DEC_PIC_8_WIDTH,
16 .max_width = W5_MAX_DEC_PIC_WIDTH,
17 .step_width = W5_DEC_CODEC_STEP_WIDTH,
18 .min_height = W5_MIN_DEC_PIC_8_HEIGHT,
19 .max_height = W5_MAX_DEC_PIC_HEIGHT,
20 .step_height = W5_DEC_CODEC_STEP_HEIGHT,
21 };
22
23 static const struct v4l2_frmsize_stepwise dec_h264_frmsize = {
24 .min_width = W5_MIN_DEC_PIC_32_WIDTH,
25 .max_width = W5_MAX_DEC_PIC_WIDTH,
26 .step_width = W5_DEC_CODEC_STEP_WIDTH,
27 .min_height = W5_MIN_DEC_PIC_32_HEIGHT,
28 .max_height = W5_MAX_DEC_PIC_HEIGHT,
29 .step_height = W5_DEC_CODEC_STEP_HEIGHT,
30 };
31
32 static const struct v4l2_frmsize_stepwise dec_raw_frmsize = {
33 .min_width = W5_MIN_DEC_PIC_8_WIDTH,
34 .max_width = W5_MAX_DEC_PIC_WIDTH,
35 .step_width = W5_DEC_RAW_STEP_WIDTH,
36 .min_height = W5_MIN_DEC_PIC_8_HEIGHT,
37 .max_height = W5_MAX_DEC_PIC_HEIGHT,
38 .step_height = W5_DEC_RAW_STEP_HEIGHT,
39 };
40
41 static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
42 [VPU_FMT_TYPE_CODEC] = {
43 {
44 .v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
45 .v4l2_frmsize = &dec_hevc_frmsize,
46 },
47 {
48 .v4l2_pix_fmt = V4L2_PIX_FMT_H264,
49 .v4l2_frmsize = &dec_h264_frmsize,
50 },
51 },
52 [VPU_FMT_TYPE_RAW] = {
53 {
54 .v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
55 .v4l2_frmsize = &dec_raw_frmsize,
56 },
57 {
58 .v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
59 .v4l2_frmsize = &dec_raw_frmsize,
60 },
61 {
62 .v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
63 .v4l2_frmsize = &dec_raw_frmsize,
64 },
65 {
66 .v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
67 .v4l2_frmsize = &dec_raw_frmsize,
68 },
69 {
70 .v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
71 .v4l2_frmsize = &dec_raw_frmsize,
72 },
73 {
74 .v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
75 .v4l2_frmsize = &dec_raw_frmsize,
76 },
77 {
78 .v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
79 .v4l2_frmsize = &dec_raw_frmsize,
80 },
81 {
82 .v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
83 .v4l2_frmsize = &dec_raw_frmsize,
84 },
85 {
86 .v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
87 .v4l2_frmsize = &dec_raw_frmsize,
88 },
89 {
90 .v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
91 .v4l2_frmsize = &dec_raw_frmsize,
92 },
93 {
94 .v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
95 .v4l2_frmsize = &dec_raw_frmsize,
96 },
97 {
98 .v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
99 .v4l2_frmsize = &dec_raw_frmsize,
100 },
101 }
102 };
103
104 /*
105 * Make sure that the state switch is allowed and add logging for debugging
106 * purposes
107 */
108 static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
109 {
110 switch (state) {
111 case VPU_INST_STATE_NONE:
112 break;
113 case VPU_INST_STATE_OPEN:
114 if (inst->state != VPU_INST_STATE_NONE)
115 goto invalid_state_switch;
116 goto valid_state_switch;
117 case VPU_INST_STATE_INIT_SEQ:
118 if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
119 goto invalid_state_switch;
120 goto valid_state_switch;
121 case VPU_INST_STATE_PIC_RUN:
122 if (inst->state != VPU_INST_STATE_INIT_SEQ)
123 goto invalid_state_switch;
124 goto valid_state_switch;
125 case VPU_INST_STATE_STOP:
126 goto valid_state_switch;
127 }
128 invalid_state_switch:
129 WARN(1, "Invalid state switch from %s to %s.\n",
130 state_to_str(inst->state), state_to_str(state));
131 return -EINVAL;
132 valid_state_switch:
133 dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
134 state_to_str(inst->state), state_to_str(state));
135 inst->state = state;
136 return 0;
137 }
138
139 static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
140 {
141 int ret;
142
143 ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
144 if (ret) {
145 /*
146 * To set the EOS flag, a command is sent to the firmware.
147 * That command may never return (timeout) or may report an error.
148 */
149 dev_err(inst->dev->dev,
150 "Setting EOS for the bitstream, fail: %d\n", ret);
151 return ret;
152 }
153 return 0;
154 }
155
156 static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
157 {
158 struct vpu_src_buffer *vpu_buf;
159
160 if (!m2m_ctx->last_src_buf)
161 return false;
162
163 vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
164 return vpu_buf->consumed;
165 }
166
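/*
 * Compute how many bitstream bytes the firmware consumed since the last run
 * (accounting for ring-buffer wrap-around) and return every fully consumed
 * OUTPUT buffer to user space. When the last source buffer has been
 * consumed, signal EOS to the firmware.
 */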
167 static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
168 {
169 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
170 struct v4l2_m2m_buffer *buf, *n;
171 size_t consumed_bytes = 0;
172
173 if (rd_ptr >= inst->last_rd_ptr) {
174 consumed_bytes = rd_ptr - inst->last_rd_ptr;
175 } else {
176 size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
177 size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;
178
179 consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
180 }
181
182 inst->last_rd_ptr = rd_ptr;
183 consumed_bytes += inst->remaining_consumed_bytes;
184
185 dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
186 consumed_bytes);
187
188 v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
189 struct vb2_v4l2_buffer *src_buf = &buf->vb;
190 size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
191
192 if (src_size > consumed_bytes)
193 break;
194
195 dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
196 __func__, src_buf->vb2_buf.index);
197 src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
198 inst->timestamp = src_buf->vb2_buf.timestamp;
199 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
200 consumed_bytes -= src_size;
201
202 /* Handle the case where the last bitstream buffer has been picked up */
203 if (src_buf == m2m_ctx->last_src_buf) {
204 int ret;
205
206 m2m_ctx->last_src_buf = NULL;
207 ret = wave5_vpu_dec_set_eos_on_firmware(inst);
208 if (ret)
209 dev_warn(inst->dev->dev,
210 "Setting EOS for the bitstream, fail: %d\n", ret);
211 break;
212 }
213 }
214
215 inst->remaining_consumed_bytes = consumed_bytes;
216 }
217
218 static int start_decode(struct vpu_instance *inst, u32 *fail_res)
219 {
220 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
221 int ret = 0;
222
223 ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
224 if (ret) {
225 struct vb2_v4l2_buffer *src_buf;
226
227 src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
228 if (src_buf)
229 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
230 switch_state(inst, VPU_INST_STATE_STOP);
231
232 dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
233 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
234 }
235
236 return ret;
237 }
238
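/*
 * Return an empty CAPTURE buffer flagged as the last one; if none is
 * available, tell the m2m framework to flag the next queued buffer as last.
 */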
239 static void flag_last_buffer_done(struct vpu_instance *inst)
240 {
241 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
242 struct vb2_v4l2_buffer *vb;
243 int i;
244
245 lockdep_assert_held(&inst->state_spinlock);
246
247 vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
248 if (!vb) {
249 m2m_ctx->is_draining = true;
250 m2m_ctx->next_buf_last = true;
251 return;
252 }
253
254 for (i = 0; i < vb->vb2_buf.num_planes; i++)
255 vb2_set_plane_payload(&vb->vb2_buf, i, 0);
256 vb->field = V4L2_FIELD_NONE;
257
258 v4l2_m2m_last_buffer_done(m2m_ctx, vb);
259 }
260
261 static void send_eos_event(struct vpu_instance *inst)
262 {
263 static const struct v4l2_event vpu_event_eos = {
264 .type = V4L2_EVENT_EOS
265 };
266
267 lockdep_assert_held(&inst->state_spinlock);
268
269 v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
270 inst->eos = false;
271 }
272
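/*
 * A new (or the first) sequence was parsed: update the minimum CAPTURE
 * buffer count, the composition rectangle and both formats from the
 * firmware's initial info, then notify user space with a
 * V4L2_EVENT_SOURCE_CHANGE event.
 */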
273 static int handle_dynamic_resolution_change(struct vpu_instance *inst)
274 {
275 struct v4l2_fh *fh = &inst->v4l2_fh;
276 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
277
278 static const struct v4l2_event vpu_event_src_ch = {
279 .type = V4L2_EVENT_SOURCE_CHANGE,
280 .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
281 };
282 struct dec_info *p_dec_info = &inst->codec_info->dec_info;
283 struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info;
284
285 lockdep_assert_held(&inst->state_spinlock);
286
287 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr);
288
289 dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n",
290 __func__, initial_info->pic_width, initial_info->pic_height,
291 initial_info->profile, initial_info->min_frame_buffer_count);
292
293 inst->needs_reallocation = true;
294 inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1;
295 if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) {
296 struct v4l2_ctrl *ctrl;
297
298 ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl,
299 V4L2_CID_MIN_BUFFERS_FOR_CAPTURE);
300 if (ctrl)
301 v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count);
302 }
303
304 if (p_dec_info->initial_info_obtained) {
305 const struct vpu_format *vpu_fmt;
306
307 inst->conf_win.left = initial_info->pic_crop_rect.left;
308 inst->conf_win.top = initial_info->pic_crop_rect.top;
309 inst->conf_win.width = initial_info->pic_width -
310 initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right;
311 inst->conf_win.height = initial_info->pic_height -
312 initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom;
313
314 vpu_fmt = wave5_find_vpu_fmt(inst->src_fmt.pixelformat,
315 dec_fmt_list[VPU_FMT_TYPE_CODEC]);
316 if (!vpu_fmt)
317 return -EINVAL;
318
319 wave5_update_pix_fmt(&inst->src_fmt,
320 VPU_FMT_TYPE_CODEC,
321 initial_info->pic_width,
322 initial_info->pic_height,
323 vpu_fmt->v4l2_frmsize);
324
325 vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat,
326 dec_fmt_list[VPU_FMT_TYPE_RAW]);
327 if (!vpu_fmt)
328 return -EINVAL;
329
330 wave5_update_pix_fmt(&inst->dst_fmt,
331 VPU_FMT_TYPE_RAW,
332 initial_info->pic_width,
333 initial_info->pic_height,
334 vpu_fmt->v4l2_frmsize);
335 }
336
337 v4l2_event_queue_fh(fh, &vpu_event_src_ch);
338
339 return 0;
340 }
341
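/*
 * Completion handler for a decode command: fetch the output info from the
 * firmware, return consumed OUTPUT buffers, mark the displayable CAPTURE
 * buffer done and handle end-of-sequence or dynamic resolution changes.
 * The m2m job is only finished once the firmware reports no more pending
 * commands.
 */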
342 static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
343 {
344 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
345 struct dec_output_info dec_info;
346 int ret;
347 struct vb2_v4l2_buffer *dec_buf = NULL;
348 struct vb2_v4l2_buffer *disp_buf = NULL;
349 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
350 struct queue_status_info q_status;
351
352 dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);
353
354 ret = wave5_vpu_dec_get_output_info(inst, &dec_info);
355 if (ret) {
356 dev_warn(inst->dev->dev, "%s: could not get output info.", __func__);
357 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
358 return;
359 }
360
361 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
362 &dec_info.wr_ptr);
363 wave5_handle_src_buffer(inst, dec_info.rd_ptr);
364
365 dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
366 dec_info.index_frame_decoded, dec_info.index_frame_display);
367
368 if (!vb2_is_streaming(dst_vq)) {
369 dev_dbg(inst->dev->dev, "%s: capture is not streaming.", __func__);
370 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
371 return;
372 }
373
374 /* Remove decoded buffer from the ready queue now that it has been
375 * decoded.
376 */
377 if (dec_info.index_frame_decoded >= 0) {
378 struct vb2_buffer *vb = vb2_get_buffer(dst_vq,
379 dec_info.index_frame_decoded);
380 if (vb) {
381 dec_buf = to_vb2_v4l2_buffer(vb);
382 dec_buf->vb2_buf.timestamp = inst->timestamp;
383 } else {
384 dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i",
385 __func__, dec_info.index_frame_decoded);
386 }
387 }
388
389 if (dec_info.index_frame_display >= 0) {
390 disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display);
391 if (!disp_buf)
392 dev_warn(inst->dev->dev, "%s: invalid display frame index %i",
393 __func__, dec_info.index_frame_display);
394 }
395
396 /* If there is anything to display, do that now */
397 if (disp_buf) {
398 struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf);
399
400 if (inst->dst_fmt.num_planes == 1) {
401 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
402 inst->dst_fmt.plane_fmt[0].sizeimage);
403 } else if (inst->dst_fmt.num_planes == 2) {
404 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
405 inst->dst_fmt.plane_fmt[0].sizeimage);
406 vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
407 inst->dst_fmt.plane_fmt[1].sizeimage);
408 } else if (inst->dst_fmt.num_planes == 3) {
409 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
410 inst->dst_fmt.plane_fmt[0].sizeimage);
411 vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
412 inst->dst_fmt.plane_fmt[1].sizeimage);
413 vb2_set_plane_payload(&disp_buf->vb2_buf, 2,
414 inst->dst_fmt.plane_fmt[2].sizeimage);
415 }
416
417 /* TODO implement interlace support */
418 disp_buf->field = V4L2_FIELD_NONE;
419 dst_vpu_buf->display = true;
420 v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE);
421
422 dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n",
423 __func__, dec_info.frame_cycle,
424 vb2_get_plane_payload(&disp_buf->vb2_buf, 0));
425 }
426
427 if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
428 dec_info.sequence_changed)) {
429 unsigned long flags;
430
431 spin_lock_irqsave(&inst->state_spinlock, flags);
432 if (!v4l2_m2m_has_stopped(m2m_ctx)) {
433 switch_state(inst, VPU_INST_STATE_STOP);
434
435 if (dec_info.sequence_changed)
436 handle_dynamic_resolution_change(inst);
437 else
438 send_eos_event(inst);
439
440 flag_last_buffer_done(inst);
441 }
442 spin_unlock_irqrestore(&inst->state_spinlock, flags);
443 }
444
445 /*
446 * During a resolution change and while draining, the firmware may flush
447 * the reorder queue regardless of having a matching decoding operation
448 * pending. Only terminate the job if there are no more IRQs coming.
449 */
450 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
451 if (q_status.report_queue_count == 0 &&
452 (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) {
453 dev_dbg(inst->dev->dev, "%s: finishing job.\n", __func__);
454 pm_runtime_mark_last_busy(inst->dev->dev);
455 pm_runtime_put_autosuspend(inst->dev->dev);
456 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
457 }
458 }
459
460 static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
461 {
462 strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver));
463 strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card));
464
465 return 0;
466 }
467
468 static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
469 {
470 const struct vpu_format *vpu_fmt;
471
472 if (fsize->index)
473 return -EINVAL;
474
475 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
476 if (!vpu_fmt) {
477 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]);
478 if (!vpu_fmt)
479 return -EINVAL;
480 }
481
482 fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
483 fsize->stepwise.min_width = vpu_fmt->v4l2_frmsize->min_width;
484 fsize->stepwise.max_width = vpu_fmt->v4l2_frmsize->max_width;
485 fsize->stepwise.step_width = W5_DEC_CODEC_STEP_WIDTH;
486 fsize->stepwise.min_height = vpu_fmt->v4l2_frmsize->min_height;
487 fsize->stepwise.max_height = vpu_fmt->v4l2_frmsize->max_height;
488 fsize->stepwise.step_height = W5_DEC_CODEC_STEP_HEIGHT;
489
490 return 0;
491 }
492
493 static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f)
494 {
495 const struct vpu_format *vpu_fmt;
496
497 vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
498 if (!vpu_fmt)
499 return -EINVAL;
500
501 f->pixelformat = vpu_fmt->v4l2_pix_fmt;
502 f->flags = 0;
503
504 return 0;
505 }
506
507 static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
508 {
509 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
510 struct dec_info *p_dec_info = &inst->codec_info->dec_info;
511 const struct v4l2_frmsize_stepwise *frmsize;
512 const struct vpu_format *vpu_fmt;
513 int width, height;
514
515 dev_dbg(inst->dev->dev,
516 "%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
517 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
518 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
519
520 vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
521 if (!vpu_fmt) {
522 width = inst->dst_fmt.width;
523 height = inst->dst_fmt.height;
524 f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
525 frmsize = &dec_raw_frmsize;
526 } else {
527 width = f->fmt.pix_mp.width;
528 height = f->fmt.pix_mp.height;
529 f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
530 frmsize = vpu_fmt->v4l2_frmsize;
531 }
532
533 if (p_dec_info->initial_info_obtained) {
534 width = inst->dst_fmt.width;
535 height = inst->dst_fmt.height;
536 }
537
538 wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_RAW,
539 width, height, frmsize);
540 f->fmt.pix_mp.colorspace = inst->colorspace;
541 f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
542 f->fmt.pix_mp.quantization = inst->quantization;
543 f->fmt.pix_mp.xfer_func = inst->xfer_func;
544
545 return 0;
546 }
547
548 static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
549 {
550 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
551 int i, ret;
552
553 dev_dbg(inst->dev->dev,
554 "%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
555 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
556 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
557
558 ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
559 if (ret)
560 return ret;
561
562 inst->dst_fmt.width = f->fmt.pix_mp.width;
563 inst->dst_fmt.height = f->fmt.pix_mp.height;
564 inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
565 inst->dst_fmt.field = f->fmt.pix_mp.field;
566 inst->dst_fmt.flags = f->fmt.pix_mp.flags;
567 inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
568 for (i = 0; i < inst->dst_fmt.num_planes; i++) {
569 inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
570 inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
571 }
572
573 if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
574 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
575 inst->cbcr_interleave = true;
576 inst->nv21 = false;
577 inst->output_format = FORMAT_420;
578 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
579 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
580 inst->cbcr_interleave = true;
581 inst->nv21 = true;
582 inst->output_format = FORMAT_420;
583 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
584 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
585 inst->cbcr_interleave = true;
586 inst->nv21 = false;
587 inst->output_format = FORMAT_422;
588 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
589 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
590 inst->cbcr_interleave = true;
591 inst->nv21 = true;
592 inst->output_format = FORMAT_422;
593 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
594 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
595 inst->cbcr_interleave = false;
596 inst->nv21 = false;
597 inst->output_format = FORMAT_422;
598 } else {
599 inst->cbcr_interleave = false;
600 inst->nv21 = false;
601 inst->output_format = FORMAT_420;
602 }
603
604 return 0;
605 }
606
607 static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
608 {
609 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
610 int i;
611
612 f->fmt.pix_mp.width = inst->dst_fmt.width;
613 f->fmt.pix_mp.height = inst->dst_fmt.height;
614 f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
615 f->fmt.pix_mp.field = inst->dst_fmt.field;
616 f->fmt.pix_mp.flags = inst->dst_fmt.flags;
617 f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
618 for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
619 f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
620 f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
621 }
622
623 f->fmt.pix_mp.colorspace = inst->colorspace;
624 f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
625 f->fmt.pix_mp.quantization = inst->quantization;
626 f->fmt.pix_mp.xfer_func = inst->xfer_func;
627
628 return 0;
629 }
630
631 static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
632 {
633 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
634 const struct vpu_format *vpu_fmt;
635
636 dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);
637
638 vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
639 if (!vpu_fmt)
640 return -EINVAL;
641
642 f->pixelformat = vpu_fmt->v4l2_pix_fmt;
643 f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;
644
645 return 0;
646 }
647
648 static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
649 {
650 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
651 const struct v4l2_frmsize_stepwise *frmsize;
652 const struct vpu_format *vpu_fmt;
653 int width, height;
654
655 dev_dbg(inst->dev->dev,
656 "%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
657 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
658 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
659
660 vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
661 if (!vpu_fmt) {
662 width = inst->src_fmt.width;
663 height = inst->src_fmt.height;
664 f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
665 frmsize = &dec_hevc_frmsize;
666 } else {
667 width = f->fmt.pix_mp.width;
668 height = f->fmt.pix_mp.height;
669 f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
670 frmsize = vpu_fmt->v4l2_frmsize;
671 }
672
673 wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_CODEC,
674 width, height, frmsize);
675
676 return 0;
677 }
678
679 static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
680 {
681 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
682 const struct vpu_format *vpu_fmt;
683 int i, ret;
684
685 dev_dbg(inst->dev->dev,
686 "%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
687 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
688 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);
689
690 ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
691 if (ret)
692 return ret;
693
694 inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
695 if (inst->std == STD_UNKNOWN) {
696 dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
697 (char *)&f->fmt.pix_mp.pixelformat);
698 return -EINVAL;
699 }
700
701 inst->src_fmt.width = f->fmt.pix_mp.width;
702 inst->src_fmt.height = f->fmt.pix_mp.height;
703 inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
704 inst->src_fmt.field = f->fmt.pix_mp.field;
705 inst->src_fmt.flags = f->fmt.pix_mp.flags;
706 inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
707 for (i = 0; i < inst->src_fmt.num_planes; i++) {
708 inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
709 inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
710 }
711
712 inst->colorspace = f->fmt.pix_mp.colorspace;
713 inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
714 inst->quantization = f->fmt.pix_mp.quantization;
715 inst->xfer_func = f->fmt.pix_mp.xfer_func;
716
717 vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
718 if (!vpu_fmt)
719 return -EINVAL;
720
721 wave5_update_pix_fmt(&inst->dst_fmt, VPU_FMT_TYPE_RAW,
722 f->fmt.pix_mp.width, f->fmt.pix_mp.height,
723 vpu_fmt->v4l2_frmsize);
724
725 return 0;
726 }
727
728 static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
729 {
730 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
731
732 dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);
733
734 if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
735 return -EINVAL;
736 switch (s->target) {
737 case V4L2_SEL_TGT_COMPOSE_BOUNDS:
738 case V4L2_SEL_TGT_COMPOSE_PADDED:
739 s->r.left = 0;
740 s->r.top = 0;
741 s->r.width = inst->dst_fmt.width;
742 s->r.height = inst->dst_fmt.height;
743 break;
744 case V4L2_SEL_TGT_COMPOSE:
745 case V4L2_SEL_TGT_COMPOSE_DEFAULT:
746 s->r.left = 0;
747 s->r.top = 0;
748 if (inst->state > VPU_INST_STATE_OPEN) {
749 s->r = inst->conf_win;
750 } else {
751 s->r.width = inst->src_fmt.width;
752 s->r.height = inst->src_fmt.height;
753 }
754 break;
755 default:
756 return -EINVAL;
757 }
758
759 return 0;
760 }
761
762 static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
763 {
764 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
765
766 if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
767 return -EINVAL;
768
769 if (s->target != V4L2_SEL_TGT_COMPOSE)
770 return -EINVAL;
771
772 dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
773 s->r.width, s->r.height);
774
775 s->r.left = 0;
776 s->r.top = 0;
777 s->r.width = inst->dst_fmt.width;
778 s->r.height = inst->dst_fmt.height;
779
780 return 0;
781 }
782
783 static int wave5_vpu_dec_stop(struct vpu_instance *inst)
784 {
785 int ret = 0;
786 unsigned long flags;
787 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
788
789 spin_lock_irqsave(&inst->state_spinlock, flags);
790
791 if (m2m_ctx->is_draining) {
792 ret = -EBUSY;
793 goto unlock_and_return;
794 }
795
796 if (inst->state != VPU_INST_STATE_NONE) {
797 /*
798 * Temporarily release the state_spinlock so that subsequent
799 * calls do not block on a mutex while inside this spinlock.
800 */
801 spin_unlock_irqrestore(&inst->state_spinlock, flags);
802 ret = wave5_vpu_dec_set_eos_on_firmware(inst);
803 if (ret)
804 return ret;
805
806 spin_lock_irqsave(&inst->state_spinlock, flags);
807 /*
808 * TODO eliminate this check by using a separate check for
809 * draining triggered by a resolution change.
810 */
811 if (m2m_ctx->is_draining) {
812 ret = -EBUSY;
813 goto unlock_and_return;
814 }
815 }
816
817 /*
818 * Used to remember the EOS state after the streamoff/on transition on
819 * the capture queue.
820 */
821 inst->eos = true;
822
823 if (m2m_ctx->has_stopped)
824 goto unlock_and_return;
825
826 m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
827 m2m_ctx->is_draining = true;
828
829 /*
830 * If there is a last source buffer, sending EOS is deferred to
831 * device_run, in case that buffer is not in the ring buffer yet.
832 * Otherwise, we have to send the EOS signal to the firmware now,
833 * so that any pending PIC_RUN ends without a new bitstream buffer.
834 */
835 if (m2m_ctx->last_src_buf)
836 goto unlock_and_return;
837
838 if (inst->state == VPU_INST_STATE_NONE) {
839 send_eos_event(inst);
840 flag_last_buffer_done(inst);
841 }
842
843 unlock_and_return:
844 spin_unlock_irqrestore(&inst->state_spinlock, flags);
845 return ret;
846 }
847
848 static int wave5_vpu_dec_start(struct vpu_instance *inst)
849 {
850 int ret = 0;
851 unsigned long flags;
852 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
853 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
854
855 spin_lock_irqsave(&inst->state_spinlock, flags);
856
857 if (m2m_ctx->is_draining) {
858 ret = -EBUSY;
859 goto unlock_and_return;
860 }
861
862 if (m2m_ctx->has_stopped)
863 m2m_ctx->has_stopped = false;
864
865 vb2_clear_last_buffer_dequeued(dst_vq);
866 inst->eos = false;
867
868 unlock_and_return:
869 spin_unlock_irqrestore(&inst->state_spinlock, flags);
870 return ret;
871 }
872
873 static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
874 {
875 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
876 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
877 int ret;
878
879 dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);
880
881 ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
882 if (ret)
883 return ret;
884
885 switch (dc->cmd) {
886 case V4L2_DEC_CMD_STOP:
887 ret = wave5_vpu_dec_stop(inst);
888 /* Just in case we don't have anything to decode anymore */
889 v4l2_m2m_try_schedule(m2m_ctx);
890 break;
891 case V4L2_DEC_CMD_START:
892 ret = wave5_vpu_dec_start(inst);
893 break;
894 default:
895 ret = -EINVAL;
896 }
897
898 return ret;
899 }
900
901 static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
902 .vidioc_querycap = wave5_vpu_dec_querycap,
903 .vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,
904
905 .vidioc_enum_fmt_vid_cap = wave5_vpu_dec_enum_fmt_cap,
906 .vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
907 .vidioc_g_fmt_vid_cap_mplane = wave5_vpu_dec_g_fmt_cap,
908 .vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,
909
910 .vidioc_enum_fmt_vid_out = wave5_vpu_dec_enum_fmt_out,
911 .vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
912 .vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
913 .vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,
914
915 .vidioc_g_selection = wave5_vpu_dec_g_selection,
916 .vidioc_s_selection = wave5_vpu_dec_s_selection,
917
918 .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
919 /*
920 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
921 * there is no immediate use-case for supporting CREATE_BUFS on
922 * just the OUTPUT queue, disable CREATE_BUFS altogether.
923 */
924 .vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
925 .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
926 .vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
927 .vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
928 .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
929 .vidioc_streamon = v4l2_m2m_ioctl_streamon,
930 .vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
931
932 .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
933 .vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,
934
935 .vidioc_subscribe_event = wave5_vpu_subscribe_event,
936 .vidioc_unsubscribe_event = v4l2_event_unsubscribe,
937 };
938
939 static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
940 unsigned int *num_planes, unsigned int sizes[],
941 struct device *alloc_devs[])
942 {
943 struct vpu_instance *inst = vb2_get_drv_priv(q);
944 struct v4l2_pix_format_mplane inst_format =
945 (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
946 unsigned int i;
947
948 dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
949 *num_buffers, *num_planes, q->type);
950
951 *num_planes = inst_format.num_planes;
952
953 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
954 sizes[0] = inst_format.plane_fmt[0].sizeimage;
955 dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
956 } else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
957 if (*num_buffers < inst->fbc_buf_count)
958 *num_buffers = inst->fbc_buf_count;
959
960 for (i = 0; i < *num_planes; i++) {
961 sizes[i] = inst_format.plane_fmt[i].sizeimage;
962 dev_dbg(inst->dev->dev, "%s: size[%u]: %u\n", __func__, i, sizes[i]);
963 }
964 }
965
966 return 0;
967 }
968
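/*
 * Allocate the compressed (FBC) reference framebuffers, describe the linear
 * CAPTURE buffers that user space queued, and register both sets with the
 * firmware. All buffers are first marked as "in display" and the flag is
 * then cleared only for the buffers that are actually queued.
 */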
969 static int wave5_prepare_fb(struct vpu_instance *inst)
970 {
971 int linear_num;
972 int non_linear_num;
973 int fb_stride = 0, fb_height = 0;
974 int luma_size, chroma_size;
975 int ret, i;
976 struct v4l2_m2m_buffer *buf, *n;
977 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
978 u32 bitdepth = inst->codec_info->dec_info.initial_info.luma_bitdepth;
979
980 switch (bitdepth) {
981 case 8:
982 break;
983 case 10:
984 if (inst->std == W_HEVC_DEC &&
985 inst->dev->attr.support_hevc10bit_dec)
986 break;
987
988 fallthrough;
989 default:
990 dev_err(inst->dev->dev, "no support for %d bit depth\n", bitdepth);
991
992 return -EINVAL;
993 }
994
995 linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
996 non_linear_num = inst->fbc_buf_count;
997
998 for (i = 0; i < non_linear_num; i++) {
999 struct frame_buffer *frame = &inst->frame_buf[i];
1000 struct vpu_buf *vframe = &inst->frame_vbuf[i];
1001
1002 fb_stride = ALIGN(inst->dst_fmt.width * bitdepth / 8, 32);
1003 fb_height = ALIGN(inst->dst_fmt.height, 32);
1004 luma_size = fb_stride * fb_height;
1005
1006 chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;
1007
1008 if (vframe->size == (luma_size + chroma_size))
1009 continue;
1010
1011 if (vframe->size)
1012 wave5_vpu_dec_reset_framebuffer(inst, i);
1013
1014 vframe->size = luma_size + chroma_size;
1015 ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe);
1016 if (ret) {
1017 dev_dbg(inst->dev->dev,
1018 "%s: Allocating FBC buf of size %zu, fail: %d\n",
1019 __func__, vframe->size, ret);
1020 return ret;
1021 }
1022
1023 frame->buf_y = vframe->daddr;
1024 frame->buf_cb = vframe->daddr + luma_size;
1025 frame->buf_cr = (dma_addr_t)-1;
1026 frame->size = vframe->size;
1027 frame->width = inst->src_fmt.width;
1028 frame->stride = fb_stride;
1029 frame->map_type = COMPRESSED_FRAME_MAP;
1030 frame->update_fb_info = true;
1031 }
1032 /* In case the count has reduced, clean up leftover framebuffer memory */
1033 for (i = non_linear_num; i < MAX_REG_FRAME; i++) {
1034 ret = wave5_vpu_dec_reset_framebuffer(inst, i);
1035 if (ret)
1036 break;
1037 }
1038
1039 for (i = 0; i < linear_num; i++) {
1040 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1041 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1042 struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i);
1043 struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i];
1044 dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0;
1045 u32 buf_size = 0;
1046 u32 fb_stride = inst->dst_fmt.width;
1047 u32 luma_size = fb_stride * inst->dst_fmt.height;
1048 u32 chroma_size;
1049
1050 if (inst->output_format == FORMAT_422)
1051 chroma_size = fb_stride * inst->dst_fmt.height / 2;
1052 else
1053 chroma_size = fb_stride * inst->dst_fmt.height / 4;
1054
1055 if (inst->dst_fmt.num_planes == 1) {
1056 buf_size = vb2_plane_size(vb, 0);
1057 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1058 buf_addr_cb = buf_addr_y + luma_size;
1059 buf_addr_cr = buf_addr_cb + chroma_size;
1060 } else if (inst->dst_fmt.num_planes == 2) {
1061 buf_size = vb2_plane_size(vb, 0) +
1062 vb2_plane_size(vb, 1);
1063 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1064 buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1065 buf_addr_cr = buf_addr_cb + chroma_size;
1066 } else if (inst->dst_fmt.num_planes == 3) {
1067 buf_size = vb2_plane_size(vb, 0) +
1068 vb2_plane_size(vb, 1) +
1069 vb2_plane_size(vb, 2);
1070 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1071 buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1072 buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2);
1073 }
1074
1075 frame->buf_y = buf_addr_y;
1076 frame->buf_cb = buf_addr_cb;
1077 frame->buf_cr = buf_addr_cr;
1078 frame->size = buf_size;
1079 frame->width = inst->src_fmt.width;
1080 frame->stride = fb_stride;
1081 frame->map_type = LINEAR_FRAME_MAP;
1082 frame->update_fb_info = true;
1083 }
1084
1085 ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num,
1086 fb_stride, inst->dst_fmt.height);
1087 if (ret) {
1088 dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d",
1089 __func__, ret);
1090 return ret;
1091 }
1092
1093 /*
1094 * Mark all frame buffers as out of display, to avoid using them before
1095 * the application has queued them.
1096 */
1097 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1098 ret = wave5_vpu_dec_set_disp_flag(inst, i);
1099 if (ret) {
1100 dev_dbg(inst->dev->dev,
1101 "%s: Setting display flag of buf index: %u, fail: %d\n",
1102 __func__, i, ret);
1103 }
1104 }
1105
1106 v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) {
1107 struct vb2_v4l2_buffer *vbuf = &buf->vb;
1108
1109 ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index);
1110 if (ret)
1111 dev_dbg(inst->dev->dev,
1112 "%s: Clearing display flag of buf index: %u, fail: %d\n",
1113 __func__, vbuf->vb2_buf.index, ret);
1114 }
1115
1116 return 0;
1117 }
1118
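/* Copy a chunk of bitstream into the ring buffer, wrapping around at its end. */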
1119 static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
1120 struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
1121 {
1122 size_t size;
1123 size_t offset = wr_ptr - ring_buffer->daddr;
1124 int ret;
1125
1126 if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
1127 size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
1128 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
1129 if (ret < 0)
1130 return ret;
1131
1132 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
1133 buffer_size - size);
1134 if (ret < 0)
1135 return ret;
1136 } else {
1137 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
1138 buffer_size);
1139 if (ret < 0)
1140 return ret;
1141 }
1142
1143 return 0;
1144 }
1145
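/*
 * Copy all queued, not yet consumed OUTPUT buffers into the bitstream ring
 * buffer and report the added bytes to the firmware. Stops once the ring
 * buffer has no room for the next buffer or the last draining buffer has
 * been written.
 */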
1146 static int fill_ringbuffer(struct vpu_instance *inst)
1147 {
1148 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1149 struct v4l2_m2m_buffer *buf, *n;
1150 int ret;
1151
1152 if (m2m_ctx->last_src_buf) {
1153 struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
1154
1155 if (vpu_buf->consumed) {
1156 dev_dbg(inst->dev->dev, "last src buffer already written\n");
1157 return 0;
1158 }
1159 }
1160
1161 v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
1162 struct vb2_v4l2_buffer *vbuf = &buf->vb;
1163 struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1164 struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1165 size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
1166 void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
1167 dma_addr_t rd_ptr = 0;
1168 dma_addr_t wr_ptr = 0;
1169 size_t remain_size = 0;
1170
1171 if (vpu_buf->consumed) {
1172 dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
1173 vbuf->vb2_buf.index);
1174 continue;
1175 }
1176
1177 if (!src_buf) {
1178 dev_dbg(inst->dev->dev,
1179 "%s: Acquiring kernel pointer to src buf (%u), fail\n",
1180 __func__, vbuf->vb2_buf.index);
1181 break;
1182 }
1183
1184 ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
1185 if (ret) {
1186 /* Unable to acquire the mutex */
1187 dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
1188 ret);
1189 return ret;
1190 }
1191
1192 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);
1193
1194 if (remain_size < src_size) {
1195 dev_dbg(inst->dev->dev,
1196 "%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
1197 __func__, remain_size, src_size, vbuf->vb2_buf.index);
1198 break;
1199 }
1200
1201 ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
1202 if (ret) {
1203 dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
1204 vbuf->vb2_buf.index, ret);
1205 return ret;
1206 }
1207
1208 ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
1209 if (ret) {
1210 dev_dbg(inst->dev->dev,
1211 "update_bitstream_buffer fail: %d for src buf (%u)\n",
1212 ret, vbuf->vb2_buf.index);
1213 break;
1214 }
1215
1216 vpu_buf->consumed = true;
1217
1218 /* Don't write buffers past the last one while draining. */
1219 if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
1220 dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
1221 break;
1222 }
1223 }
1224
1225 return 0;
1226 }
1227
1228 static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
1229 {
1230 struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1231 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1232 struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1233 struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1234
1235 vpu_buf->consumed = false;
1236 vbuf->sequence = inst->queued_src_buf_num++;
1237
1238 v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1239 }
1240
1241 static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
1242 {
1243 struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1244 struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1245 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1246
1247 vbuf->sequence = inst->queued_dst_buf_num++;
1248
1249 if (inst->state == VPU_INST_STATE_PIC_RUN) {
1250 struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
1251 int ret;
1252
1253 /*
1254 * The buffer is already registered; just clear the display flag
1255 * to let the firmware know it can be used.
1256 */
1257 vpu_buf->display = false;
1258 ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
1259 if (ret) {
1260 dev_dbg(inst->dev->dev,
1261 "%s: Clearing the display flag of buffer index: %u, fail: %d\n",
1262 __func__, vb->index, ret);
1263 }
1264 }
1265
1266 if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
1267 unsigned int i;
1268
1269 for (i = 0; i < vb->num_planes; i++)
1270 vb2_set_plane_payload(vb, i, 0);
1271
1272 vbuf->field = V4L2_FIELD_NONE;
1273
1274 send_eos_event(inst);
1275 v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
1276 } else {
1277 v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1278 }
1279 }
1280
1281 static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
1282 {
1283 struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1284 struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1285
1286 dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
1287 __func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
1288 vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
1289
1290 if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1291 wave5_vpu_dec_buf_queue_src(vb);
1292 else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
1293 wave5_vpu_dec_buf_queue_dst(vb);
1294 }
1295
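/*
 * Size the bitstream ring buffer at four times the 1 KiB aligned OUTPUT
 * buffer size and record the initial read pointer.
 */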
1296 static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
1297 {
1298 int ret;
1299 struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1300
1301 ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
1302 ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
1303 if (ret) {
1304 dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
1305 __func__, ring_buffer->size, ret);
1306 return ret;
1307 }
1308
1309 inst->last_rd_ptr = ring_buffer->daddr;
1310
1311 return 0;
1312 }
1313
1314 static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
1315 {
1316 struct vpu_instance *inst = vb2_get_drv_priv(q);
1317 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1318 int ret = 0;
1319
1320 dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1321 pm_runtime_resume_and_get(inst->dev->dev);
1322
1323 v4l2_m2m_update_start_streaming_state(m2m_ctx, q);
1324
1325 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
1326 struct dec_open_param open_param;
1327
1328 memset(&open_param, 0, sizeof(struct dec_open_param));
1329
1330 ret = wave5_vpu_dec_allocate_ring_buffer(inst);
1331 if (ret)
1332 goto return_buffers;
1333
1334 open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
1335 open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;
1336
1337 ret = wave5_vpu_dec_open(inst, &open_param);
1338 if (ret) {
1339 dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
1340 __func__, ret);
1341 goto free_bitstream_vbuf;
1342 }
1343
1344 ret = switch_state(inst, VPU_INST_STATE_OPEN);
1345 if (ret)
1346 goto free_bitstream_vbuf;
1347 } else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1348 struct dec_initial_info *initial_info =
1349 &inst->codec_info->dec_info.initial_info;
1350
1351 if (inst->state == VPU_INST_STATE_STOP)
1352 ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1353 if (ret)
1354 goto return_buffers;
1355
1356 if (inst->state == VPU_INST_STATE_INIT_SEQ &&
1357 inst->dev->product_code == WAVE521C_CODE) {
1358 if (initial_info->luma_bitdepth != 8) {
1359 dev_info(inst->dev->dev, "%s: no support for %d bit depth",
1360 __func__, initial_info->luma_bitdepth);
1361 ret = -EINVAL;
1362 goto return_buffers;
1363 }
1364 }
1365
1366 }
1367 pm_runtime_mark_last_busy(inst->dev->dev);
1368 pm_runtime_put_autosuspend(inst->dev->dev);
1369 return ret;
1370
1371 free_bitstream_vbuf:
1372 wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
1373 return_buffers:
1374 wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
1375 pm_runtime_put_autosuspend(inst->dev->dev);
1376 return ret;
1377 }
1378
1379 static int streamoff_output(struct vb2_queue *q)
1380 {
1381 struct vpu_instance *inst = vb2_get_drv_priv(q);
1382 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1383 struct vb2_v4l2_buffer *buf;
1384 int ret;
1385 dma_addr_t new_rd_ptr;
1386 struct dec_output_info dec_info;
1387 unsigned int i;
1388
1389 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1390 ret = wave5_vpu_dec_set_disp_flag(inst, i);
1391 if (ret)
1392 dev_dbg(inst->dev->dev,
1393 "%s: Setting display flag of buf index: %u, fail: %d\n",
1394 __func__, i, ret);
1395 }
1396
1397 while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
1398 dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
1399 __func__, buf->vb2_buf.type, buf->vb2_buf.index);
1400 v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1401 }
1402
1403 while (wave5_vpu_dec_get_output_info(inst, &dec_info) == 0) {
1404 if (dec_info.index_frame_display >= 0)
1405 wave5_vpu_dec_set_disp_flag(inst, dec_info.index_frame_display);
1406 }
1407
1408 ret = wave5_vpu_flush_instance(inst);
1409 if (ret)
1410 return ret;
1411
1412 /* Reset the ring buffer information */
1413 new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
1414 inst->last_rd_ptr = new_rd_ptr;
1415 inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
1416 inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;
1417
1418 if (v4l2_m2m_has_stopped(m2m_ctx))
1419 send_eos_event(inst);
1420
1421 /* streamoff on output cancels any draining operation */
1422 inst->eos = false;
1423
1424 return 0;
1425 }
1426
1427 static int streamoff_capture(struct vb2_queue *q)
1428 {
1429 struct vpu_instance *inst = vb2_get_drv_priv(q);
1430 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1431 struct vb2_v4l2_buffer *buf;
1432 unsigned int i;
1433 int ret = 0;
1434
1435 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1436 ret = wave5_vpu_dec_set_disp_flag(inst, i);
1437 if (ret)
1438 dev_dbg(inst->dev->dev,
1439 "%s: Setting display flag of buf index: %u, fail: %d\n",
1440 __func__, i, ret);
1441 }
1442
1443 while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
1444 u32 plane;
1445
1446 dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
1447 __func__, buf->vb2_buf.type, buf->vb2_buf.index);
1448
1449 for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
1450 vb2_set_plane_payload(&buf->vb2_buf, plane, 0);
1451
1452 v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1453 }
1454
1455 if (inst->needs_reallocation) {
1456 wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
1457 inst->needs_reallocation = false;
1458 }
1459
1460 if (v4l2_m2m_has_stopped(m2m_ctx)) {
1461 ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1462 if (ret)
1463 return ret;
1464 }
1465
1466 return 0;
1467 }
1468
1469 static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
1470 {
1471 struct vpu_instance *inst = vb2_get_drv_priv(q);
1472 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1473 bool check_cmd = TRUE;
1474
1475 dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1476 pm_runtime_resume_and_get(inst->dev->dev);
1477
1478 while (check_cmd) {
1479 struct queue_status_info q_status;
1480 struct dec_output_info dec_output_info;
1481
1482 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1483
1484 if (q_status.report_queue_count == 0)
1485 break;
1486
1487 if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1488 break;
1489
1490 if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
1491 dev_dbg(inst->dev->dev, "there is no output info\n");
1492 }
1493
1494 v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);
1495
1496 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1497 streamoff_output(q);
1498 else
1499 streamoff_capture(q);
1500
1501 pm_runtime_mark_last_busy(inst->dev->dev);
1502 pm_runtime_put_autosuspend(inst->dev->dev);
1503 }
1504
1505 static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
1506 .queue_setup = wave5_vpu_dec_queue_setup,
1507 .buf_queue = wave5_vpu_dec_buf_queue,
1508 .start_streaming = wave5_vpu_dec_start_streaming,
1509 .stop_streaming = wave5_vpu_dec_stop_streaming,
1510 };
1511
1512 static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
1513 struct v4l2_pix_format_mplane *dst_fmt)
1514 {
1515 src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
1516 wave5_update_pix_fmt(src_fmt, VPU_FMT_TYPE_CODEC,
1517 W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
1518 &dec_hevc_frmsize);
1519
1520 dst_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
1521 wave5_update_pix_fmt(dst_fmt, VPU_FMT_TYPE_RAW,
1522 W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
1523 &dec_raw_frmsize);
1524 }
1525
1526 static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
1527 {
1528 return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
1529 }
1530
1531 static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
1532 .finish_process = wave5_vpu_dec_finish_decode,
1533 };
1534
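/*
 * Ask the firmware to parse the stream headers and wait for the result; on
 * success the new sequence information is applied via
 * handle_dynamic_resolution_change().
 */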
1535 static int initialize_sequence(struct vpu_instance *inst)
1536 {
1537 struct dec_initial_info initial_info;
1538 int ret = 0;
1539
1540 memset(&initial_info, 0, sizeof(struct dec_initial_info));
1541
1542 ret = wave5_vpu_dec_issue_seq_init(inst);
1543 if (ret) {
1544 dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
1545 __func__, ret);
1546 return ret;
1547 }
1548
1549 if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1550 dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);
1551
1552 ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
1553 if (ret) {
1554 dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
1555 __func__, ret, initial_info.seq_init_err_reason);
1556 wave5_handle_src_buffer(inst, initial_info.rd_ptr);
1557 return ret;
1558 }
1559
1560 handle_dynamic_resolution_change(inst);
1561
1562 return 0;
1563 }
1564
1565 static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
1566 {
1567 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1568
1569 lockdep_assert_held(&inst->state_spinlock);
1570 return m2m_ctx->is_draining || inst->eos;
1571 }
1572
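/*
 * m2m device_run: push pending bitstream data into the ring buffer and,
 * depending on the instance state, either initialize the sequence, prepare
 * and register the framebuffers, or start decoding one frame. The job is
 * kept active while a firmware command is still in flight.
 */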
1573 static void wave5_vpu_dec_device_run(void *priv)
1574 {
1575 struct vpu_instance *inst = priv;
1576 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1577 struct queue_status_info q_status;
1578 u32 fail_res = 0;
1579 int ret = 0;
1580
1581 dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
1582 pm_runtime_resume_and_get(inst->dev->dev);
1583 ret = fill_ringbuffer(inst);
1584 if (ret) {
1585 dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
1586 goto finish_job_and_return;
1587 }
1588
1589 switch (inst->state) {
1590 case VPU_INST_STATE_OPEN:
1591 ret = initialize_sequence(inst);
1592 if (ret) {
1593 unsigned long flags;
1594
1595 spin_lock_irqsave(&inst->state_spinlock, flags);
1596 if (wave5_is_draining_or_eos(inst) &&
1597 wave5_last_src_buffer_consumed(m2m_ctx)) {
1598 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1599
1600 switch_state(inst, VPU_INST_STATE_STOP);
1601
1602 if (vb2_is_streaming(dst_vq))
1603 send_eos_event(inst);
1604 else
1605 handle_dynamic_resolution_change(inst);
1606
1607 flag_last_buffer_done(inst);
1608 }
1609 spin_unlock_irqrestore(&inst->state_spinlock, flags);
1610 } else {
1611 switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1612 }
1613
1614 break;
1615
1616 case VPU_INST_STATE_INIT_SEQ:
1617 /*
1618 * Do this early: preparing the framebuffers can trigger an IRQ
1619 * before we have had a chance to switch, which would lead to an
1620 * invalid state change.
1621 */
1622 switch_state(inst, VPU_INST_STATE_PIC_RUN);
1623
1624 /*
1625 * During DRC, the picture decoding remains pending, so just leave the job
1626 * active until this decode operation completes.
1627 */
1628 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1629
1630 /*
1631 * The sequence must be analyzed first to calculate the proper
1632 * size of the auxiliary buffers.
1633 */
1634 ret = wave5_prepare_fb(inst);
1635 if (ret) {
1636 dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
1637 switch_state(inst, VPU_INST_STATE_STOP);
1638 break;
1639 }
1640
1641 if (q_status.instance_queue_count) {
1642 dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
1643 return;
1644 }
1645
1646 fallthrough;
1647 case VPU_INST_STATE_PIC_RUN:
1648 ret = start_decode(inst, &fail_res);
1649 if (ret) {
1650 dev_err(inst->dev->dev,
1651 "Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
1652 m2m_ctx, ret, fail_res);
1653 break;
1654 }
1655 /* Return so that we leave this job active */
1656 dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
1657 return;
1658 default:
1659 WARN(1, "Execution of a job in state %s illegal.\n", state_to_str(inst->state));
1660 break;
1661 }
1662
1663 finish_job_and_return:
1664 dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
1665 pm_runtime_mark_last_busy(inst->dev->dev);
1666 pm_runtime_put_autosuspend(inst->dev->dev);
1667 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
1668 }
1669
1670 static void wave5_vpu_dec_job_abort(void *priv)
1671 {
1672 struct vpu_instance *inst = priv;
1673 int ret;
1674
1675 ret = switch_state(inst, VPU_INST_STATE_STOP);
1676 if (ret)
1677 return;
1678
1679 ret = wave5_vpu_dec_set_eos_on_firmware(inst);
1680 if (ret)
1681 dev_warn(inst->dev->dev,
1682 "Setting EOS for the bitstream, fail: %d\n", ret);
1683 }
1684
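/*
 * m2m job_ready: a job may run once the decoder instance is open and,
 * depending on the state, enough bitstream and CAPTURE buffers are
 * available (or the instance is draining).
 */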
1685 static int wave5_vpu_dec_job_ready(void *priv)
1686 {
1687 struct vpu_instance *inst = priv;
1688 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1689 unsigned long flags;
1690 int ret = 0;
1691
1692 spin_lock_irqsave(&inst->state_spinlock, flags);
1693
1694 switch (inst->state) {
1695 case VPU_INST_STATE_NONE:
1696 dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
1697 break;
1698 case VPU_INST_STATE_OPEN:
1699 if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
1700 v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
1701 ret = 1;
1702 break;
1703 }
1704
1705 dev_dbg(inst->dev->dev,
1706 "Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
1707 break;
1708 case VPU_INST_STATE_INIT_SEQ:
1709 case VPU_INST_STATE_PIC_RUN:
1710 if (!m2m_ctx->cap_q_ctx.q.streaming) {
1711 dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
1712 break;
1713 } else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
1714 dev_dbg(inst->dev->dev,
1715 "No capture buffer ready to decode!\n");
1716 break;
1717 } else if (!wave5_is_draining_or_eos(inst) &&
1718 !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
1719 dev_dbg(inst->dev->dev,
1720 "No bitstream data to decode!\n");
1721 break;
1722 }
1723 ret = 1;
1724 break;
1725 case VPU_INST_STATE_STOP:
1726 dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
1727 break;
1728 }
1729
1730 spin_unlock_irqrestore(&inst->state_spinlock, flags);
1731
1732 return ret;
1733 }
1734
1735 static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
1736 .device_run = wave5_vpu_dec_device_run,
1737 .job_abort = wave5_vpu_dec_job_abort,
1738 .job_ready = wave5_vpu_dec_job_ready,
1739 };
1740
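/*
 * open() on the decoder video device: allocate a vpu_instance and set up its
 * m2m context, control handler and default formats.
 */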
static int wave5_vpu_open_dec(struct file *filp)
{
	struct video_device *vdev = video_devdata(filp);
	struct vpu_device *dev = video_drvdata(filp);
	struct vpu_instance *inst = NULL;
	struct v4l2_m2m_ctx *m2m_ctx;
	int ret = 0;

	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	inst->dev = dev;
	inst->type = VPU_INST_TYPE_DEC;
	inst->ops = &wave5_vpu_dec_inst_ops;

	spin_lock_init(&inst->state_spinlock);

	inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
	if (!inst->codec_info) {
		/* Nothing else has been set up yet, so just free the instance. */
		kfree(inst);
		return -ENOMEM;
	}

	v4l2_fh_init(&inst->v4l2_fh, vdev);
	filp->private_data = &inst->v4l2_fh;
	v4l2_fh_add(&inst->v4l2_fh);

	INIT_LIST_HEAD(&inst->list);

	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
	inst->v4l2_fh.m2m_ctx =
		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
		goto cleanup_inst;
	}
	m2m_ctx = inst->v4l2_fh.m2m_ctx;

	v4l2_m2m_set_src_buffered(m2m_ctx, true);
	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
	/*
	 * We use the M2M job queue to ensure synchronization of steps where
	 * needed, as IOCTLs can occur at any time and we need to run commands
	 * on the firmware in a specified order.
	 * In order to initialize the sequence on the firmware within an M2M
	 * job, the M2M framework needs to be able to queue jobs before the
	 * CAPTURE queue has been started, because we need the results of the
	 * initialization to properly prepare the CAPTURE queue with the
	 * correct number of buffers.
	 * By setting ignore_cap_streaming to true the m2m framework will call
	 * job_ready as soon as the OUTPUT queue is streaming, instead of
	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
	 */
	m2m_ctx->ignore_cap_streaming = true;

	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);

	if (inst->v4l2_ctrl_hdl.error) {
		ret = -ENODEV;
		goto cleanup_inst;
	}

	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);

	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
	inst->colorspace = V4L2_COLORSPACE_REC709;
	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;

	init_completion(&inst->irq_done);

	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
	if (inst->id < 0) {
		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
		ret = inst->id;
		goto cleanup_inst;
	}

	/*
	 * For Wave515, the SRAM memory has already been allocated in
	 * wave5_vpu_dec_register_device().
	 */
	if (inst->dev->product_code != WAVE515_CODE)
		wave5_vdi_allocate_sram(inst->dev);

	ret = mutex_lock_interruptible(&dev->dev_lock);
	if (ret)
		goto cleanup_inst;

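	/* Enable runtime PM autosuspend when the first instance is created. */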
	if (list_empty(&dev->instances))
		pm_runtime_use_autosuspend(inst->dev->dev);

	list_add_tail(&inst->list, &dev->instances);

	mutex_unlock(&dev->dev_lock);

	return 0;

cleanup_inst:
	wave5_cleanup_instance(inst);
	return ret;
}

static int wave5_vpu_dec_release(struct file *filp)
{
	return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder");
}

static const struct v4l2_file_operations wave5_vpu_dec_fops = {
	.owner = THIS_MODULE,
	.open = wave5_vpu_open_dec,
	.release = wave5_vpu_dec_release,
	.unlocked_ioctl = video_ioctl2,
	.poll = v4l2_m2m_fop_poll,
	.mmap = v4l2_m2m_fop_mmap,
};

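/*
 * Create the decoder video device and its m2m framework instance and register
 * them with the V4L2 core.
 */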
int wave5_vpu_dec_register_device(struct vpu_device *dev)
{
	struct video_device *vdev_dec;
	int ret;

	/*
	 * Secondary AXI setup for Wave515 is done by the INIT_VPU command,
	 * i.e. wave5_vpu_init(), which is why the SRAM memory is allocated
	 * early here.
	 */
	if (dev->product_code == WAVE515_CODE)
		wave5_vdi_allocate_sram(dev);

	vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL);
	if (!vdev_dec)
		return -ENOMEM;

	dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops);
	if (IS_ERR(dev->v4l2_m2m_dec_dev)) {
		ret = PTR_ERR(dev->v4l2_m2m_dec_dev);
		dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret);
		return -EINVAL;
	}

	dev->video_dev_dec = vdev_dec;

	strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name));
	vdev_dec->fops = &wave5_vpu_dec_fops;
	vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops;
	vdev_dec->release = video_device_release_empty;
	vdev_dec->v4l2_dev = &dev->v4l2_dev;
	vdev_dec->vfl_dir = VFL_DIR_M2M;
	vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
	vdev_dec->lock = &dev->dev_lock;

	ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1);
	if (ret)
		return ret;

	video_set_drvdata(vdev_dec, dev);

	return 0;
}

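/*
 * Undo wave5_vpu_dec_register_device(): unregister the video device and
 * release the m2m framework instance.
 */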
void wave5_vpu_dec_unregister_device(struct vpu_device *dev)
{
	/*
	 * Free the Wave515 SRAM memory that was allocated in
	 * wave5_vpu_dec_register_device().
	 */
	if (dev->product_code == WAVE515_CODE)
		wave5_vdi_free_sram(dev);

	video_unregister_device(dev->video_dev_dec);
	if (dev->v4l2_m2m_dec_dev)
		v4l2_m2m_release(dev->v4l2_m2m_dec_dev);
}