1 /*
2 * Copyright © 2024 Collabora Ltd.
3 *
4 * Derived from tu_cmd_buffer.c which is:
5 * Copyright © 2016 Red Hat.
6 * Copyright © 2016 Bas Nieuwenhuizen
7 * Copyright © 2015 Intel Corporation
8 *
9 * SPDX-License-Identifier: MIT
10 */
11
12 #include "genxml/gen_macros.h"
13
14 #include "panvk_buffer.h"
15 #include "panvk_cmd_alloc.h"
16 #include "panvk_cmd_buffer.h"
17 #include "panvk_cmd_desc_state.h"
18 #include "panvk_cmd_meta.h"
19 #include "panvk_device.h"
20 #include "panvk_entrypoints.h"
21 #include "panvk_image.h"
22 #include "panvk_image_view.h"
23 #include "panvk_instance.h"
24 #include "panvk_priv_bo.h"
25 #include "panvk_shader.h"
26
27 #include "pan_desc.h"
28 #include "pan_earlyzs.h"
29 #include "pan_encoder.h"
30 #include "pan_format.h"
31 #include "pan_jc.h"
32 #include "pan_props.h"
33 #include "pan_samples.h"
34 #include "pan_shader.h"
35
36 #include "vk_format.h"
37 #include "vk_meta.h"
38 #include "vk_pipeline_layout.h"
39
40 struct panvk_draw_info {
41 struct {
42 uint32_t size;
43 uint32_t offset;
44 int32_t vertex_offset;
45 } index;
46
47 struct {
48 uint32_t base;
49 uint32_t count;
50 } vertex;
51
52 struct {
53 uint32_t base;
54 uint32_t count;
55 } instance;
56 };
57
58 #define is_dirty(__cmdbuf, __name) \
59 BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty, \
60 MESA_VK_DYNAMIC_##__name)
61
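/* Pack a single vertex attribute descriptor. Per-vertex attributes use a
 * plain 1D buffer with the vertex offset applied, per-instance attributes use
 * the POT or NPOT divisor types depending on the effective divisor
 * (vertex count * binding divisor).
 */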
62 static void
emit_vs_attrib(const struct panvk_draw_info *draw,
64 const struct vk_vertex_attribute_state *attrib_info,
65 const struct vk_vertex_binding_state *buf_info,
66 const struct panvk_attrib_buf *buf, uint32_t vb_desc_offset,
67 struct mali_attribute_packed *desc)
68 {
69 bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
70 enum pipe_format f = vk_format_to_pipe_format(attrib_info->format);
71 unsigned buf_idx = vb_desc_offset + attrib_info->binding;
72 unsigned divisor = draw->vertex.count * buf_info->divisor;
73
74 pan_pack(desc, ATTRIBUTE, cfg) {
75 cfg.offset = attrib_info->offset;
76 cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw;
77 cfg.table = 0;
78 cfg.buffer_index = buf_idx;
79 cfg.stride = buf_info->stride;
80 if (!per_instance) {
81 /* Per-vertex */
82 cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D;
83 cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
84 cfg.offset_enable = true;
85 } else if (util_is_power_of_two_or_zero(divisor)) {
86 /* Per-instance, POT divisor */
87 cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
88 cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE;
89 cfg.divisor_r = __builtin_ctz(divisor);
90 } else {
91 /* Per-instance, NPOT divisor */
92 cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
93 cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE;
94 cfg.divisor_d = panfrost_compute_magic_divisor(divisor, &cfg.divisor_r,
95 &cfg.divisor_e);
96 }
97 }
98 }
99
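/* (Re)build the vertex shader driver descriptor set: the vertex attribute
 * descriptors, a dummy sampler, the dynamic buffer descriptors and one BUFFER
 * descriptor per vertex buffer binding. Only re-emitted when the vertex input
 * state or the bound vertex buffers changed.
 */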
100 static VkResult
prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf,
102 struct panvk_draw_info *draw)
103 {
104 struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
105 bool dirty = is_dirty(cmdbuf, VI) || is_dirty(cmdbuf, VI_BINDINGS_VALID) ||
106 is_dirty(cmdbuf, VI_BINDING_STRIDES) ||
107 cmdbuf->state.gfx.vb.dirty ||
108 !vs_desc_state->driver_set.dev_addr;
109
110 if (!dirty)
111 return VK_SUCCESS;
112
113 const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
114 const struct vk_vertex_input_state *vi =
115 cmdbuf->vk.dynamic_graphics_state.vi;
116 unsigned num_vs_attribs = util_last_bit(vi->attributes_valid);
117 uint32_t vb_count = 0;
118
119 for (unsigned i = 0; i < num_vs_attribs; i++) {
120 if (vi->attributes_valid & BITFIELD_BIT(i))
121 vb_count = MAX2(vi->attributes[i].binding + 1, vb_count);
122 }
123
124 uint32_t vb_offset = vs->desc_info.dyn_bufs.count + MAX_VS_ATTRIBS + 1;
125 uint32_t desc_count = vb_offset + vb_count;
126 const struct panvk_descriptor_state *desc_state =
127 &cmdbuf->state.gfx.desc_state;
128 struct panfrost_ptr driver_set = panvk_cmd_alloc_dev_mem(
129 cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
130 struct panvk_opaque_desc *descs = driver_set.cpu;
131
132 if (!driver_set.gpu)
133 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
134
135 for (uint32_t i = 0; i < MAX_VS_ATTRIBS; i++) {
136 if (vi->attributes_valid & BITFIELD_BIT(i)) {
137 unsigned binding = vi->attributes[i].binding;
138
139 emit_vs_attrib(draw, &vi->attributes[i], &vi->bindings[binding],
140 &cmdbuf->state.gfx.vb.bufs[binding], vb_offset,
141 (struct mali_attribute_packed *)(&descs[i]));
142 } else {
143 memset(&descs[i], 0, sizeof(descs[0]));
144 }
145 }
146
147 /* Dummy sampler always comes right after the vertex attribs. */
148 pan_pack(&descs[MAX_VS_ATTRIBS], SAMPLER, _) {
149 }
150
151 panvk_per_arch(cmd_fill_dyn_bufs)(
152 desc_state, vs,
153 (struct mali_buffer_packed *)(&descs[MAX_VS_ATTRIBS + 1]));
154
155 for (uint32_t i = 0; i < vb_count; i++) {
156 const struct panvk_attrib_buf *vb = &cmdbuf->state.gfx.vb.bufs[i];
157
158 pan_pack(&descs[vb_offset + i], BUFFER, cfg) {
159 if (vi->bindings_valid & BITFIELD_BIT(i)) {
160 cfg.address = vb->address;
161 cfg.size = vb->size;
162 } else {
163 cfg.address = 0;
164 cfg.size = 0;
165 }
166 }
167 }
168
169 vs_desc_state->driver_set.dev_addr = driver_set.gpu;
170 vs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE;
171 return VK_SUCCESS;
172 }
173
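/* (Re)build the fragment shader driver descriptor set: a dummy sampler
 * followed by the dynamic buffer descriptors.
 */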
174 static VkResult
prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
176 {
177 struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
178
179 if (fs_desc_state->driver_set.dev_addr)
180 return VK_SUCCESS;
181
182 const struct panvk_descriptor_state *desc_state =
183 &cmdbuf->state.gfx.desc_state;
184 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
185 uint32_t desc_count = fs->desc_info.dyn_bufs.count + 1;
186 struct panfrost_ptr driver_set = panvk_cmd_alloc_dev_mem(
187 cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
188 struct panvk_opaque_desc *descs = driver_set.cpu;
189
190 if (desc_count && !driver_set.gpu)
191 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
192
193 /* Dummy sampler always comes first. */
194 pan_pack(&descs[0], SAMPLER, _) {
195 }
196
197 panvk_per_arch(cmd_fill_dyn_bufs)(desc_state, fs,
198 (struct mali_buffer_packed *)(&descs[1]));
199
200 fs_desc_state->driver_set.dev_addr = driver_set.gpu;
201 fs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE;
202 return VK_SUCCESS;
203 }
204
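/* Refresh the graphics sysvals (blend constants, viewport scale/offset) and
 * invalidate the push uniform buffer when any of them changed.
 */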
205 static void
prepare_sysvals(struct panvk_cmd_buffer *cmdbuf)
207 {
208 struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
209 struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
210
211 if (is_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
212 for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++)
213 sysvals->blend.constants[i] =
214 CLAMP(cb->blend_constants[i], 0.0f, 1.0f);
215 cmdbuf->state.gfx.push_uniforms = 0;
216 }
217
218 if (is_dirty(cmdbuf, VP_VIEWPORTS)) {
219 VkViewport *viewport = &cmdbuf->vk.dynamic_graphics_state.vp.viewports[0];
220
221 /* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of
222 * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
223 * end of the section, the spec defines:
224 *
225 * px = width
226 * py = height
227 * pz = maxDepth - minDepth
228 */
229 sysvals->viewport.scale.x = 0.5f * viewport->width;
230 sysvals->viewport.scale.y = 0.5f * viewport->height;
231 sysvals->viewport.scale.z = (viewport->maxDepth - viewport->minDepth);
232
233 /* Upload the viewport offset. Defined as (ox, oy, oz) at the start of
234 * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
235 * end of the section, the spec defines:
236 *
237 * ox = x + width/2
238 * oy = y + height/2
239 * oz = minDepth
240 */
241 sysvals->viewport.offset.x = (0.5f * viewport->width) + viewport->x;
242 sysvals->viewport.offset.y = (0.5f * viewport->height) + viewport->y;
243 sysvals->viewport.offset.z = viewport->minDepth;
244 cmdbuf->state.gfx.push_uniforms = 0;
245 }
246 }
247
248 static bool
has_depth_att(struct panvk_cmd_buffer *cmdbuf)
250 {
251 return (cmdbuf->state.gfx.render.bound_attachments &
252 MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0;
253 }
254
255 static bool
has_stencil_att(struct panvk_cmd_buffer *cmdbuf)
257 {
258 return (cmdbuf->state.gfx.render.bound_attachments &
259 MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0;
260 }
261
262 static bool
writes_depth(struct panvk_cmd_buffer *cmdbuf)
264 {
265 const struct vk_depth_stencil_state *ds =
266 &cmdbuf->vk.dynamic_graphics_state.ds;
267
268 return has_depth_att(cmdbuf) && ds->depth.test_enable &&
269 ds->depth.write_enable && ds->depth.compare_op != VK_COMPARE_OP_NEVER;
270 }
271
272 static bool
writes_stencil(struct panvk_cmd_buffer *cmdbuf)
274 {
275 const struct vk_depth_stencil_state *ds =
276 &cmdbuf->vk.dynamic_graphics_state.ds;
277
278 return has_stencil_att(cmdbuf) && ds->stencil.test_enable &&
279 ((ds->stencil.front.write_mask &&
280 (ds->stencil.front.op.fail != VK_STENCIL_OP_KEEP ||
281 ds->stencil.front.op.pass != VK_STENCIL_OP_KEEP ||
282 ds->stencil.front.op.depth_fail != VK_STENCIL_OP_KEEP)) ||
283 (ds->stencil.back.write_mask &&
284 (ds->stencil.back.op.fail != VK_STENCIL_OP_KEEP ||
285 ds->stencil.back.op.pass != VK_STENCIL_OP_KEEP ||
286 ds->stencil.back.op.depth_fail != VK_STENCIL_OP_KEEP)));
287 }
288
289 static bool
ds_test_always_passes(struct panvk_cmd_buffer *cmdbuf)
291 {
292 const struct vk_depth_stencil_state *ds =
293 &cmdbuf->vk.dynamic_graphics_state.ds;
294
295 if (!has_depth_att(cmdbuf))
296 return true;
297
298 if (ds->depth.test_enable && ds->depth.compare_op != VK_COMPARE_OP_ALWAYS)
299 return false;
300
301 if (ds->stencil.test_enable &&
302 (ds->stencil.front.op.compare != VK_COMPARE_OP_ALWAYS ||
303 ds->stencil.back.op.compare != VK_COMPARE_OP_ALWAYS))
304 return false;
305
306 return true;
307 }
308
309 static inline enum mali_func
translate_compare_func(VkCompareOp comp)
311 {
312 STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
313 STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
314 STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
315 STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
316 STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
317 STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
318 STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL ==
319 (VkCompareOp)MALI_FUNC_GEQUAL);
320 STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);
321
322 return (enum mali_func)comp;
323 }
324
325 static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)
327 {
328 switch (in) {
329 case VK_STENCIL_OP_KEEP:
330 return MALI_STENCIL_OP_KEEP;
331 case VK_STENCIL_OP_ZERO:
332 return MALI_STENCIL_OP_ZERO;
333 case VK_STENCIL_OP_REPLACE:
334 return MALI_STENCIL_OP_REPLACE;
335 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
336 return MALI_STENCIL_OP_INCR_SAT;
337 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
338 return MALI_STENCIL_OP_DECR_SAT;
339 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
340 return MALI_STENCIL_OP_INCR_WRAP;
341 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
342 return MALI_STENCIL_OP_DECR_WRAP;
343 case VK_STENCIL_OP_INVERT:
344 return MALI_STENCIL_OP_INVERT;
345 default:
346 unreachable("Invalid stencil op");
347 }
348 }
349
350 static bool
fs_required(struct panvk_cmd_buffer *cmdbuf)
352 {
353 const struct pan_shader_info *fs_info =
354 cmdbuf->state.gfx.fs.shader ? &cmdbuf->state.gfx.fs.shader->info : NULL;
355 const struct vk_dynamic_graphics_state *dyns =
356 &cmdbuf->vk.dynamic_graphics_state;
357 const struct vk_color_blend_state *cb = &dyns->cb;
358
359 if (!fs_info)
360 return false;
361
362 /* If we generally have side effects */
363 if (fs_info->fs.sidefx)
364 return true;
365
366 /* If colour is written we need to execute */
367 for (unsigned i = 0; i < cb->attachment_count; ++i) {
368 if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
369 cb->attachments[i].write_mask)
370 return true;
371 }
372
373 /* If alpha-to-coverage is enabled, we need to run the fragment shader even
374 * if we don't have a color attachment, so depth/stencil updates can be
375 * discarded if alpha, and thus coverage, is 0. */
376 if (dyns->ms.alpha_to_coverage_enable)
377 return true;
378
379 /* If depth is written and not implied we need to execute.
380 * TODO: Predicate on Z/S writes being enabled */
381 return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
382 }
383
384 static enum mali_draw_mode
translate_prim_topology(VkPrimitiveTopology in)
386 {
387 /* Test VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA separately, as it's not
388 * part of the VkPrimitiveTopology enum.
389 */
390 if (in == VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA)
391 return MALI_DRAW_MODE_TRIANGLES;
392
393 switch (in) {
394 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
395 return MALI_DRAW_MODE_POINTS;
396 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
397 return MALI_DRAW_MODE_LINES;
398 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
399 return MALI_DRAW_MODE_LINE_STRIP;
400 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
401 return MALI_DRAW_MODE_TRIANGLES;
402 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
403 return MALI_DRAW_MODE_TRIANGLE_STRIP;
404 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
405 return MALI_DRAW_MODE_TRIANGLE_FAN;
406 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
407 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
408 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
409 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
410 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
411 default:
412 unreachable("Invalid primitive type");
413 }
414 }
415
416 static void
force_fb_preload(struct panvk_cmd_buffer *cmdbuf)
418 {
419 for (unsigned i = 0; i < cmdbuf->state.gfx.render.fb.info.rt_count; i++) {
420 if (cmdbuf->state.gfx.render.fb.info.rts[i].view) {
421 cmdbuf->state.gfx.render.fb.info.rts[i].clear = false;
422 cmdbuf->state.gfx.render.fb.info.rts[i].preload = true;
423 }
424 }
425
426 if (cmdbuf->state.gfx.render.fb.info.zs.view.zs) {
427 cmdbuf->state.gfx.render.fb.info.zs.clear.z = false;
428 cmdbuf->state.gfx.render.fb.info.zs.preload.z = true;
429 }
430
431 if (cmdbuf->state.gfx.render.fb.info.zs.view.s ||
432 (cmdbuf->state.gfx.render.fb.info.zs.view.zs &&
433 util_format_is_depth_and_stencil(
434 cmdbuf->state.gfx.render.fb.info.zs.view.zs->format))) {
435 cmdbuf->state.gfx.render.fb.info.zs.clear.s = false;
436 cmdbuf->state.gfx.render.fb.info.zs.preload.s = true;
437 }
438 }
439
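/* Lazily allocate the local-storage descriptor and keep track of the largest
 * TLS size required by the shaders bound so far.
 */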
440 static VkResult
update_tls(struct panvk_cmd_buffer *cmdbuf)
442 {
443 struct panvk_tls_state *state = &cmdbuf->state.tls;
444 const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
445 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
446 struct cs_builder *b =
447 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
448
449 if (!cmdbuf->state.gfx.tsd) {
450 if (!state->desc.gpu) {
451 state->desc = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
452 if (!state->desc.gpu)
453 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
454 }
455
456 cmdbuf->state.gfx.tsd = state->desc.gpu;
457
458 cs_update_vt_ctx(b)
459 cs_move64_to(b, cs_sr_reg64(b, 24), state->desc.gpu);
460 }
461
462 state->info.tls.size =
463 MAX3(vs->info.tls_size, fs ? fs->info.tls_size : 0, state->info.tls.size);
464 return VK_SUCCESS;
465 }
466
467 static enum mali_index_type
index_size_to_index_type(uint32_t size)
469 {
470 switch (size) {
471 case 0:
472 return MALI_INDEX_TYPE_NONE;
473 case 1:
474 return MALI_INDEX_TYPE_UINT8;
475 case 2:
476 return MALI_INDEX_TYPE_UINT16;
477 case 4:
478 return MALI_INDEX_TYPE_UINT32;
479 default:
480 assert(!"Invalid index size");
481 return MALI_INDEX_TYPE_NONE;
482 }
483 }
484
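/* Emit one BLEND descriptor per color attachment and make the blend
 * descriptor register point at the array, with the descriptor count OR'ed
 * into the low bits of the address.
 */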
485 static VkResult
prepare_blend(struct panvk_cmd_buffer *cmdbuf)
487 {
488 bool dirty =
489 is_dirty(cmdbuf, CB_LOGIC_OP_ENABLE) || is_dirty(cmdbuf, CB_LOGIC_OP) ||
490 is_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
491 is_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
492 is_dirty(cmdbuf, CB_BLEND_ENABLES) ||
493 is_dirty(cmdbuf, CB_BLEND_EQUATIONS) ||
494 is_dirty(cmdbuf, CB_WRITE_MASKS) || is_dirty(cmdbuf, CB_BLEND_CONSTANTS);
495
496 if (!dirty)
497 return VK_SUCCESS;
498
499 struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
500 const struct vk_dynamic_graphics_state *dyns =
501 &cmdbuf->vk.dynamic_graphics_state;
502 const struct vk_color_blend_state *cb = &dyns->cb;
503 unsigned bd_count = MAX2(cb->attachment_count, 1);
504 struct cs_builder *b =
505 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
506 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
507 const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
508 mali_ptr fs_code = panvk_shader_get_dev_addr(fs);
509 struct panfrost_ptr ptr =
510 panvk_cmd_alloc_desc_array(cmdbuf, bd_count, BLEND);
511 struct mali_blend_packed *bds = ptr.cpu;
512
513 if (bd_count && !ptr.gpu)
514 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
515
516 panvk_per_arch(blend_emit_descs)(
517 dev, cb, cmdbuf->state.gfx.render.color_attachments.fmts,
518 cmdbuf->state.gfx.render.color_attachments.samples, fs_info, fs_code, bds,
519 &cmdbuf->state.gfx.cb.info);
520
521 cs_move64_to(b, cs_sr_reg64(b, 50), ptr.gpu | bd_count);
522 return VK_SUCCESS;
523 }
524
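/* Convert the first viewport/scissor pair into a Mali SCISSOR word and the
 * viewport min/max depth registers.
 */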
525 static void
prepare_vp(struct panvk_cmd_buffer *cmdbuf)
527 {
528 struct cs_builder *b =
529 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
530 const VkViewport *viewport =
531 &cmdbuf->vk.dynamic_graphics_state.vp.viewports[0];
532 const VkRect2D *scissor = &cmdbuf->vk.dynamic_graphics_state.vp.scissors[0];
533
534 if (is_dirty(cmdbuf, VP_VIEWPORTS) || is_dirty(cmdbuf, VP_SCISSORS)) {
535 uint64_t scissor_box;
536 pan_pack(&scissor_box, SCISSOR, cfg) {
537
538 /* The spec says "width must be greater than 0.0" */
assert(viewport->width >= 0);
540 int minx = (int)viewport->x;
541 int maxx = (int)(viewport->x + viewport->width);
542
543 /* Viewport height can be negative */
544 int miny =
545 MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
546 int maxy =
547 MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
548
549 assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
minx = MAX2(scissor->offset.x, minx);
551 miny = MAX2(scissor->offset.y, miny);
552 maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
553 maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
554
555 /* Make sure we don't end up with a max < min when width/height is 0 */
556 maxx = maxx > minx ? maxx - 1 : maxx;
557 maxy = maxy > miny ? maxy - 1 : maxy;
558
559 cfg.scissor_minimum_x = minx;
560 cfg.scissor_minimum_y = miny;
561 cfg.scissor_maximum_x = maxx;
562 cfg.scissor_maximum_y = maxy;
563 }
564
565 cs_move64_to(b, cs_sr_reg64(b, 42), scissor_box);
566 }
567
568 if (is_dirty(cmdbuf, VP_VIEWPORTS)) {
569 cs_move32_to(b, cs_sr_reg32(b, 44),
570 fui(MIN2(viewport->minDepth, viewport->maxDepth)));
571 cs_move32_to(b, cs_sr_reg32(b, 45),
572 fui(MAX2(viewport->minDepth, viewport->maxDepth)));
573 }
574 }
575
576 static uint32_t
calc_fbd_size(struct panvk_cmd_buffer *cmdbuf)
578 {
579 const struct pan_fb_info *fb = &cmdbuf->state.gfx.render.fb.info;
580 bool has_zs_ext = fb->zs.view.zs || fb->zs.view.s;
581 uint32_t fbd_size = pan_size(FRAMEBUFFER);
582
583 if (has_zs_ext)
584 fbd_size += pan_size(ZS_CRC_EXTENSION);
585
586 fbd_size += pan_size(RENDER_TARGET) * MAX2(fb->rt_count, 1);
587 return fbd_size;
588 }
589
590 static uint32_t
calc_render_descs_size(struct panvk_cmd_buffer *cmdbuf)
592 {
593 return (calc_fbd_size(cmdbuf) * cmdbuf->state.gfx.render.layer_count) +
594 pan_size(TILER_CONTEXT);
595 }
596
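/* Reserve space in the render descriptor ring buffer used by
 * SIMULTANEOUS_USE command buffers: wait until the ring buffer syncobj
 * reports enough free space, then decrement it by the reserved size.
 */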
597 static void
cs_render_desc_ringbuf_reserve(struct cs_builder *b, uint32_t size)
599 {
600 /* Make sure we don't allocate more than the ringbuf size. */
601 assert(size <= RENDER_DESC_RINGBUF_SIZE);
602
603 /* Make sure the allocation is 64-byte aligned. */
604 assert(ALIGN_POT(size, 64) == size);
605
606 struct cs_index ringbuf_sync = cs_scratch_reg64(b, 0);
607 struct cs_index sz_reg = cs_scratch_reg32(b, 2);
608
609 cs_load64_to(
610 b, ringbuf_sync, cs_subqueue_ctx_reg(b),
611 offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.syncobj));
612 cs_wait_slot(b, SB_ID(LS), false);
613
614 /* Wait for the other end to release memory. */
615 cs_move32_to(b, sz_reg, size - 1);
616 cs_sync32_wait(b, false, MALI_CS_CONDITION_GREATER, sz_reg, ringbuf_sync);
617
618 /* Decrement the syncobj to reflect the fact we're reserving memory. */
619 cs_move32_to(b, sz_reg, -size);
620 cs_sync32_add(b, false, MALI_CS_SYNC_SCOPE_CSG, sz_reg, ringbuf_sync,
621 cs_now());
622 }
623
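/* Advance the ring buffer pointer, wrapping both the absolute address and the
 * relative position when they go past RENDER_DESC_RINGBUF_SIZE.
 */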
624 static void
cs_render_desc_ringbuf_move_ptr(struct cs_builder *b, uint32_t size)
626 {
627 struct cs_index scratch_reg = cs_scratch_reg32(b, 0);
628 struct cs_index ptr_lo = cs_scratch_reg32(b, 2);
629 struct cs_index pos = cs_scratch_reg32(b, 4);
630
631 cs_load_to(
632 b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b),
633 BITFIELD_MASK(3),
634 offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
635 cs_wait_slot(b, SB_ID(LS), false);
636
637 /* Update the relative position and absolute address. */
638 cs_add32(b, ptr_lo, ptr_lo, size);
639 cs_add32(b, pos, pos, size);
640 cs_add32(b, scratch_reg, pos, -RENDER_DESC_RINGBUF_SIZE);
641
642 /* Wrap-around. */
643 cs_while(b, MALI_CS_CONDITION_GEQUAL, scratch_reg) {
644 cs_add32(b, ptr_lo, ptr_lo, -RENDER_DESC_RINGBUF_SIZE);
645 cs_add32(b, pos, pos, -RENDER_DESC_RINGBUF_SIZE);
646 cs_loop_break(b, MALI_CS_CONDITION_ALWAYS, cs_undef());
647 }
648
649 cs_store(
650 b, cs_scratch_reg_tuple(b, 2, 3), cs_subqueue_ctx_reg(b),
651 BITFIELD_MASK(3),
652 offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
653 cs_wait_slot(b, SB_ID(LS), false);
654 }
655
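/* Emit the TILER_CONTEXT descriptor for the current render pass along with
 * the CS instructions initializing it (heap pointers, completed chains,
 * private state). For SIMULTANEOUS_USE command buffers the descriptor is
 * carved out of the render descriptor ring buffer at execution time instead
 * of being allocated from the command buffer pool.
 */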
656 static VkResult
get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
658 {
659 if (cmdbuf->state.gfx.render.tiler)
660 return VK_SUCCESS;
661
662 struct cs_builder *b =
663 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
664 struct panvk_physical_device *phys_dev =
665 to_panvk_physical_device(cmdbuf->vk.base.device->physical);
666 struct panfrost_tiler_features tiler_features =
667 panfrost_query_tiler_features(&phys_dev->kmod.props);
668 bool simul_use =
669 cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
670 struct panfrost_ptr tiler_desc = {0};
671 struct mali_tiler_context_packed tiler_tmpl;
672
673 if (!simul_use) {
674 tiler_desc = panvk_cmd_alloc_desc(cmdbuf, TILER_CONTEXT);
675 if (!tiler_desc.gpu)
676 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
677 } else {
678 /* If the tiler descriptor is allocated from the ring buffer, we set a
679 * dumb non-zero address to allow the is-tiler-acquired test to pass. */
680 tiler_desc.cpu = &tiler_tmpl;
681 tiler_desc.gpu = 0xdeadbeefdeadbeefull;
682 }
683
684 pan_pack(tiler_desc.cpu, TILER_CONTEXT, cfg) {
685 unsigned max_levels = tiler_features.max_levels;
686 assert(max_levels >= 2);
687
688 /* TODO: Select hierarchy mask more effectively */
689 cfg.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;
690
691 /* For large framebuffers, disable the smallest bin size to
692 * avoid pathological tiler memory usage.
693 */
694 cfg.fb_width = cmdbuf->state.gfx.render.fb.info.width;
695 cfg.fb_height = cmdbuf->state.gfx.render.fb.info.height;
696 if (MAX2(cfg.fb_width, cfg.fb_height) >= 4096)
697 cfg.hierarchy_mask &= ~1;
698
699 cfg.sample_pattern =
700 pan_sample_pattern(cmdbuf->state.gfx.render.fb.info.nr_samples);
701
702 /* TODO: revisit for VK_EXT_provoking_vertex. */
703 cfg.first_provoking_vertex = true;
704
705 cfg.layer_count = cmdbuf->state.gfx.render.layer_count;
706 cfg.layer_offset = 0;
707 }
708
709 cmdbuf->state.gfx.render.tiler = tiler_desc.gpu;
710
711 struct cs_index tiler_ctx_addr = cs_sr_reg64(b, 40);
712
713 if (simul_use) {
714 uint32_t descs_sz = calc_render_descs_size(cmdbuf);
715
716 cs_render_desc_ringbuf_reserve(b, descs_sz);
717
718 /* Reserve ringbuf mem. */
719 cs_update_vt_ctx(b) {
720 cs_load64_to(b, tiler_ctx_addr, cs_subqueue_ctx_reg(b),
721 offsetof(struct panvk_cs_subqueue_context,
722 render.desc_ringbuf.ptr));
723 }
724
725 cs_render_desc_ringbuf_move_ptr(b, descs_sz);
726
727 /* Lay out words 2:5, so they can be stored along the other updates. */
728 cs_move64_to(b, cs_scratch_reg64(b, 2),
729 tiler_tmpl.opaque[2] | (uint64_t)tiler_tmpl.opaque[3] << 32);
730 cs_move64_to(b, cs_scratch_reg64(b, 4),
731 tiler_tmpl.opaque[4] | (uint64_t)tiler_tmpl.opaque[5] << 32);
732 } else {
733 cs_update_vt_ctx(b) {
734 cs_move64_to(b, tiler_ctx_addr, tiler_desc.gpu);
735 }
736 }
737
738 /* Reset the polygon list. */
739 cs_move64_to(b, cs_scratch_reg64(b, 0), 0);
740
741 /* Load the tiler_heap and geom_buf from the context. */
742 cs_load_to(b, cs_scratch_reg_tuple(b, 6, 4), cs_subqueue_ctx_reg(b),
743 BITFIELD_MASK(4),
744 offsetof(struct panvk_cs_subqueue_context, render.tiler_heap));
745
746 /* Reset the completed chain. */
747 cs_move64_to(b, cs_scratch_reg64(b, 10), 0);
748 cs_move64_to(b, cs_scratch_reg64(b, 12), 0);
749
750 cs_wait_slot(b, SB_ID(LS), false);
751
752 /* Update the first half of the tiler desc. */
753 if (simul_use) {
754 cs_store(b, cs_scratch_reg_tuple(b, 0, 14), tiler_ctx_addr,
755 BITFIELD_MASK(14), 0);
756 } else {
757 cs_store(b, cs_scratch_reg_tuple(b, 0, 2), tiler_ctx_addr,
758 BITFIELD_MASK(2), 0);
759 cs_store(b, cs_scratch_reg_tuple(b, 6, 8), tiler_ctx_addr,
760 BITFIELD_MASK(8), 24);
761 }
762
763 cs_wait_slot(b, SB_ID(LS), false);
764
765 /* r10:13 are already zero, fill r8:9 and r14:15 with zeros so we can reset
766 * the private state in one store. */
767 cs_move64_to(b, cs_scratch_reg64(b, 8), 0);
768 cs_move64_to(b, cs_scratch_reg64(b, 14), 0);
769
770 /* Update the second half of the tiler descriptor. */
771 cs_store(b, cs_scratch_reg_tuple(b, 8, 8), tiler_ctx_addr, BITFIELD_MASK(8),
772 96);
773 cs_wait_slot(b, SB_ID(LS), false);
774
775 /* Then we change the scoreboard slot used for iterators. */
776 panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
777
778 cs_heap_operation(b, MALI_CS_HEAP_OPERATION_VERTEX_TILER_STARTED, cs_now());
779 return VK_SUCCESS;
780 }
781
782 static VkResult
get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
784 {
785 if (cmdbuf->state.gfx.render.fbds.gpu ||
786 !cmdbuf->state.gfx.render.layer_count)
787 return VK_SUCCESS;
788
789 uint32_t fbds_sz =
790 calc_fbd_size(cmdbuf) * cmdbuf->state.gfx.render.layer_count;
791
792 memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0,
793 sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds));
794
795 cmdbuf->state.gfx.render.fbds = panvk_cmd_alloc_dev_mem(
796 cmdbuf, desc, fbds_sz, pan_alignment(FRAMEBUFFER));
797 if (!cmdbuf->state.gfx.render.fbds.gpu)
798 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
799
800 return VK_SUCCESS;
801 }
802
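/* Prepare the vertex shader state: driver set and resource table if they are
 * not set up yet, plus the position/varying shader program descriptors.
 */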
803 static VkResult
prepare_vs(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
805 {
806 struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
807 struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
808 const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
809 struct cs_builder *b =
810 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
811 const struct vk_input_assembly_state *ia =
812 &cmdbuf->vk.dynamic_graphics_state.ia;
813 mali_ptr pos_spd = ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST
814 ? panvk_priv_mem_dev_addr(vs->spds.pos_points)
815 : panvk_priv_mem_dev_addr(vs->spds.pos_triangles);
816 mali_ptr var_spd = panvk_priv_mem_dev_addr(vs->spds.var);
817 bool upd_res_table = false;
818
819 if (!vs_desc_state->res_table) {
820 VkResult result = prepare_vs_driver_set(cmdbuf, draw);
821 if (result != VK_SUCCESS)
822 return result;
823
824 result = panvk_per_arch(cmd_prepare_shader_res_table)(cmdbuf, desc_state,
825 vs, vs_desc_state);
826 if (result != VK_SUCCESS)
827 return result;
828
829 upd_res_table = true;
830 }
831
832 cs_update_vt_ctx(b) {
833 if (upd_res_table)
834 cs_move64_to(b, cs_sr_reg64(b, 0), vs_desc_state->res_table);
835
836 if (pos_spd != cmdbuf->state.gfx.vs.spds.pos)
837 cs_move64_to(b, cs_sr_reg64(b, 16), pos_spd);
838
839 if (var_spd != cmdbuf->state.gfx.vs.spds.var)
840 cs_move64_to(b, cs_sr_reg64(b, 18), var_spd);
841 }
842
843 return VK_SUCCESS;
844 }
845
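/* Prepare the fragment shader state: driver set and resource table if they
 * are not set up yet, plus the fragment shader program descriptor.
 */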
846 static VkResult
prepare_fs(struct panvk_cmd_buffer *cmdbuf)
848 {
849 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
850 struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
851 struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
852 struct cs_builder *b =
853 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
854 mali_ptr frag_spd = panvk_priv_mem_dev_addr(fs->spd);
855 bool upd_res_table = false;
856
857 if (!fs_desc_state->res_table) {
858 VkResult result = prepare_fs_driver_set(cmdbuf);
859 if (result != VK_SUCCESS)
860 return result;
861
862 result = panvk_per_arch(cmd_prepare_shader_res_table)(cmdbuf, desc_state,
863 fs, fs_desc_state);
864 if (result != VK_SUCCESS)
865 return result;
866
867 upd_res_table = true;
868 }
869
870 cs_update_vt_ctx(b) {
871 if (upd_res_table)
872 cs_move64_to(b, cs_sr_reg64(b, 4), fs_desc_state->res_table);
873
874 if (cmdbuf->state.gfx.fs.spd != frag_spd)
875 cs_move64_to(b, cs_sr_reg64(b, 20), frag_spd);
876 }
877
878 return VK_SUCCESS;
879 }
880
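/* Re-emit the FAU (push uniform) buffer when it was invalidated. The FAU
 * count is encoded in the top byte of the address loaded in both the vertex
 * and fragment FAU registers.
 */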
881 static VkResult
prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
883 {
884 struct cs_builder *b =
885 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
886
887 if (!cmdbuf->state.gfx.push_uniforms) {
888 cmdbuf->state.gfx.push_uniforms = panvk_per_arch(
889 cmd_prepare_push_uniforms)(cmdbuf, &cmdbuf->state.gfx.sysvals,
890 sizeof(cmdbuf->state.gfx.sysvals));
891 if (!cmdbuf->state.gfx.push_uniforms)
892 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
893
894 uint32_t push_size = 256 + sizeof(struct panvk_graphics_sysvals);
895 uint64_t fau_count = DIV_ROUND_UP(push_size, 8);
896 mali_ptr fau_ptr = cmdbuf->state.gfx.push_uniforms | (fau_count << 56);
897
898 cs_update_vt_ctx(b) {
899 cs_move64_to(b, cs_sr_reg64(b, 8), fau_ptr);
900 cs_move64_to(b, cs_sr_reg64(b, 12), fau_ptr);
901 }
902 }
903
904 return VK_SUCCESS;
905 }
906
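/* Emit a DEPTH_STENCIL descriptor from the dynamic depth/stencil and
 * rasterization state whenever any of the contributing state is dirty.
 */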
907 static VkResult
prepare_ds(struct panvk_cmd_buffer *cmdbuf)
909 {
910 bool dirty = is_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
911 is_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
is_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
914 is_dirty(cmdbuf, DS_STENCIL_TEST_ENABLE) ||
915 is_dirty(cmdbuf, DS_STENCIL_OP) ||
916 is_dirty(cmdbuf, DS_STENCIL_COMPARE_MASK) ||
917 is_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
918 is_dirty(cmdbuf, DS_STENCIL_REFERENCE) ||
919 is_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) ||
920 is_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
921 is_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
922 /* fs_required() uses ms.alpha_to_coverage_enable
923 * and vk_color_blend_state
924 */
925 is_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
926 is_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
927 is_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
928 is_dirty(cmdbuf, CB_WRITE_MASKS);
929
930 if (!dirty)
931 return VK_SUCCESS;
932
933 struct cs_builder *b =
934 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
935 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
936 const struct vk_dynamic_graphics_state *dyns =
937 &cmdbuf->vk.dynamic_graphics_state;
938 const struct vk_depth_stencil_state *ds = &dyns->ds;
939 const struct vk_rasterization_state *rs = &dyns->rs;
940 bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable;
941 bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable;
942 bool needs_fs = fs_required(cmdbuf);
943
944 struct panfrost_ptr zsd = panvk_cmd_alloc_desc(cmdbuf, DEPTH_STENCIL);
945 if (!zsd.gpu)
946 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
947
948 pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
949 cfg.stencil_test_enable = test_s;
950 if (test_s) {
951 cfg.front_compare_function =
952 translate_compare_func(ds->stencil.front.op.compare);
953 cfg.front_stencil_fail =
954 translate_stencil_op(ds->stencil.front.op.fail);
955 cfg.front_depth_fail =
956 translate_stencil_op(ds->stencil.front.op.depth_fail);
957 cfg.front_depth_pass = translate_stencil_op(ds->stencil.front.op.pass);
958 cfg.back_compare_function =
959 translate_compare_func(ds->stencil.back.op.compare);
960 cfg.back_stencil_fail = translate_stencil_op(ds->stencil.back.op.fail);
961 cfg.back_depth_fail =
962 translate_stencil_op(ds->stencil.back.op.depth_fail);
963 cfg.back_depth_pass = translate_stencil_op(ds->stencil.back.op.pass);
964 }
965
966 cfg.stencil_from_shader = needs_fs ? fs->info.fs.writes_stencil : 0;
967 cfg.front_write_mask = ds->stencil.front.write_mask;
968 cfg.back_write_mask = ds->stencil.back.write_mask;
969 cfg.front_value_mask = ds->stencil.front.compare_mask;
970 cfg.back_value_mask = ds->stencil.back.compare_mask;
971 cfg.front_reference_value = ds->stencil.front.reference;
972 cfg.back_reference_value = ds->stencil.back.reference;
973
974 if (rs->depth_clamp_enable)
975 cfg.depth_clamp_mode = MALI_DEPTH_CLAMP_MODE_BOUNDS;
976
977 if (fs)
978 cfg.depth_source = pan_depth_source(&fs->info);
979 cfg.depth_write_enable = ds->depth.write_enable;
980 cfg.depth_bias_enable = rs->depth_bias.enable;
981 cfg.depth_function = test_z ? translate_compare_func(ds->depth.compare_op)
982 : MALI_FUNC_ALWAYS;
983 cfg.depth_units = rs->depth_bias.constant * 2.0f;
984 cfg.depth_factor = rs->depth_bias.slope;
985 cfg.depth_bias_clamp = rs->depth_bias.clamp;
986 }
987
988 cs_update_vt_ctx(b)
989 cs_move64_to(b, cs_sr_reg64(b, 52), zsd.gpu);
990
991 return VK_SUCCESS;
992 }
993
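/* Recompute the DCD flags words: word 0 covers the pixel-kill/early-ZS
 * decisions, culling and multisampling, word 1 covers the sample mask and the
 * render target write mask.
 */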
994 static void
prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
996 {
997 struct cs_builder *b =
998 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
999 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
1000 bool fs_is_dirty =
1001 cmdbuf->state.gfx.fs.spd != (fs ? panvk_priv_mem_dev_addr(fs->spd) : 0);
1002 bool dcd0_dirty = is_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||
1003 is_dirty(cmdbuf, RS_CULL_MODE) ||
1004 is_dirty(cmdbuf, RS_FRONT_FACE) ||
1005 is_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
1006 is_dirty(cmdbuf, MS_SAMPLE_MASK) ||
1007 is_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
1008 is_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) ||
1009 /* writes_depth() uses vk_depth_stencil_state */
1010 is_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
1011 is_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
1012 is_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
1013 /* writes_stencil() uses vk_depth_stencil_state */
1014 is_dirty(cmdbuf, DS_STENCIL_TEST_ENABLE) ||
1015 is_dirty(cmdbuf, DS_STENCIL_OP) ||
1016 is_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
1017 /* fs_required() uses vk_color_blend_state */
1018 is_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
1019 is_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
1020 is_dirty(cmdbuf, CB_WRITE_MASKS) || fs_is_dirty ||
1021 cmdbuf->state.gfx.render.dirty;
1022 bool dcd1_dirty = is_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
1023 is_dirty(cmdbuf, MS_SAMPLE_MASK) ||
1024 /* fs_required() uses ms.alpha_to_coverage_enable
1025 * and vk_color_blend_state
1026 */
1027 is_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
1028 is_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
1029 is_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
1030 is_dirty(cmdbuf, CB_WRITE_MASKS) || fs_is_dirty ||
1031 cmdbuf->state.gfx.render.dirty;
1032
1033 bool needs_fs = fs_required(cmdbuf);
1034
1035 const struct vk_dynamic_graphics_state *dyns =
1036 &cmdbuf->vk.dynamic_graphics_state;
1037 const struct vk_rasterization_state *rs =
1038 &cmdbuf->vk.dynamic_graphics_state.rs;
1039 bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable;
1040 bool writes_z = writes_depth(cmdbuf);
1041 bool writes_s = writes_stencil(cmdbuf);
1042
1043 if (dcd0_dirty) {
1044 struct mali_dcd_flags_0_packed dcd0;
1045 pan_pack(&dcd0, DCD_FLAGS_0, cfg) {
1046 if (needs_fs) {
1047 uint8_t rt_written = fs->info.outputs_written >> FRAG_RESULT_DATA0;
1048 uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
1049 MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
1050
1051 cfg.allow_forward_pixel_to_kill =
1052 fs->info.fs.can_fpk && !(rt_mask & ~rt_written) &&
1053 !alpha_to_coverage && !cmdbuf->state.gfx.cb.info.any_dest_read;
1054
1055 bool writes_zs = writes_z || writes_s;
1056 bool zs_always_passes = ds_test_always_passes(cmdbuf);
1057 bool oq = false; /* TODO: Occlusion queries */
1058
1059 struct pan_earlyzs_state earlyzs =
1060 pan_earlyzs_get(pan_earlyzs_analyze(&fs->info), writes_zs || oq,
1061 alpha_to_coverage, zs_always_passes);
1062
1063 cfg.pixel_kill_operation = earlyzs.kill;
1064 cfg.zs_update_operation = earlyzs.update;
1065 } else {
1066 cfg.allow_forward_pixel_to_kill = true;
1067 cfg.allow_forward_pixel_to_be_killed = true;
1068 cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
1069 cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
1070 cfg.overdraw_alpha0 = true;
1071 cfg.overdraw_alpha1 = true;
1072 }
1073
1074 cfg.front_face_ccw = rs->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE;
1075 cfg.cull_front_face = (rs->cull_mode & VK_CULL_MODE_FRONT_BIT) != 0;
1076 cfg.cull_back_face = (rs->cull_mode & VK_CULL_MODE_BACK_BIT) != 0;
1077
1078 cfg.multisample_enable = dyns->ms.rasterization_samples > 1;
1079 }
1080
1081 cs_update_vt_ctx(b)
1082 cs_move32_to(b, cs_sr_reg32(b, 57), dcd0.opaque[0]);
1083 }
1084
1085 if (dcd1_dirty) {
1086 struct mali_dcd_flags_1_packed dcd1;
1087 pan_pack(&dcd1, DCD_FLAGS_1, cfg) {
1088 cfg.sample_mask = dyns->ms.rasterization_samples > 1
1089 ? dyns->ms.sample_mask
1090 : UINT16_MAX;
1091
1092 if (needs_fs) {
1093 cfg.render_target_mask =
1094 (fs->info.outputs_written >> FRAG_RESULT_DATA0) &
1095 cmdbuf->state.gfx.render.bound_attachments;
1096 }
1097 }
1098
1099 cs_update_vt_ctx(b)
1100 cs_move32_to(b, cs_sr_reg32(b, 58), dcd1.opaque[0]);
1101 }
1102 }
1103
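/* Snapshot the shader pointers used by this draw and clear the dirty flags so
 * the next draw only re-emits what actually changed.
 */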
1104 static void
clear_dirty(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
1106 {
1107 const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
1108 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
1109
1110 if (vs) {
1111 const struct vk_input_assembly_state *ia =
1112 &cmdbuf->vk.dynamic_graphics_state.ia;
1113
1114 cmdbuf->state.gfx.vs.spds.pos =
1115 ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST
1116 ? panvk_priv_mem_dev_addr(vs->spds.pos_points)
1117 : panvk_priv_mem_dev_addr(vs->spds.pos_triangles);
1118 cmdbuf->state.gfx.vs.spds.var = panvk_priv_mem_dev_addr(vs->spds.var);
1119 }
1120
1121 cmdbuf->state.gfx.fs.spd = fs ? panvk_priv_mem_dev_addr(fs->spd) : 0;
1122
1123 if (draw->index.size)
1124 cmdbuf->state.gfx.ib.dirty = false;
1125
1126 cmdbuf->state.gfx.render.dirty = false;
1127 vk_dynamic_graphics_state_clear_dirty(&cmdbuf->vk.dynamic_graphics_state);
1128 }
1129
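/* Common draw path: prepare all dirty descriptors/registers, then issue the
 * IDVS job through cs_run_idvs() on the vertex-tiler subqueue.
 */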
1130 static void
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
1132 {
1133 const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
1134 const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
1135 struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
1136 const struct vk_rasterization_state *rs =
1137 &cmdbuf->vk.dynamic_graphics_state.rs;
1138 const struct vk_input_assembly_state *ia =
1139 &cmdbuf->vk.dynamic_graphics_state.ia;
1140 bool idvs = vs->info.vs.idvs;
1141 VkResult result;
1142
1143 /* If there's no vertex shader, we can skip the draw. */
1144 if (!panvk_priv_mem_dev_addr(vs->spds.pos_points))
1145 return;
1146
1147 /* FIXME: support non-IDVS. */
1148 assert(idvs);
1149
1150 if (!cmdbuf->state.gfx.linked) {
1151 result = panvk_per_arch(link_shaders)(&cmdbuf->desc_pool, vs, fs,
1152 &cmdbuf->state.gfx.link);
1153 if (result != VK_SUCCESS) {
1154 vk_command_buffer_set_error(&cmdbuf->vk, result);
1155 return;
1156 }
1157 cmdbuf->state.gfx.linked = true;
1158 }
1159
1160 result = update_tls(cmdbuf);
1161 if (result != VK_SUCCESS)
1162 return;
1163
1164 bool needs_tiling = !rs->rasterizer_discard_enable;
1165
1166 if (needs_tiling) {
1167 result = get_tiler_desc(cmdbuf);
1168 if (result != VK_SUCCESS)
1169 return;
1170
1171 result = get_fb_descs(cmdbuf);
1172 if (result != VK_SUCCESS)
1173 return;
1174 }
1175
1176 struct cs_builder *b =
1177 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
1178
1179 uint32_t used_set_mask =
1180 vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
1181
1182 result =
1183 panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state, used_set_mask);
1184 if (result != VK_SUCCESS)
1185 return;
1186
1187 prepare_sysvals(cmdbuf);
1188
1189 result = prepare_push_uniforms(cmdbuf);
1190 if (result != VK_SUCCESS)
1191 return;
1192
1193 result = prepare_vs(cmdbuf, draw);
1194 if (result != VK_SUCCESS)
1195 return;
1196
1197 /* No need to setup the FS desc tables if the FS is not executed. */
1198 if (needs_tiling && fs_required(cmdbuf)) {
1199 result = prepare_fs(cmdbuf);
1200 if (result != VK_SUCCESS)
1201 return;
1202 }
1203
1204 struct mali_primitive_flags_packed tiler_idvs_flags;
1205 bool writes_point_size =
1206 vs->info.vs.writes_point_size &&
1207 ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
1208
1209 pan_pack(&tiler_idvs_flags, PRIMITIVE_FLAGS, cfg) {
1210 cfg.draw_mode = translate_prim_topology(ia->primitive_topology);
1211 cfg.index_type = index_size_to_index_type(draw->index.size);
1212
1213 if (writes_point_size) {
1214 cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
1215 cfg.position_fifo_format = MALI_FIFO_FORMAT_EXTENDED;
1216 } else {
1217 cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_NONE;
1218 cfg.position_fifo_format = MALI_FIFO_FORMAT_BASIC;
1219 }
1220
1221 if (vs->info.outputs_written & VARYING_BIT_LAYER) {
1222 cfg.layer_index_enable = true;
1223 cfg.position_fifo_format = MALI_FIFO_FORMAT_EXTENDED;
1224 }
1225
1226 cfg.secondary_shader =
1227 vs->info.vs.secondary_enable && fs_required(cmdbuf);
1228 cfg.primitive_restart = ia->primitive_restart_enable;
1229 }
1230
1231 uint32_t varying_size = 0;
1232
1233 if (vs && fs) {
1234 unsigned vs_vars = vs->info.varyings.output_count;
1235 unsigned fs_vars = fs->info.varyings.input_count;
1236 unsigned var_slots = MAX2(vs_vars, fs_vars);
1237
1238 /* Assumes 16 byte slots. We could do better. */
1239 varying_size = var_slots * 16;
1240 }
1241
1242 cs_update_vt_ctx(b) {
1243 cs_move32_to(b, cs_sr_reg32(b, 32), draw->vertex.base);
1244 cs_move32_to(b, cs_sr_reg32(b, 33), draw->vertex.count);
1245 cs_move32_to(b, cs_sr_reg32(b, 34), draw->instance.count);
1246 cs_move32_to(b, cs_sr_reg32(b, 35), draw->index.offset);
1247 cs_move32_to(b, cs_sr_reg32(b, 36), draw->index.vertex_offset);
1248
1249 /* Instance ID is assumed to be zero-based for now. See if we can
1250 * extend nir_lower_system_values() and the lower options to make
1251 * instance-ID non-zero based, or if it's fine to always return
1252 * zero for the instance base. */
1253 cs_move32_to(b, cs_sr_reg32(b, 37), 0);
1254
1255 /* We don't use the resource dep system yet. */
1256 cs_move32_to(b, cs_sr_reg32(b, 38), 0);
1257
1258 cs_move32_to(
1259 b, cs_sr_reg32(b, 39),
1260 (draw->index.offset + draw->vertex.count) * draw->index.size);
1261
1262 if (draw->index.size && cmdbuf->state.gfx.ib.dirty) {
1263 cs_move64_to(b, cs_sr_reg64(b, 54),
1264 panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer,
1265 cmdbuf->state.gfx.ib.offset));
1266 }
1267
1268 /* TODO: Revisit to avoid passing everything through the override flags
1269 * (likely needed for state preservation in secondary command buffers). */
1270 cs_move32_to(b, cs_sr_reg32(b, 56), 0);
1271
1272 cs_move32_to(b, cs_sr_reg32(b, 48), varying_size);
1273
1274 result = prepare_blend(cmdbuf);
1275 if (result != VK_SUCCESS)
1276 return;
1277
1278 result = prepare_ds(cmdbuf);
1279 if (result != VK_SUCCESS)
1280 return;
1281
1282 prepare_dcd(cmdbuf);
1283 prepare_vp(cmdbuf);
1284 }
1285
1286 clear_dirty(cmdbuf, draw);
1287
1288 cs_req_res(b, CS_IDVS_RES);
1289 cs_run_idvs(b, tiler_idvs_flags.opaque[0], false, true,
1290 cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(2, 2, 2, 0),
1291 cs_undef());
1292 cs_req_res(b, 0);
1293 }
1294
1295 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
1297 uint32_t instanceCount, uint32_t firstVertex,
1298 uint32_t firstInstance)
1299 {
1300 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1301
1302 if (instanceCount == 0 || vertexCount == 0)
1303 return;
1304
1305 struct panvk_draw_info draw = {
1306 .vertex.base = firstVertex,
1307 .vertex.count = vertexCount,
1308 .instance.base = firstInstance,
1309 .instance.count = instanceCount,
1310 };
1311
1312 panvk_cmd_draw(cmdbuf, &draw);
1313 }
1314
1315 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
1317 uint32_t indexCount, uint32_t instanceCount,
1318 uint32_t firstIndex, int32_t vertexOffset,
1319 uint32_t firstInstance)
1320 {
1321 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1322
1323 if (instanceCount == 0 || indexCount == 0)
1324 return;
1325
1326 struct panvk_draw_info draw = {
1327 .index.size = cmdbuf->state.gfx.ib.index_size,
1328 .index.offset = firstIndex,
1329 .index.vertex_offset = vertexOffset,
1330 .vertex.count = indexCount,
1331 .instance.count = instanceCount,
1332 .instance.base = firstInstance,
1333 };
1334
1335 panvk_cmd_draw(cmdbuf, &draw);
1336 }
1337
1338 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer _buffer,
1340 VkDeviceSize offset, uint32_t drawCount,
1341 uint32_t stride)
1342 {
1343 panvk_stub();
1344 }
1345
1346 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer,
1348 VkBuffer _buffer, VkDeviceSize offset,
1349 uint32_t drawCount, uint32_t stride)
1350 {
1351 panvk_stub();
1352 }
1353
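/* Translate VkRenderingInfo into the pan_fb_info describing this render pass.
 * Nothing is re-initialized when resuming a suspended pass.
 */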
1354 static void
panvk_cmd_begin_rendering_init_state(struct panvk_cmd_buffer *cmdbuf,
1356 const VkRenderingInfo *pRenderingInfo)
1357 {
1358 struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
1359 struct panvk_physical_device *phys_dev =
1360 to_panvk_physical_device(dev->vk.physical);
1361 struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1362 uint32_t att_width = 0, att_height = 0;
1363
1364 cmdbuf->state.gfx.render.flags = pRenderingInfo->flags;
1365
1366 /* Resuming from a suspended pass, the state should be unchanged. */
1367 if (cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT)
1368 return;
1369
1370 cmdbuf->state.gfx.render.dirty = true;
1371 memset(cmdbuf->state.gfx.render.fb.crc_valid, 0,
1372 sizeof(cmdbuf->state.gfx.render.fb.crc_valid));
1373 memset(&cmdbuf->state.gfx.render.color_attachments, 0,
1374 sizeof(cmdbuf->state.gfx.render.color_attachments));
1375 memset(&cmdbuf->state.gfx.render.z_attachment, 0,
1376 sizeof(cmdbuf->state.gfx.render.z_attachment));
1377 memset(&cmdbuf->state.gfx.render.s_attachment, 0,
1378 sizeof(cmdbuf->state.gfx.render.s_attachment));
1379 cmdbuf->state.gfx.render.bound_attachments = 0;
1380
1381 cmdbuf->state.gfx.render.layer_count = pRenderingInfo->layerCount;
1382 *fbinfo = (struct pan_fb_info){
1383 .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
1384 .nr_samples = 1,
1385 .rt_count = pRenderingInfo->colorAttachmentCount,
1386 };
1387
1388 assert(pRenderingInfo->colorAttachmentCount <= ARRAY_SIZE(fbinfo->rts));
1389
1390 for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
1391 const VkRenderingAttachmentInfo *att =
1392 &pRenderingInfo->pColorAttachments[i];
1393 VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
1394
1395 if (!iview)
1396 continue;
1397
1398 struct panvk_image *img =
1399 container_of(iview->vk.image, struct panvk_image, vk);
1400 const VkExtent3D iview_size =
1401 vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);
1402
1403 cmdbuf->state.gfx.render.bound_attachments |=
1404 MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
1405 cmdbuf->state.gfx.render.color_attachments.fmts[i] = iview->vk.format;
1406 cmdbuf->state.gfx.render.color_attachments.samples[i] = img->vk.samples;
1407 att_width = MAX2(iview_size.width, att_width);
1408 att_height = MAX2(iview_size.height, att_height);
1409
1410 fbinfo->rts[i].view = &iview->pview;
1411 fbinfo->rts[i].crc_valid = &cmdbuf->state.gfx.render.fb.crc_valid[i];
1412 fbinfo->nr_samples =
1413 MAX2(fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));
1414
1415 if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1416 enum pipe_format fmt = vk_format_to_pipe_format(iview->vk.format);
1417 union pipe_color_union *col =
1418 (union pipe_color_union *)&att->clearValue.color;
1419
1420 fbinfo->rts[i].clear = true;
1421 pan_pack_color(phys_dev->formats.blendable, fbinfo->rts[i].clear_value,
1422 col, fmt, false);
1423 } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
1424 fbinfo->rts[i].preload = true;
1425 }
1426
1427 if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
1428 struct panvk_resolve_attachment *resolve_info =
1429 &cmdbuf->state.gfx.render.color_attachments.resolve[i];
1430 VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
1431
1432 resolve_info->mode = att->resolveMode;
1433 resolve_info->src_iview = iview;
1434 resolve_info->dst_iview = resolve_iview;
1435 }
1436 }
1437
1438 if (pRenderingInfo->pDepthAttachment &&
1439 pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE) {
1440 const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
1441 VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
1442 struct panvk_image *img =
1443 container_of(iview->vk.image, struct panvk_image, vk);
1444 const VkExtent3D iview_size =
1445 vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);
1446
1447 if (iview->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1448 cmdbuf->state.gfx.render.bound_attachments |=
1449 MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
1450 att_width = MAX2(iview_size.width, att_width);
1451 att_height = MAX2(iview_size.height, att_height);
1452
1453 fbinfo->zs.view.zs = &iview->pview;
1454 fbinfo->nr_samples = MAX2(
1455 fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));
1456
1457 if (vk_format_has_stencil(img->vk.format))
1458 fbinfo->zs.preload.s = true;
1459
1460 if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1461 fbinfo->zs.clear.z = true;
1462 fbinfo->zs.clear_value.depth = att->clearValue.depthStencil.depth;
1463 } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
1464 fbinfo->zs.preload.z = true;
1465 }
1466
1467 if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
1468 struct panvk_resolve_attachment *resolve_info =
1469 &cmdbuf->state.gfx.render.z_attachment.resolve;
1470 VK_FROM_HANDLE(panvk_image_view, resolve_iview,
1471 att->resolveImageView);
1472
1473 resolve_info->mode = att->resolveMode;
1474 resolve_info->src_iview = iview;
1475 resolve_info->dst_iview = resolve_iview;
1476 }
1477 }
1478 }
1479
1480 if (pRenderingInfo->pStencilAttachment &&
1481 pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE) {
1482 const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
1483 VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
1484 struct panvk_image *img =
1485 container_of(iview->vk.image, struct panvk_image, vk);
1486 const VkExtent3D iview_size =
1487 vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);
1488
1489 if (iview->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1490 cmdbuf->state.gfx.render.bound_attachments |=
1491 MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
1492 att_width = MAX2(iview_size.width, att_width);
1493 att_height = MAX2(iview_size.height, att_height);
1494
1495 if (drm_is_afbc(img->pimage.layout.modifier)) {
1496 assert(fbinfo->zs.view.zs == &iview->pview || !fbinfo->zs.view.zs);
1497 fbinfo->zs.view.zs = &iview->pview;
1498 } else {
1499 fbinfo->zs.view.s =
1500 &iview->pview != fbinfo->zs.view.zs ? &iview->pview : NULL;
1501 }
1502
1505 fbinfo->nr_samples = MAX2(
1506 fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));
1507
1508 if (vk_format_has_depth(img->vk.format)) {
1509 assert(fbinfo->zs.view.zs == NULL ||
1510 &iview->pview == fbinfo->zs.view.zs);
1511 fbinfo->zs.view.zs = &iview->pview;
1512
1513 fbinfo->zs.preload.s = false;
1514 fbinfo->zs.clear.s = false;
1515 if (!fbinfo->zs.clear.z)
1516 fbinfo->zs.preload.z = true;
1517 }
1518
1519 if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1520 fbinfo->zs.clear.s = true;
1521 fbinfo->zs.clear_value.stencil =
1522 att->clearValue.depthStencil.stencil;
1523 } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
1524 fbinfo->zs.preload.s = true;
1525 }
1526
1527 if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
1528 struct panvk_resolve_attachment *resolve_info =
1529 &cmdbuf->state.gfx.render.s_attachment.resolve;
1530 VK_FROM_HANDLE(panvk_image_view, resolve_iview,
1531 att->resolveImageView);
1532
1533 resolve_info->mode = att->resolveMode;
1534 resolve_info->src_iview = iview;
1535 resolve_info->dst_iview = resolve_iview;
1536 }
1537 }
1538 }
1539
1540 if (fbinfo->zs.view.zs) {
1541 const struct util_format_description *fdesc =
1542 util_format_description(fbinfo->zs.view.zs->format);
1543 bool needs_depth = fbinfo->zs.clear.z | fbinfo->zs.preload.z |
1544 util_format_has_depth(fdesc);
1545 bool needs_stencil = fbinfo->zs.clear.s | fbinfo->zs.preload.s |
1546 util_format_has_stencil(fdesc);
1547 enum pipe_format new_fmt =
1548 util_format_get_blocksize(fbinfo->zs.view.zs->format) == 4
1549 ? PIPE_FORMAT_Z24_UNORM_S8_UINT
1550 : PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
1551
1552 if (needs_depth && needs_stencil &&
1553 fbinfo->zs.view.zs->format != new_fmt) {
1554 cmdbuf->state.gfx.render.zs_pview = *fbinfo->zs.view.zs;
1555 cmdbuf->state.gfx.render.zs_pview.format = new_fmt;
1556 fbinfo->zs.view.zs = &cmdbuf->state.gfx.render.zs_pview;
1557 }
1558 }
1559
1560 fbinfo->extent.minx = pRenderingInfo->renderArea.offset.x;
1561 fbinfo->extent.maxx = pRenderingInfo->renderArea.offset.x +
1562 pRenderingInfo->renderArea.extent.width - 1;
1563 fbinfo->extent.miny = pRenderingInfo->renderArea.offset.y;
1564 fbinfo->extent.maxy = pRenderingInfo->renderArea.offset.y +
1565 pRenderingInfo->renderArea.extent.height - 1;
1566
1567 if (cmdbuf->state.gfx.render.bound_attachments) {
1568 fbinfo->width = att_width;
1569 fbinfo->height = att_height;
1570 } else {
1571 fbinfo->width = fbinfo->extent.maxx + 1;
1572 fbinfo->height = fbinfo->extent.maxy + 1;
1573 }
1574
1575 assert(fbinfo->width && fbinfo->height);
1576 }
1577
1578 static void
1579 preload_render_area_border(struct panvk_cmd_buffer *cmdbuf,
1580 const VkRenderingInfo *render_info)
1581 {
1582 struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1583 bool render_area_is_32x32_aligned =
1584 ((fbinfo->extent.minx | fbinfo->extent.miny) % 32) == 0 &&
1585 (fbinfo->extent.maxx + 1 == fbinfo->width ||
1586 (fbinfo->extent.maxx % 32) == 31) &&
1587 (fbinfo->extent.maxy + 1 == fbinfo->height ||
1588 (fbinfo->extent.maxy % 32) == 31);
1589
1590 /* If the render area is aligned on a 32x32 section, we're good. */
1591 if (render_area_is_32x32_aligned)
1592 return;
1593
1594 /* We force preloading for all active attachments to preserve content falling
1595 * outside the render area, but we need to compensate with attachment clears
1596 * for attachments that were initially cleared.
1597 */
1598 uint32_t bound_atts = cmdbuf->state.gfx.render.bound_attachments;
1599 VkClearAttachment clear_atts[MAX_RTS + 2];
1600 uint32_t clear_att_count = 0;
1601
1602 for (uint32_t i = 0; i < render_info->colorAttachmentCount; i++) {
1603 if (bound_atts & MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)) {
1604 if (fbinfo->rts[i].clear) {
1605 const VkRenderingAttachmentInfo *att =
1606 &render_info->pColorAttachments[i];
1607
1608 clear_atts[clear_att_count++] = (VkClearAttachment){
1609 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1610 .colorAttachment = i,
1611 .clearValue = att->clearValue,
1612 };
1613 }
1614
1615 fbinfo->rts[i].preload = true;
1616 fbinfo->rts[i].clear = false;
1617 }
1618 }
1619
1620 if (bound_atts & MESA_VK_RP_ATTACHMENT_DEPTH_BIT) {
1621 if (fbinfo->zs.clear.z) {
1622 const VkRenderingAttachmentInfo *att = render_info->pDepthAttachment;
1623
1624 clear_atts[clear_att_count++] = (VkClearAttachment){
1625 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
1626 .clearValue = att->clearValue,
1627 };
1628 }
1629
1630 fbinfo->zs.preload.z = true;
1631 fbinfo->zs.clear.z = false;
1632 }
1633
1634 if (bound_atts & MESA_VK_RP_ATTACHMENT_STENCIL_BIT) {
1635 if (fbinfo->zs.clear.s) {
1636 const VkRenderingAttachmentInfo *att = render_info->pStencilAttachment;
1637
1638 clear_atts[clear_att_count++] = (VkClearAttachment){
1639 .aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
1640 .clearValue = att->clearValue,
1641 };
1642 }
1643
1644 fbinfo->zs.preload.s = true;
1645 fbinfo->zs.clear.s = false;
1646 }
1647
1648 if (clear_att_count) {
1649 VkClearRect clear_rect = {
1650 .rect = render_info->renderArea,
1651 .baseArrayLayer = 0,
1652 .layerCount = render_info->layerCount,
1653 };
1654
1655 panvk_per_arch(CmdClearAttachments)(panvk_cmd_buffer_to_handle(cmdbuf),
1656 clear_att_count, clear_atts, 1,
1657 &clear_rect);
1658 }
1659 }
1660
1661 VKAPI_ATTR void VKAPI_CALL
1662 panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
1663 const VkRenderingInfo *pRenderingInfo)
1664 {
1665 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1666 struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
1667
1668 panvk_cmd_begin_rendering_init_state(cmdbuf, pRenderingInfo);
1669
1670 bool resuming = state->render.flags & VK_RENDERING_RESUMING_BIT;
1671
1672 /* If we're not resuming, the FBD should be NULL. */
1673 assert(!state->render.fbds.gpu || resuming);
1674
1675 if (!resuming)
1676 preload_render_area_border(cmdbuf, pRenderingInfo);
1677 }
1678
1679 static void
1680 resolve_attachments(struct panvk_cmd_buffer *cmdbuf)
1681 {
1682 struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1683 bool needs_resolve = false;
1684
1685 unsigned bound_atts = cmdbuf->state.gfx.render.bound_attachments;
1686 unsigned color_att_count =
1687 util_last_bit(bound_atts & MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS);
1688 VkRenderingAttachmentInfo color_atts[MAX_RTS];
1689 for (uint32_t i = 0; i < color_att_count; i++) {
1690 const struct panvk_resolve_attachment *resolve_info =
1691 &cmdbuf->state.gfx.render.color_attachments.resolve[i];
1692
1693 color_atts[i] = (VkRenderingAttachmentInfo){
1694 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1695 .imageView = panvk_image_view_to_handle(resolve_info->src_iview),
1696 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1697 .resolveMode = resolve_info->mode,
1698 .resolveImageView =
1699 panvk_image_view_to_handle(resolve_info->dst_iview),
1700 .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
1701 };
1702
1703 if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
1704 needs_resolve = true;
1705 }
1706
1707 const struct panvk_resolve_attachment *resolve_info =
1708 &cmdbuf->state.gfx.render.z_attachment.resolve;
1709 VkRenderingAttachmentInfo z_att = {
1710 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1711 .imageView = panvk_image_view_to_handle(resolve_info->src_iview),
1712 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1713 .resolveMode = resolve_info->mode,
1714 .resolveImageView = panvk_image_view_to_handle(resolve_info->dst_iview),
1715 .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
1716 };
1717
1718 if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
1719 needs_resolve = true;
1720
1721 resolve_info = &cmdbuf->state.gfx.render.s_attachment.resolve;
1722
1723 VkRenderingAttachmentInfo s_att = {
1724 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1725 .imageView = panvk_image_view_to_handle(resolve_info->src_iview),
1726 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1727 .resolveMode = resolve_info->mode,
1728 .resolveImageView = panvk_image_view_to_handle(resolve_info->dst_iview),
1729 .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
1730 };
1731
1732 if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
1733 needs_resolve = true;
1734
1735 if (!needs_resolve)
1736 return;
1737
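/* Rebuild a VkRenderingInfo covering the current render area and layer count,
 * and let vk_meta_resolve_rendering() emit the resolve operations.
 */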
1738 const VkRenderingInfo render_info = {
1739 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
1740 .renderArea =
1741 {
1742 .offset.x = fbinfo->extent.minx,
1743 .offset.y = fbinfo->extent.miny,
1744 .extent.width = fbinfo->extent.maxx - fbinfo->extent.minx + 1,
1745 .extent.height = fbinfo->extent.maxy - fbinfo->extent.miny + 1,
1746 },
1747 .layerCount = cmdbuf->state.gfx.render.layer_count,
1748 .viewMask = 0,
1749 .colorAttachmentCount = color_att_count,
1750 .pColorAttachments = color_atts,
1751 .pDepthAttachment = &z_att,
1752 .pStencilAttachment = &s_att,
1753 };
1754
1755 struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
1756 struct panvk_cmd_meta_graphics_save_ctx save = {0};
1757
1758 panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save);
1759 vk_meta_resolve_rendering(&cmdbuf->vk, &dev->meta, &render_info);
1760 panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save);
1761 }
1762
1763 static uint8_t
1764 prepare_fb_desc(struct panvk_cmd_buffer *cmdbuf, uint32_t layer, void *fbd)
1765 {
1766 struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
1767 bool simul_use =
1768 cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
1769
1770 if (cmdbuf->state.tls.desc.gpu) {
1771 ASSERTED unsigned num_preload_jobs =
1772 GENX(pan_preload_fb)(&dev->blitter.cache, &cmdbuf->desc_pool.base,
1773 &cmdbuf->state.gfx.render.fb.info, layer,
1774 cmdbuf->state.tls.desc.gpu, NULL);
1775
1776 /* Valhall GPUs use pre-frame DCDs to preload the FB content. We
1777 * thus expect num_preload_jobs to be zero.
1778 */
1779 assert(!num_preload_jobs);
1780 }
1781
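/* Only bake the tiler context pointer into the FBD when it won't need to be
 * patched later: with simultaneous use, the FBDs are copied and patched from
 * the fragment command stream instead (see issue_fragment_jobs()).
 */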
1782 struct pan_tiler_context tiler_ctx = {
1783 .valhall.desc = !simul_use ? cmdbuf->state.gfx.render.tiler : 0,
1784 };
1785
1786 return GENX(pan_emit_fbd)(&cmdbuf->state.gfx.render.fb.info, layer, NULL,
1787 &tiler_ctx, fbd);
1788 }
1789
1790 static void
1791 flush_tiling(struct panvk_cmd_buffer *cmdbuf)
1792 {
1793 if (!cmdbuf->state.gfx.render.fbds.gpu)
1794 return;
1795
1796 struct cs_builder *b =
1797 panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
1798
1799 struct cs_index render_ctx = cs_scratch_reg64(b, 2);
1800
1801 if (cmdbuf->state.gfx.render.tiler) {
1802 /* Flush the tiling operations and signal the internal sync object. */
1803 cs_req_res(b, CS_TILER_RES);
1804 cs_finish_tiling(b, false);
1805 cs_req_res(b, 0);
1806
1807 struct cs_index sync_addr = cs_scratch_reg64(b, 0);
1808 struct cs_index iter_sb = cs_scratch_reg32(b, 2);
1809 struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);
1810 struct cs_index add_val = cs_scratch_reg64(b, 4);
1811
1812 cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
1813 BITFIELD_MASK(3),
1814 offsetof(struct panvk_cs_subqueue_context, syncobjs));
1815 cs_wait_slot(b, SB_ID(LS), false);
1816
1817 /* We rely on PANVK_SUBQUEUE_VERTEX_TILER being the first subqueue, which
1818 * lets us skip an ADD operation on the syncobjs pointer. */
1819 STATIC_ASSERT(PANVK_SUBQUEUE_VERTEX_TILER == 0);
1820
1821 cs_move64_to(b, add_val, 1);
1822
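/* Defer the heap operation and the syncobj update until the current iteration
 * scoreboard has signalled, then rotate to the next scoreboard slot.
 */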
1823 cs_match(b, iter_sb, cmp_scratch) {
1824 #define CASE(x) \
1825 cs_case(b, x) { \
1826 cs_heap_operation(b, \
1827 MALI_CS_HEAP_OPERATION_VERTEX_TILER_COMPLETED, \
1828 cs_defer(SB_WAIT_ITER(x), \
1829 SB_ID(DEFERRED_SYNC))); \
1830 cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, \
1831 add_val, sync_addr, \
1832 cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \
1833 cs_move32_to(b, iter_sb, next_iter_sb(x)); \
1834 }
1835
1836 CASE(0)
1837 CASE(1)
1838 CASE(2)
1839 CASE(3)
1840 CASE(4)
1841 #undef CASE
1842 }
1843
1844 cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
1845 offsetof(struct panvk_cs_subqueue_context, iter_sb));
1846 cs_wait_slot(b, SB_ID(LS), false);
1847
1848 /* Update the vertex seqno. */
1849 ++cmdbuf->state.cs[PANVK_SUBQUEUE_VERTEX_TILER].relative_sync_point;
1850 } else {
1851 cs_load64_to(b, render_ctx, cs_subqueue_ctx_reg(b),
1852 offsetof(struct panvk_cs_subqueue_context, render));
1853 cs_wait_slot(b, SB_ID(LS), false);
1854 }
1855 }
1856
1857 static void
1858 wait_finish_tiling(struct panvk_cmd_buffer *cmdbuf)
1859 {
1860 if (!cmdbuf->state.gfx.render.tiler)
1861 return;
1862
1863 struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
1864 struct cs_index vt_sync_addr = cs_scratch_reg64(b, 0);
1865 struct cs_index vt_sync_point = cs_scratch_reg64(b, 2);
1866 uint64_t rel_vt_sync_point =
1867 cmdbuf->state.cs[PANVK_SUBQUEUE_VERTEX_TILER].relative_sync_point;
1868
1869 cs_load64_to(b, vt_sync_addr, cs_subqueue_ctx_reg(b),
1870 offsetof(struct panvk_cs_subqueue_context, syncobjs));
1871 cs_wait_slot(b, SB_ID(LS), false);
1872
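/* Turn the relative sync point into an absolute one and wait for the
 * vertex/tiler subqueue to reach it before issuing fragment work.
 */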
1873 cs_add64(b, vt_sync_point,
1874 cs_progress_seqno_reg(b, PANVK_SUBQUEUE_VERTEX_TILER),
1875 rel_vt_sync_point);
1876 cs_sync64_wait(b, false, MALI_CS_CONDITION_GREATER, vt_sync_point,
1877 vt_sync_addr);
1878 }
1879
1880 static void
1881 issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
1882 {
1883 if (!cmdbuf->state.gfx.render.fbds.gpu)
1884 return;
1885
1886 struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
1887 struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1888 struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
1889
1890 /* Wait for the tiling to be done before submitting the fragment job. */
1891 wait_finish_tiling(cmdbuf);
1892
1893 /* Reserve a scoreboard for the fragment job. */
1894 panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
1895
1896 /* Now initialize the fragment context: set the render area bounding box
(min X/Y and max X/Y packed per register). */
1897 cs_update_frag_ctx(b) {
1898 cs_move32_to(b, cs_sr_reg32(b, 42),
1899 (fbinfo->extent.miny << 16) | fbinfo->extent.minx);
1900 cs_move32_to(b, cs_sr_reg32(b, 43),
1901 (fbinfo->extent.maxy << 16) | fbinfo->extent.maxx);
1902 }
1903
1904 fbinfo->sample_positions =
1905 dev->sample_positions->addr.dev +
1906 panfrost_sample_positions_offset(pan_sample_pattern(fbinfo->nr_samples));
1907
1908 bool simul_use =
1909 cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
1910
1911 /* The only field we patch in the FBDs is the tiler pointer. If no tiler is
1912 * involved (clear-only job), or if the update can happen in place (the
1913 * command buffer is not flagged for simultaneous use), we can avoid the
1914 * copy. */
1915 bool copy_fbds = simul_use && cmdbuf->state.gfx.render.tiler;
1916 uint32_t fbd_sz = calc_fbd_size(cmdbuf);
1917 struct panfrost_ptr fbds = cmdbuf->state.gfx.render.fbds;
1918 uint8_t fbd_flags = 0;
1919
1920 /* We prepare all FB descriptors upfront. */
1921 for (uint32_t i = 0; i < cmdbuf->state.gfx.render.layer_count; i++) {
1922 uint32_t new_fbd_flags =
1923 prepare_fb_desc(cmdbuf, i, fbds.cpu + (fbd_sz * i));
1924
1925 /* Make sure all FBDs have the same flags. */
1926 assert(i == 0 || new_fbd_flags == fbd_flags);
1927 fbd_flags = new_fbd_flags;
1928 }
1929
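/* FBD/tiler pointer setup: with simultaneous use, the FBDs are copied into the
 * descriptor ring buffer right after the tiler context; otherwise the FBDs
 * emitted above are consumed (and patched) in place.
 */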
1930 struct cs_index layer_count = cs_sr_reg32(b, 47);
1931 struct cs_index fbd_ptr = cs_sr_reg64(b, 48);
1932 struct cs_index tiler_ptr = cs_sr_reg64(b, 50);
1933 struct cs_index src_fbd_ptr = cs_undef();
1934
1935 if (copy_fbds) {
1936 src_fbd_ptr = cs_sr_reg64(b, 52);
1937
1938 cs_move32_to(b, layer_count, cmdbuf->state.gfx.render.layer_count);
1939 cs_load64_to(
1940 b, tiler_ptr, cs_subqueue_ctx_reg(b),
1941 offsetof(struct panvk_cs_subqueue_context, render.desc_ringbuf.ptr));
1942 cs_wait_slot(b, SB_ID(LS), false);
1943
1944 cs_add64(b, fbd_ptr, tiler_ptr, pan_size(TILER_CONTEXT));
1945 cs_move64_to(b, src_fbd_ptr, fbds.gpu);
1946 } else if (cmdbuf->state.gfx.render.tiler) {
1947 cs_move64_to(b, fbd_ptr, fbds.gpu);
1948 cs_move64_to(b, tiler_ptr, cmdbuf->state.gfx.render.tiler);
1949 }
1950
1951 cs_move32_to(b, layer_count, cmdbuf->state.gfx.render.layer_count);
1952 cs_while(b, MALI_CS_CONDITION_GREATER, layer_count) {
1953 if (copy_fbds) {
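/* Copy this layer's FBD into the descriptor ring buffer, 64 bytes
 * (16 scratch registers) at a time. */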
1954 for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
1955 cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16), src_fbd_ptr,
1956 BITFIELD_MASK(16), fbd_off);
1957 cs_wait_slot(b, SB_ID(LS), false);
1958 cs_store(b, cs_scratch_reg_tuple(b, 0, 16), fbd_ptr,
1959 BITFIELD_MASK(16), fbd_off);
1960 cs_wait_slot(b, SB_ID(LS), false);
1961 }
1962
1963 cs_add64(b, src_fbd_ptr, src_fbd_ptr, fbd_sz);
1964 }
1965
1966 if (cmdbuf->state.gfx.render.tiler) {
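/* Patch the tiler context pointer in the FBD (stored at offset 56). */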
1967 cs_store64(b, tiler_ptr, fbd_ptr, 56);
1968 cs_wait_slot(b, SB_ID(LS), false);
1969 }
1970
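/* Point the fragment context at this layer's FBD; the FBD flags returned by
 * pan_emit_fbd() live in the low bits of the address. */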
1971 cs_update_frag_ctx(b)
1972 cs_add64(b, cs_sr_reg64(b, 40), fbd_ptr, fbd_flags);
1973
1974 cs_req_res(b, CS_FRAG_RES);
1975 cs_run_fragment(b, false, MALI_TILE_RENDER_ORDER_Z_ORDER, false);
1976 cs_req_res(b, 0);
1977 cs_add64(b, fbd_ptr, fbd_ptr, fbd_sz);
1978 cs_add32(b, layer_count, layer_count, -1);
1979 }
1980
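/* End-of-pass bookkeeping: signal the fragment syncobj (and release the
 * descriptor ring-buffer space when FBDs were copied), deferred on the
 * iteration scoreboard.
 */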
1981 struct cs_index sync_addr = cs_scratch_reg64(b, 0);
1982 struct cs_index iter_sb = cs_scratch_reg32(b, 2);
1983 struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);
1984 struct cs_index add_val = cs_scratch_reg64(b, 4);
1985 struct cs_index release_sz = cs_scratch_reg32(b, 5);
1986 struct cs_index ringbuf_sync_addr = cs_scratch_reg64(b, 6);
1987 struct cs_index completed = cs_scratch_reg_tuple(b, 10, 4);
1988 struct cs_index completed_top = cs_scratch_reg64(b, 10);
1989 struct cs_index completed_bottom = cs_scratch_reg64(b, 12);
1990
1991 cs_move64_to(b, add_val, 1);
1992 cs_load_to(b, cs_scratch_reg_tuple(b, 0, 3), cs_subqueue_ctx_reg(b),
1993 BITFIELD_MASK(3),
1994 offsetof(struct panvk_cs_subqueue_context, syncobjs));
1995
1996 if (copy_fbds) {
1997 cs_move32_to(b, release_sz, calc_render_descs_size(cmdbuf));
1998 cs_load64_to(b, ringbuf_sync_addr, cs_subqueue_ctx_reg(b),
1999 offsetof(struct panvk_cs_subqueue_context,
2000 render.desc_ringbuf.syncobj));
2001 }
2002
2003 if (cmdbuf->state.gfx.render.tiler)
2004 cs_load_to(b, completed, tiler_ptr, BITFIELD_MASK(4), 40);
2005
2006 cs_wait_slot(b, SB_ID(LS), false);
2007
2008 cs_add64(b, sync_addr, sync_addr,
2009 PANVK_SUBQUEUE_FRAGMENT * sizeof(struct panvk_cs_sync64));
2010
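/* Same scoreboard rotation as flush_tiling(): defer the fragment completion
 * and syncobj updates on the current iteration scoreboard, then move to the
 * next slot.
 */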
2011 cs_match(b, iter_sb, cmp_scratch) {
2012 #define CASE(x) \
2013 cs_case(b, x) { \
2014 if (cmdbuf->state.gfx.render.tiler) { \
2015 cs_finish_fragment(b, true, completed_top, completed_bottom, \
2016 cs_defer(SB_WAIT_ITER(x), \
2017 SB_ID(DEFERRED_SYNC))); \
2018 } \
2019 if (copy_fbds) { \
2020 cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_CSG, \
2021 release_sz, ringbuf_sync_addr, \
2022 cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \
2023 } \
2024 cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, \
2025 add_val, sync_addr, \
2026 cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC))); \
2027 cs_move32_to(b, iter_sb, next_iter_sb(x)); \
2028 }
2029
2030 CASE(0)
2031 CASE(1)
2032 CASE(2)
2033 CASE(3)
2034 CASE(4)
2035 #undef CASE
2036 }
2037
2038 cs_store32(b, iter_sb, cs_subqueue_ctx_reg(b),
2039 offsetof(struct panvk_cs_subqueue_context, iter_sb));
2040 cs_wait_slot(b, SB_ID(LS), false);
2041
2042 /* Update the ring buffer position. */
2043 if (copy_fbds)
2044 cs_render_desc_ringbuf_move_ptr(b, calc_render_descs_size(cmdbuf));
2045
2046 /* Update the frag seqno. */
2047 ++cmdbuf->state.cs[PANVK_SUBQUEUE_FRAGMENT].relative_sync_point;
2048
2049 memset(&cmdbuf->state.gfx.render.fbds, 0,
2050 sizeof(cmdbuf->state.gfx.render.fbds));
2051 cmdbuf->state.gfx.render.tiler = 0;
2052 }
2053
2054 void
2055 panvk_per_arch(cmd_flush_draws)(struct panvk_cmd_buffer *cmdbuf)
2056 {
2057 /* If no draw was queued, there is nothing to flush and no preload to force. */
2058 if (!cmdbuf->state.gfx.render.fbds.gpu)
2059 return;
2060
2061 flush_tiling(cmdbuf);
2062 issue_fragment_jobs(cmdbuf);
2063 force_fb_preload(cmdbuf);
2064 memset(&cmdbuf->state.gfx.render.fbds, 0,
2065 sizeof(cmdbuf->state.gfx.render.fbds));
2066 cmdbuf->state.gfx.render.tiler = 0;
2067 }
2068
2069 VKAPI_ATTR void VKAPI_CALL
2070 panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
2071 {
2072 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
2073
2074 if (!(cmdbuf->state.gfx.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
2075 struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
2076 bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
2077 for (unsigned i = 0; i < fbinfo->rt_count; i++)
2078 clear |= fbinfo->rts[i].clear;
2079
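/* A render pass with no draw still needs FBDs if any attachment is cleared,
 * so the clears get executed by the fragment job. */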
2080 if (clear) {
2081 VkResult result = get_fb_descs(cmdbuf);
2082 if (result != VK_SUCCESS)
2083 return;
2084 }
2085
2086 flush_tiling(cmdbuf);
2087 issue_fragment_jobs(cmdbuf);
2088 resolve_attachments(cmdbuf);
2089 }
2090 }
2091
2092 VKAPI_ATTR void VKAPI_CALL
2093 panvk_per_arch(CmdBindVertexBuffers)(VkCommandBuffer commandBuffer,
2094 uint32_t firstBinding,
2095 uint32_t bindingCount,
2096 const VkBuffer *pBuffers,
2097 const VkDeviceSize *pOffsets)
2098 {
2099 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
2100
2101 assert(firstBinding + bindingCount <= MAX_VBS);
2102
2103 for (uint32_t i = 0; i < bindingCount; i++) {
2104 VK_FROM_HANDLE(panvk_buffer, buffer, pBuffers[i]);
2105
2106 cmdbuf->state.gfx.vb.bufs[firstBinding + i].address =
2107 panvk_buffer_gpu_ptr(buffer, pOffsets[i]);
2108 cmdbuf->state.gfx.vb.bufs[firstBinding + i].size =
2109 panvk_buffer_range(buffer, pOffsets[i], VK_WHOLE_SIZE);
2110 }
2111
2112 cmdbuf->state.gfx.vb.count =
2113 MAX2(cmdbuf->state.gfx.vb.count, firstBinding + bindingCount);
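/* Invalidate the VS driver set so the attribute buffer descriptors get
 * re-emitted with the new bindings on the next draw. */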
2114 memset(&cmdbuf->state.gfx.vs.desc.driver_set, 0,
2115 sizeof(cmdbuf->state.gfx.vs.desc.driver_set));
2116 }
2117
2118 VKAPI_ATTR void VKAPI_CALL
2119 panvk_per_arch(CmdBindIndexBuffer)(VkCommandBuffer commandBuffer,
2120 VkBuffer buffer, VkDeviceSize offset,
2121 VkIndexType indexType)
2122 {
2123 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
2124 VK_FROM_HANDLE(panvk_buffer, buf, buffer);
2125
2126 cmdbuf->state.gfx.ib.buffer = buf;
2127 cmdbuf->state.gfx.ib.offset = offset;
2128 cmdbuf->state.gfx.ib.index_size = vk_index_type_to_bytes(indexType);
2129 cmdbuf->state.gfx.ib.dirty = true;
2130 }
2131