1 /*
2  * Copyright © 2019 Red Hat.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /* use a gallium context to execute a command buffer */
25 
26 #include "lvp_private.h"
27 #include "lvp_acceleration_structure.h"
28 
29 #include "pipe/p_context.h"
30 #include "pipe/p_state.h"
31 #include "lvp_conv.h"
32 
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_from_mesa.h"
35 
36 #include "util/format/u_format.h"
37 #include "util/u_surface.h"
38 #include "util/u_sampler.h"
39 #include "util/box.h"
40 #include "util/u_inlines.h"
41 #include "util/u_math.h"
42 #include "util/u_memory.h"
43 #include "util/u_prim.h"
44 #include "util/u_prim_restart.h"
45 #include "util/format/u_format_zs.h"
46 #include "util/ptralloc.h"
47 #include "tgsi/tgsi_from_mesa.h"
48 
49 #include "vk_blend.h"
50 #include "vk_cmd_enqueue_entrypoints.h"
51 #include "vk_descriptor_update_template.h"
52 #include "vk_util.h"
53 
54 #define VK_PROTOTYPES
55 #include <vulkan/vulkan.h>
56 
57 #define DOUBLE_EQ(a, b) (fabs((a) - (b)) < DBL_EPSILON)
58 
59 enum gs_output {
60   GS_OUTPUT_NONE,
61   GS_OUTPUT_NOT_LINES,
62   GS_OUTPUT_LINES,
63 };
64 
65 struct descriptor_buffer_offset {
66    uint32_t buffer_index;
67    VkDeviceSize offset;
68 
69    const struct lvp_descriptor_set_layout *sampler_layout;
70 };
71 
72 struct lvp_render_attachment {
73    struct lvp_image_view *imgv;
74    VkResolveModeFlags resolve_mode;
75    struct lvp_image_view *resolve_imgv;
76    VkAttachmentLoadOp load_op;
77    VkAttachmentStoreOp store_op;
78    VkClearValue clear_value;
79    bool read_only;
80 };
81 
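/* Accumulated gallium state for replaying a command buffer: cached pipe/CSO
 * objects plus per-piece dirty flags so that emit_state()/emit_compute_state()
 * only re-bind what actually changed before the next draw or dispatch.
 */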
82 struct rendering_state {
83    struct pipe_context *pctx;
84    struct lvp_device *device; //for uniform inlining only
85    struct u_upload_mgr *uploader;
86    struct cso_context *cso;
87 
88    bool blend_dirty;
89    bool rs_dirty;
90    bool dsa_dirty;
91    bool dsa_no_stencil;
92    bool stencil_ref_dirty;
93    bool clip_state_dirty;
94    bool blend_color_dirty;
95    bool ve_dirty;
96    bool vb_dirty;
97    bool constbuf_dirty[LVP_SHADER_STAGES];
98    bool pcbuf_dirty[LVP_SHADER_STAGES];
99    bool has_pcbuf[LVP_SHADER_STAGES];
100    bool inlines_dirty[LVP_SHADER_STAGES];
101    bool vp_dirty;
102    bool scissor_dirty;
103    bool ib_dirty;
104    bool sample_mask_dirty;
105    bool min_samples_dirty;
106    bool poison_mem;
107    bool noop_fs_bound;
108    struct pipe_draw_indirect_info indirect_info;
109    struct pipe_draw_info info;
110 
111    struct pipe_grid_info dispatch_info;
112    struct pipe_grid_info trace_rays_info;
113    struct pipe_framebuffer_state framebuffer;
114    int fb_map[PIPE_MAX_COLOR_BUFS];
115    bool fb_remapped;
116 
117    struct pipe_blend_state blend_state;
118    struct {
119       float offset_units;
120       float offset_scale;
121       float offset_clamp;
122       bool enabled;
123    } depth_bias;
124    struct pipe_rasterizer_state rs_state;
125    struct pipe_depth_stencil_alpha_state dsa_state;
126 
127    struct pipe_blend_color blend_color;
128    struct pipe_stencil_ref stencil_ref;
129    struct pipe_clip_state clip_state;
130 
131    int num_scissors;
132    struct pipe_scissor_state scissors[16];
133 
134    int num_viewports;
135    struct pipe_viewport_state viewports[16];
136    struct {
137       float min, max;
138    } depth[16];
139 
140    uint8_t patch_vertices;
141    uint8_t index_size;
142    unsigned index_offset;
143    unsigned index_buffer_size; //UINT32_MAX for unset
144    struct pipe_resource *index_buffer;
145    struct pipe_constant_buffer const_buffer[LVP_SHADER_STAGES][16];
146    struct lvp_descriptor_set *desc_sets[LVP_PIPELINE_TYPE_COUNT][MAX_SETS];
147    struct pipe_resource *desc_buffers[MAX_SETS];
148    uint8_t *desc_buffer_addrs[MAX_SETS];
149    struct descriptor_buffer_offset desc_buffer_offsets[LVP_PIPELINE_TYPE_COUNT][MAX_SETS];
150    int num_const_bufs[LVP_SHADER_STAGES];
151    int num_vb;
152    unsigned start_vb;
153    bool vb_strides_dirty;
154    unsigned vb_strides[PIPE_MAX_ATTRIBS];
155    struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
156    size_t vb_sizes[PIPE_MAX_ATTRIBS]; //UINT32_MAX for unset
157    uint8_t vertex_buffer_index[PIPE_MAX_ATTRIBS]; /* temp storage to sort for start_vb */
158    struct cso_velems_state velem;
159 
160    bool disable_multisample;
161    enum gs_output gs_output_lines : 2;
162 
163    uint32_t color_write_disables:8;
164    uint32_t pad:13;
165 
166    void *velems_cso;
167 
168    uint8_t push_constants[128 * 4];
169    uint16_t push_size[LVP_PIPELINE_TYPE_COUNT];
170    uint16_t gfx_push_sizes[LVP_SHADER_STAGES];
171 
172    VkRect2D render_area;
173    bool suspending;
174    bool render_cond;
175    uint32_t color_att_count;
176    struct lvp_render_attachment color_att[PIPE_MAX_COLOR_BUFS];
177    struct lvp_render_attachment depth_att;
178    struct lvp_render_attachment stencil_att;
179    struct lvp_image_view *ds_imgv;
180    struct lvp_image_view *ds_resolve_imgv;
181    uint32_t                                     forced_sample_count;
182    VkResolveModeFlagBits                        forced_depth_resolve_mode;
183    VkResolveModeFlagBits                        forced_stencil_resolve_mode;
184 
185    uint32_t sample_mask;
186    unsigned min_samples;
187    unsigned rast_samples;
188    float min_sample_shading;
189    bool force_min_sample;
190    bool sample_shading;
191    bool depth_clamp_sets_clip;
192 
193    uint32_t num_so_targets;
194    struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS];
195    uint32_t so_offsets[PIPE_MAX_SO_BUFFERS];
196 
197    struct lvp_shader *shaders[LVP_SHADER_STAGES];
198    bool compute_shader_dirty;
199 
200    bool tess_ccw;
201    void *tess_states[2];
202 
203    struct util_dynarray push_desc_sets;
204    struct util_dynarray internal_buffers;
205 
206    struct lvp_pipeline *exec_graph;
207 };
208 
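/* Wrap an already-mapped CPU allocation in an unbacked PIPE_BUFFER resource and
 * bind that memory as its backing store, so existing host memory can be used
 * directly as a (constant) buffer without a copy.
 */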
209 static struct pipe_resource *
210 get_buffer_resource(struct pipe_context *ctx, void *mem)
211 {
212    struct pipe_screen *pscreen = ctx->screen;
213    struct pipe_resource templ = {0};
214 
215    if (!mem)
216       return NULL;
217 
218    templ.screen = pscreen;
219    templ.target = PIPE_BUFFER;
220    templ.format = PIPE_FORMAT_R8_UNORM;
221    templ.width0 = UINT32_MAX;
222    templ.height0 = 1;
223    templ.depth0 = 1;
224    templ.array_size = 1;
225    templ.bind |= PIPE_BIND_CONSTANT_BUFFER;
226    templ.flags = PIPE_RESOURCE_FLAG_DONT_OVER_ALLOCATE;
227 
228    uint64_t size;
229    struct pipe_resource *pres = pscreen->resource_create_unbacked(pscreen, &templ, &size);
230 
231    struct llvmpipe_memory_allocation alloc = {
232       .cpu_addr = mem,
233    };
234 
235    pscreen->resource_bind_backing(pscreen, pres, (void *)&alloc, 0, 0, 0);
236    return pres;
237 }
238 
239 ALWAYS_INLINE static void
240 assert_subresource_layers(const struct pipe_resource *pres,
241                           const struct lvp_image *image,
242                           const VkImageSubresourceLayers *layers, const VkOffset3D *offsets)
243 {
244 #ifndef NDEBUG
245    if (pres->target == PIPE_TEXTURE_3D) {
246       assert(layers->baseArrayLayer == 0);
247       assert(layers->layerCount == 1);
248       assert(offsets[0].z <= pres->depth0);
249       assert(offsets[1].z <= pres->depth0);
250    } else {
251       assert(layers->baseArrayLayer < pres->array_size);
252       assert(layers->baseArrayLayer + vk_image_subresource_layer_count(&image->vk, layers) <= pres->array_size);
253       assert(offsets[0].z == 0);
254       assert(offsets[1].z == 1);
255    }
256 #endif
257 }
258 
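/* Flush all work queued on the context and block until the resulting fence
 * signals.
 */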
259 static void finish_fence(struct rendering_state *state)
260 {
261    struct pipe_fence_handle *handle = NULL;
262 
263    state->pctx->flush(state->pctx, &handle, 0);
264 
265    state->pctx->screen->fence_finish(state->pctx->screen,
266                                      NULL,
267                                      handle, OS_TIMEOUT_INFINITE);
268    state->pctx->screen->fence_reference(state->pctx->screen,
269                                         &handle, NULL);
270 }
271 
272 static unsigned
273 get_pcbuf_size(struct rendering_state *state, enum pipe_shader_type pstage)
274 {
275    enum lvp_pipeline_type type =
276       ffs(lvp_pipeline_types_from_shader_stages(mesa_to_vk_shader_stage(pstage))) - 1;
277    return state->has_pcbuf[pstage] ? state->push_size[type] : 0;
278 }
279 
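/* Upload the current push constants into a transient constant buffer and bind
 * it at slot 0 of the given pipe shader stage (descriptor-set buffers are
 * emitted starting at slot 1).
 */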
280 static void
281 update_pcbuf(struct rendering_state *state, enum pipe_shader_type pstage,
282              enum pipe_shader_type api_stage)
283 {
284    unsigned size = get_pcbuf_size(state, api_stage);
285    if (size) {
286       uint8_t *mem;
287       struct pipe_constant_buffer cbuf;
288       cbuf.buffer_size = size;
289       cbuf.buffer = NULL;
290       cbuf.user_buffer = NULL;
291       u_upload_alloc(state->uploader, 0, size, 64, &cbuf.buffer_offset, &cbuf.buffer, (void**)&mem);
292       memcpy(mem, state->push_constants, size);
293       state->pctx->set_constant_buffer(state->pctx, pstage, 0, true, &cbuf);
294    }
295    state->pcbuf_dirty[api_stage] = false;
296 }
297 
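/* Select or compile the inline-uniform variant of a shader: build a key from
 * the current push-constant values, look it up in the shader's variant set,
 * and compile a new CSO on a miss.  If inlining stops paying off (too little
 * SSA reduction), further inlining is disabled and the plain shader is bound.
 */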
298 static void
299 update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type sh, bool pcbuf_dirty)
300 {
301    unsigned stage = tgsi_processor_to_shader_stage(sh);
302    state->inlines_dirty[sh] = false;
303    struct lvp_shader *shader = state->shaders[stage];
304    if (!shader || !shader->inlines.can_inline)
305       return;
306    struct lvp_inline_variant v;
307    v.mask = shader->inlines.can_inline;
308    /* these buffers have already been flushed in llvmpipe, so they're safe to read */
309    nir_shader *base_nir = shader->pipeline_nir->nir;
310    if (stage == MESA_SHADER_TESS_EVAL && state->tess_ccw)
311       base_nir = shader->tess_ccw->nir;
312    nir_function_impl *impl = nir_shader_get_entrypoint(base_nir);
313    unsigned ssa_alloc = impl->ssa_alloc;
314    unsigned count = shader->inlines.count[0];
315    if (count && pcbuf_dirty) {
316       unsigned push_size = get_pcbuf_size(state, sh);
317       for (unsigned i = 0; i < count; i++) {
318          unsigned offset = shader->inlines.uniform_offsets[0][i];
319          if (offset < push_size) {
320             memcpy(&v.vals[0][i], &state->push_constants[offset], sizeof(uint32_t));
321          }
322       }
323       for (unsigned i = count; i < MAX_INLINABLE_UNIFORMS; i++)
324          v.vals[0][i] = 0;
325    }
326    bool found = false;
327    struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(&shader->inlines.variants, v.mask, &v, &found);
328    void *shader_state;
329    if (found) {
330       const struct lvp_inline_variant *variant = entry->key;
331       shader_state = variant->cso;
332    } else {
333       nir_shader *nir = nir_shader_clone(NULL, base_nir);
334       NIR_PASS_V(nir, lvp_inline_uniforms, shader, v.vals[0], 0);
335       lvp_shader_optimize(nir);
336       impl = nir_shader_get_entrypoint(nir);
337       if (ssa_alloc - impl->ssa_alloc < ssa_alloc / 2 &&
338          !shader->inlines.must_inline) {
339          /* not enough change; don't inline further */
340          shader->inlines.can_inline = 0;
341          ralloc_free(nir);
342          shader->shader_cso = lvp_shader_compile(state->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir), true);
343          _mesa_set_remove(&shader->inlines.variants, entry);
344          shader_state = shader->shader_cso;
345       } else {
346          shader_state = lvp_shader_compile(state->device, shader, nir, true);
347          struct lvp_inline_variant *variant = mem_dup(&v, sizeof(v));
348          variant->cso = shader_state;
349          entry->key = variant;
350       }
351    }
352    switch (sh) {
353    case MESA_SHADER_VERTEX:
354       state->pctx->bind_vs_state(state->pctx, shader_state);
355       break;
356    case MESA_SHADER_TESS_CTRL:
357       state->pctx->bind_tcs_state(state->pctx, shader_state);
358       break;
359    case MESA_SHADER_TESS_EVAL:
360       state->pctx->bind_tes_state(state->pctx, shader_state);
361       break;
362    case MESA_SHADER_GEOMETRY:
363       state->pctx->bind_gs_state(state->pctx, shader_state);
364       break;
365    case MESA_SHADER_TASK:
366       state->pctx->bind_ts_state(state->pctx, shader_state);
367       break;
368    case MESA_SHADER_MESH:
369       state->pctx->bind_ms_state(state->pctx, shader_state);
370       break;
371    case MESA_SHADER_FRAGMENT:
372       state->pctx->bind_fs_state(state->pctx, shader_state);
373       state->noop_fs_bound = false;
374       break;
375    case MESA_SHADER_COMPUTE:
376       state->pctx->bind_compute_state(state->pctx, shader_state);
377       break;
378    default: break;
379    }
380 }
381 
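/* Flush dirty compute-side state (push constants, constant buffers, and the
 * compute shader CSO, inlined or not) before a dispatch.  The raygen buffers
 * are re-marked dirty afterwards, presumably because ray tracing is bound
 * through the same compute stage.
 */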
382 static void emit_compute_state(struct rendering_state *state)
383 {
384    bool pcbuf_dirty = state->pcbuf_dirty[MESA_SHADER_COMPUTE];
385    if (state->pcbuf_dirty[MESA_SHADER_COMPUTE])
386       update_pcbuf(state, MESA_SHADER_COMPUTE, MESA_SHADER_COMPUTE);
387 
388    if (state->constbuf_dirty[MESA_SHADER_COMPUTE]) {
389       for (unsigned i = 0; i < state->num_const_bufs[MESA_SHADER_COMPUTE]; i++)
390          state->pctx->set_constant_buffer(state->pctx, MESA_SHADER_COMPUTE,
391                                           i + 1, false, &state->const_buffer[MESA_SHADER_COMPUTE][i]);
392       state->constbuf_dirty[MESA_SHADER_COMPUTE] = false;
393    }
394 
395    if (state->inlines_dirty[MESA_SHADER_COMPUTE] &&
396        state->shaders[MESA_SHADER_COMPUTE]->inlines.can_inline) {
397       update_inline_shader_state(state, MESA_SHADER_COMPUTE, pcbuf_dirty);
398    } else if (state->compute_shader_dirty) {
399       state->pctx->bind_compute_state(state->pctx, state->shaders[MESA_SHADER_COMPUTE]->shader_cso);
400    }
401 
402    state->compute_shader_dirty = false;
403 
404    state->pcbuf_dirty[MESA_SHADER_RAYGEN] = true;
405    state->constbuf_dirty[MESA_SHADER_RAYGEN] = true;
406 }
407 
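/* Set the framebuffer state, remapping color attachments through fb_map when
 * the bound render targets have been remapped.
 */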
408 static void
409 emit_fb_state(struct rendering_state *state)
410 {
411    if (state->fb_remapped) {
412       struct pipe_framebuffer_state fb = state->framebuffer;
413       memset(fb.cbufs, 0, sizeof(fb.cbufs));
414       for (unsigned i = 0; i < fb.nr_cbufs; i++) {
415          if (state->fb_map[i] < PIPE_MAX_COLOR_BUFS)
416             fb.cbufs[state->fb_map[i]] = state->framebuffer.cbufs[i];
417       }
418       state->pctx->set_framebuffer_state(state->pctx, &fb);
419    } else {
420       state->pctx->set_framebuffer_state(state->pctx, &state->framebuffer);
421    }
422 }
423 
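/* Derive the minimum samples per fragment: with sample shading enabled this is
 * ceil(rast_samples * min_sample_shading), bumped to the full sample count when
 * greater than one, and force_min_sample overrides it to all samples.  Also
 * keeps the framebuffer sample count in sync with the rasterization samples.
 */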
424 static void
425 update_min_samples(struct rendering_state *state)
426 {
427    state->min_samples = 1;
428    if (state->sample_shading) {
429       state->min_samples = ceil(state->rast_samples * state->min_sample_shading);
430       if (state->min_samples > 1)
431          state->min_samples = state->rast_samples;
432       if (state->min_samples < 1)
433          state->min_samples = 1;
434    }
435    if (state->force_min_sample)
436       state->min_samples = state->rast_samples;
437    if (state->rast_samples != state->framebuffer.samples) {
438       state->framebuffer.samples = state->rast_samples;
439       emit_fb_state(state);
440    }
441 }
442 
443 static void update_vertex_elements_buffer_index(struct rendering_state *state)
444 {
445    for (int i = 0; i < state->velem.count; i++)
446       state->velem.velems[i].vertex_buffer_index = state->vertex_buffer_index[i] - state->start_vb;
447 }
448 
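/* Flush all dirty draw state to the gallium context: blend (with color-write
 * disables and fb remapping applied), rasterizer, depth/stencil/alpha, sample
 * mask, min samples, blend color, stencil ref, vertex elements/buffers,
 * constant buffers, push constants, inlined shader variants, viewports and
 * scissors.  A no-op fragment shader is bound if the pipeline has none.
 */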
449 static void emit_state(struct rendering_state *state)
450 {
451    if (!state->shaders[MESA_SHADER_FRAGMENT] && !state->noop_fs_bound) {
452       state->pctx->bind_fs_state(state->pctx, state->device->noop_fs);
453       state->noop_fs_bound = true;
454    }
455    if (state->blend_dirty) {
456       uint32_t mask = 0;
457       /* zero out the colormask values for disabled attachments */
458       if (state->color_write_disables) {
459          u_foreach_bit(att, state->color_write_disables) {
460             mask |= state->blend_state.rt[att].colormask << (att * 4);
461             state->blend_state.rt[att].colormask = 0;
462          }
463       }
464       if (state->fb_remapped) {
465          struct pipe_blend_state blend = state->blend_state;
466          for (unsigned i = 0; i < state->framebuffer.nr_cbufs; i++) {
467             if (state->fb_map[i] < PIPE_MAX_COLOR_BUFS) {
468                blend.rt[state->fb_map[i]] = state->blend_state.rt[i];
469             }
470          }
471          cso_set_blend(state->cso, &blend);
472       } else {
473          cso_set_blend(state->cso, &state->blend_state);
474       }
475       /* reset colormasks using saved bitmask */
476       if (state->color_write_disables) {
477          const uint32_t att_mask = BITFIELD_MASK(4);
478          u_foreach_bit(att, state->color_write_disables) {
479             state->blend_state.rt[att].colormask = (mask >> (att * 4)) & att_mask;
480          }
481       }
482       state->blend_dirty = false;
483    }
484 
485    if (state->rs_dirty) {
486       bool ms = state->rs_state.multisample;
487       if (state->disable_multisample &&
488           (state->gs_output_lines == GS_OUTPUT_LINES ||
489            (!state->shaders[MESA_SHADER_GEOMETRY] && u_reduced_prim(state->info.mode) == MESA_PRIM_LINES)))
490          state->rs_state.multisample = false;
491       assert(offsetof(struct pipe_rasterizer_state, offset_clamp) - offsetof(struct pipe_rasterizer_state, offset_units) == sizeof(float) * 2);
492       if (state->depth_bias.enabled) {
493          state->rs_state.offset_units = state->depth_bias.offset_units;
494          state->rs_state.offset_scale = state->depth_bias.offset_scale;
495          state->rs_state.offset_clamp = state->depth_bias.offset_clamp;
496          state->rs_state.offset_tri = true;
497          state->rs_state.offset_line = true;
498          state->rs_state.offset_point = true;
499       } else {
500          state->rs_state.offset_units = 0.0f;
501          state->rs_state.offset_scale = 0.0f;
502          state->rs_state.offset_clamp = 0.0f;
503          state->rs_state.offset_tri = false;
504          state->rs_state.offset_line = false;
505          state->rs_state.offset_point = false;
506       }
507       cso_set_rasterizer(state->cso, &state->rs_state);
508       state->rs_dirty = false;
509       state->rs_state.multisample = ms;
510    }
511 
512    if (state->dsa_dirty) {
513       bool s0_enabled = state->dsa_state.stencil[0].enabled;
514       bool s1_enabled = state->dsa_state.stencil[1].enabled;
515       if (state->dsa_no_stencil) {
516          state->dsa_state.stencil[0].enabled = false;
517          state->dsa_state.stencil[1].enabled = false;
518       }
519       cso_set_depth_stencil_alpha(state->cso, &state->dsa_state);
520       state->dsa_dirty = false;
521       state->dsa_state.stencil[0].enabled = s0_enabled;
522       state->dsa_state.stencil[1].enabled = s1_enabled;
523    }
524 
525    if (state->sample_mask_dirty) {
526       cso_set_sample_mask(state->cso, state->sample_mask);
527       state->sample_mask_dirty = false;
528    }
529 
530    if (state->min_samples_dirty) {
531       update_min_samples(state);
532       cso_set_min_samples(state->cso, state->min_samples);
533       state->min_samples_dirty = false;
534    }
535 
536    if (state->blend_color_dirty) {
537       state->pctx->set_blend_color(state->pctx, &state->blend_color);
538       state->blend_color_dirty = false;
539    }
540 
541    if (state->stencil_ref_dirty) {
542       cso_set_stencil_ref(state->cso, state->stencil_ref);
543       state->stencil_ref_dirty = false;
544    }
545 
546    if (state->ve_dirty)
547       update_vertex_elements_buffer_index(state);
548 
549    if (state->vb_strides_dirty) {
550       for (unsigned i = 0; i < state->velem.count; i++)
551          state->velem.velems[i].src_stride = state->vb_strides[state->velem.velems[i].vertex_buffer_index];
552       state->ve_dirty = true;
553       state->vb_strides_dirty = false;
554    }
555 
556    if (state->ve_dirty) {
557       cso_set_vertex_elements(state->cso, &state->velem);
558       state->ve_dirty = false;
559    }
560 
561    if (state->vb_dirty) {
562       cso_set_vertex_buffers(state->cso, state->num_vb, false, state->vb);
563       state->vb_dirty = false;
564    }
565 
566    bool pcbuf_dirty[LVP_SHADER_STAGES] = {false};
567 
568    lvp_forall_gfx_stage(sh) {
569       if (state->constbuf_dirty[sh]) {
570          for (unsigned idx = 0; idx < state->num_const_bufs[sh]; idx++)
571             state->pctx->set_constant_buffer(state->pctx, sh,
572                                              idx + 1, false, &state->const_buffer[sh][idx]);
573       }
574       state->constbuf_dirty[sh] = false;
575    }
576 
577    lvp_forall_gfx_stage(sh) {
578       pcbuf_dirty[sh] = state->pcbuf_dirty[sh];
579       if (state->pcbuf_dirty[sh])
580          update_pcbuf(state, sh, sh);
581    }
582 
583    lvp_forall_gfx_stage(sh) {
584       if (state->inlines_dirty[sh])
585          update_inline_shader_state(state, sh, pcbuf_dirty[sh]);
586    }
587 
588    if (state->vp_dirty) {
589       state->pctx->set_viewport_states(state->pctx, 0, state->num_viewports, state->viewports);
590       state->vp_dirty = false;
591    }
592 
593    if (state->scissor_dirty) {
594       state->pctx->set_scissor_states(state->pctx, 0, state->num_scissors, state->scissors);
595       state->scissor_dirty = false;
596    }
597 }
598 
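/* Bind a compute shader: record it, take the dispatch block size from the NIR
 * workgroup size, and mark either the inline-uniform state or the compute CSO
 * as needing a rebind.
 */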
599 static void
600 handle_compute_shader(struct rendering_state *state, struct lvp_shader *shader, struct lvp_pipeline_layout *layout)
601 {
602    state->shaders[MESA_SHADER_COMPUTE] = shader;
603 
604    if ((layout->push_constant_stages & VK_SHADER_STAGE_COMPUTE_BIT) > 0)
605       state->has_pcbuf[MESA_SHADER_COMPUTE] = layout->push_constant_size > 0;
606 
607    if (!state->has_pcbuf[MESA_SHADER_COMPUTE])
608       state->pcbuf_dirty[MESA_SHADER_COMPUTE] = false;
609 
610    state->dispatch_info.block[0] = shader->pipeline_nir->nir->info.workgroup_size[0];
611    state->dispatch_info.block[1] = shader->pipeline_nir->nir->info.workgroup_size[1];
612    state->dispatch_info.block[2] = shader->pipeline_nir->nir->info.workgroup_size[2];
613    state->inlines_dirty[MESA_SHADER_COMPUTE] = shader->inlines.can_inline;
614    if (!shader->inlines.can_inline)
615       state->compute_shader_dirty = true;
616 }
617 
618 static void handle_compute_pipeline(struct vk_cmd_queue_entry *cmd,
619                                     struct rendering_state *state)
620 {
621    LVP_FROM_HANDLE(lvp_pipeline, pipeline, cmd->u.bind_pipeline.pipeline);
622 
623    handle_compute_shader(state, &pipeline->shaders[MESA_SHADER_COMPUTE], pipeline->layout);
624 }
625 
626 static void handle_ray_tracing_pipeline(struct vk_cmd_queue_entry *cmd,
627                                     struct rendering_state *state)
628 {
629    LVP_FROM_HANDLE(lvp_pipeline, pipeline, cmd->u.bind_pipeline.pipeline);
630 
631    struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_RAYGEN];
632 
633    state->shaders[MESA_SHADER_RAYGEN] = shader;
634 
635    if ((pipeline->layout->push_constant_stages & LVP_RAY_TRACING_STAGES) > 0)
636       state->has_pcbuf[MESA_SHADER_RAYGEN] = pipeline->layout->push_constant_size > 0;
637 
638    if (!state->has_pcbuf[MESA_SHADER_RAYGEN])
639       state->pcbuf_dirty[MESA_SHADER_RAYGEN] = false;
640 
641    state->trace_rays_info.block[0] = shader->pipeline_nir->nir->info.workgroup_size[0];
642    state->trace_rays_info.block[1] = shader->pipeline_nir->nir->info.workgroup_size[1];
643    state->trace_rays_info.block[2] = shader->pipeline_nir->nir->info.workgroup_size[2];
644 }
645 
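/* Compute the viewport depth transform.  With clip_halfz (Vulkan-style [0, 1]
 * clip space) z' = z * (f - n) + n; otherwise (OpenGL-style [-1, 1]) it is
 * z' = z * 0.5 * (f - n) + 0.5 * (n + f).
 */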
646 static void
647 set_viewport_depth_xform(struct rendering_state *state, unsigned idx)
648 {
649    double n = state->depth[idx].min;
650    double f = state->depth[idx].max;
651 
652    if (!state->rs_state.clip_halfz) {
653       state->viewports[idx].scale[2] = 0.5 * (f - n);
654       state->viewports[idx].translate[2] = 0.5 * (n + f);
655    } else {
656       state->viewports[idx].scale[2] = (f - n);
657       state->viewports[idx].translate[2] = n;
658    }
659 }
660 
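/* Convert a VkViewport into a gallium viewport: scale/translate are the half
 * extents and center of the viewport rectangle, and minDepth/maxDepth are
 * stashed for set_viewport_depth_xform() (the memcpy copies both floats).
 */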
661 static void
662 get_viewport_xform(struct rendering_state *state,
663                    const VkViewport *viewport,
664                    unsigned idx)
665 {
666    float x = viewport->x;
667    float y = viewport->y;
668    float half_width = 0.5f * viewport->width;
669    float half_height = 0.5f * viewport->height;
670 
671    state->viewports[idx].scale[0] = half_width;
672    state->viewports[idx].translate[0] = half_width + x;
673    state->viewports[idx].scale[1] = half_height;
674    state->viewports[idx].translate[1] = half_height + y;
675 
676    memcpy(&state->depth[idx].min, &viewport->minDepth, sizeof(float) * 2);
677 }
678 
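/* Update the rasterization sample count and flag the dependent multisample
 * rasterizer and min-samples state as dirty.
 */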
679 static void
680 update_samples(struct rendering_state *state, VkSampleCountFlags samples)
681 {
682    state->rast_samples = samples;
683    state->rs_dirty |= state->rs_state.multisample != (samples > 1);
684    state->rs_state.multisample = samples > 1;
685    state->min_samples_dirty = true;
686 }
687 
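/* Bind the gallium CSO for every graphics stage present in the pipeline, or
 * defer to update_inline_shader_state() when the stage can inline uniforms.
 * Tess eval handles the dynamic domain-origin case by stashing both CW and CCW
 * CSOs and picking one at bind time.
 */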
688 static void
689 handle_graphics_stages(struct rendering_state *state, VkShaderStageFlagBits shader_stages, bool dynamic_tess_origin)
690 {
691    u_foreach_bit(b, shader_stages) {
692       VkShaderStageFlagBits vk_stage = (1 << b);
693       gl_shader_stage stage = vk_to_mesa_shader_stage(vk_stage);
694 
695       state->has_pcbuf[stage] = false;
696 
697       switch (vk_stage) {
698       case VK_SHADER_STAGE_FRAGMENT_BIT:
699          state->inlines_dirty[MESA_SHADER_FRAGMENT] = state->shaders[MESA_SHADER_FRAGMENT]->inlines.can_inline;
700          if (!state->shaders[MESA_SHADER_FRAGMENT]->inlines.can_inline) {
701             state->pctx->bind_fs_state(state->pctx, state->shaders[MESA_SHADER_FRAGMENT]->shader_cso);
702             state->noop_fs_bound = false;
703          }
704          break;
705       case VK_SHADER_STAGE_VERTEX_BIT:
706          state->inlines_dirty[MESA_SHADER_VERTEX] = state->shaders[MESA_SHADER_VERTEX]->inlines.can_inline;
707          if (!state->shaders[MESA_SHADER_VERTEX]->inlines.can_inline)
708             state->pctx->bind_vs_state(state->pctx, state->shaders[MESA_SHADER_VERTEX]->shader_cso);
709          break;
710       case VK_SHADER_STAGE_GEOMETRY_BIT:
711          state->inlines_dirty[MESA_SHADER_GEOMETRY] = state->shaders[MESA_SHADER_GEOMETRY]->inlines.can_inline;
712          if (!state->shaders[MESA_SHADER_GEOMETRY]->inlines.can_inline)
713             state->pctx->bind_gs_state(state->pctx, state->shaders[MESA_SHADER_GEOMETRY]->shader_cso);
714          state->gs_output_lines = state->shaders[MESA_SHADER_GEOMETRY]->pipeline_nir->nir->info.gs.output_primitive == MESA_PRIM_LINES ? GS_OUTPUT_LINES : GS_OUTPUT_NOT_LINES;
715          break;
716       case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
717          state->inlines_dirty[MESA_SHADER_TESS_CTRL] = state->shaders[MESA_SHADER_TESS_CTRL]->inlines.can_inline;
718          if (!state->shaders[MESA_SHADER_TESS_CTRL]->inlines.can_inline)
719             state->pctx->bind_tcs_state(state->pctx, state->shaders[MESA_SHADER_TESS_CTRL]->shader_cso);
720          break;
721       case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
722          state->inlines_dirty[MESA_SHADER_TESS_EVAL] = state->shaders[MESA_SHADER_TESS_EVAL]->inlines.can_inline;
723          state->tess_states[0] = NULL;
724          state->tess_states[1] = NULL;
725          if (!state->shaders[MESA_SHADER_TESS_EVAL]->inlines.can_inline) {
726             if (dynamic_tess_origin) {
727                state->tess_states[0] = state->shaders[MESA_SHADER_TESS_EVAL]->shader_cso;
728                state->tess_states[1] = state->shaders[MESA_SHADER_TESS_EVAL]->tess_ccw_cso;
729                state->pctx->bind_tes_state(state->pctx, state->tess_states[state->tess_ccw]);
730             } else {
731                state->pctx->bind_tes_state(state->pctx, state->shaders[MESA_SHADER_TESS_EVAL]->shader_cso);
732             }
733          }
734          if (!dynamic_tess_origin)
735             state->tess_ccw = false;
736          break;
737       case VK_SHADER_STAGE_TASK_BIT_EXT:
738          state->inlines_dirty[MESA_SHADER_TASK] = state->shaders[MESA_SHADER_TASK]->inlines.can_inline;
739          if (!state->shaders[MESA_SHADER_TASK]->inlines.can_inline)
740             state->pctx->bind_ts_state(state->pctx, state->shaders[MESA_SHADER_TASK]->shader_cso);
741          break;
742       case VK_SHADER_STAGE_MESH_BIT_EXT:
743          state->inlines_dirty[MESA_SHADER_MESH] = state->shaders[MESA_SHADER_MESH]->inlines.can_inline;
744          if (!state->shaders[MESA_SHADER_MESH]->inlines.can_inline)
745             state->pctx->bind_ms_state(state->pctx, state->shaders[MESA_SHADER_MESH]->shader_cso);
746          break;
747       default:
748          assert(0);
749          break;
750       }
751    }
752 }
753 
754 static void
755 unbind_graphics_stages(struct rendering_state *state, VkShaderStageFlagBits shader_stages)
756 {
757    u_foreach_bit(vkstage, shader_stages) {
758       gl_shader_stage stage = vk_to_mesa_shader_stage(1<<vkstage);
759       state->has_pcbuf[stage] = false;
760       switch (stage) {
761       case MESA_SHADER_FRAGMENT:
762          if (state->shaders[MESA_SHADER_FRAGMENT])
763             state->pctx->bind_fs_state(state->pctx, NULL);
764          state->noop_fs_bound = false;
765          break;
766       case MESA_SHADER_GEOMETRY:
767          if (state->shaders[MESA_SHADER_GEOMETRY])
768             state->pctx->bind_gs_state(state->pctx, NULL);
769          break;
770       case MESA_SHADER_TESS_CTRL:
771          if (state->shaders[MESA_SHADER_TESS_CTRL])
772             state->pctx->bind_tcs_state(state->pctx, NULL);
773          break;
774       case MESA_SHADER_TESS_EVAL:
775          if (state->shaders[MESA_SHADER_TESS_EVAL])
776             state->pctx->bind_tes_state(state->pctx, NULL);
777          break;
778       case MESA_SHADER_VERTEX:
779          if (state->shaders[MESA_SHADER_VERTEX])
780             state->pctx->bind_vs_state(state->pctx, NULL);
781          break;
782       case MESA_SHADER_TASK:
783          if (state->shaders[MESA_SHADER_TASK])
784             state->pctx->bind_ts_state(state->pctx, NULL);
785          break;
786       case MESA_SHADER_MESH:
787          if (state->shaders[MESA_SHADER_MESH])
788             state->pctx->bind_ms_state(state->pctx, NULL);
789          break;
790       default:
791          unreachable("what stage is this?!");
792       }
793       state->shaders[stage] = NULL;
794    }
795 }
796 
797 static void
798 handle_graphics_layout(struct rendering_state *state, gl_shader_stage stage, struct lvp_pipeline_layout *layout)
799 {
800    if (layout->push_constant_stages & BITFIELD_BIT(stage)) {
801       state->has_pcbuf[stage] = layout->push_constant_size > 0;
802       if (!state->has_pcbuf[stage])
803          state->pcbuf_dirty[stage] = false;
804    }
805 }
806 
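/* Translate a bound graphics pipeline's vk_graphics_pipeline_state into the
 * cached gallium state.  Anything marked dynamic in the pipeline is skipped
 * here and is expected to arrive via the corresponding vkCmdSet* command.
 */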
807 static void handle_graphics_pipeline(struct lvp_pipeline *pipeline,
808                                      struct rendering_state *state)
809 {
810    const struct vk_graphics_pipeline_state *ps = &pipeline->graphics_state;
811    lvp_pipeline_shaders_compile(pipeline, true);
812    bool dynamic_tess_origin = BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
813    unbind_graphics_stages(state,
814                           (~pipeline->graphics_state.shader_stages) &
815                           (VK_SHADER_STAGE_ALL_GRAPHICS |
816                            VK_SHADER_STAGE_TASK_BIT_EXT |
817                            VK_SHADER_STAGE_MESH_BIT_EXT));
818    lvp_forall_gfx_stage(sh) {
819       if (pipeline->graphics_state.shader_stages & mesa_to_vk_shader_stage(sh))
820          state->shaders[sh] = &pipeline->shaders[sh];
821    }
822 
823    handle_graphics_stages(state, pipeline->graphics_state.shader_stages, dynamic_tess_origin);
824    lvp_forall_gfx_stage(sh) {
825       handle_graphics_layout(state, sh, pipeline->layout);
826    }
827 
828    /* rasterization state */
829    if (ps->rs) {
830       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE))
831          state->rs_state.depth_clamp = ps->rs->depth_clamp_enable;
832       if (BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE)) {
833          state->depth_clamp_sets_clip = false;
834       } else {
835          state->depth_clamp_sets_clip =
836             ps->rs->depth_clip_enable == VK_MESA_DEPTH_CLIP_ENABLE_NOT_CLAMP;
837          if (state->depth_clamp_sets_clip)
838             state->rs_state.depth_clip_near = state->rs_state.depth_clip_far = !state->rs_state.depth_clamp;
839          else
840             state->rs_state.depth_clip_near = state->rs_state.depth_clip_far =
841                vk_rasterization_state_depth_clip_enable(ps->rs);
842       }
843 
844       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE))
845          state->rs_state.rasterizer_discard = ps->rs->rasterizer_discard_enable;
846 
847       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_LINE_MODE)) {
848          state->rs_state.line_smooth = pipeline->line_smooth;
849          state->rs_state.line_rectangular = pipeline->line_rectangular;
850       }
851       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE))
852          state->rs_state.line_stipple_enable = ps->rs->line.stipple.enable;
853       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_POLYGON_MODE)) {
854          state->rs_state.fill_front = vk_polygon_mode_to_pipe(ps->rs->polygon_mode);
855          state->rs_state.fill_back = vk_polygon_mode_to_pipe(ps->rs->polygon_mode);
856       }
857       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) {
858          state->rs_state.flatshade_first =
859             ps->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
860       }
861 
862       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_LINE_WIDTH))
863          state->rs_state.line_width = ps->rs->line.width;
864       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
865          state->rs_state.line_stipple_factor = ps->rs->line.stipple.factor - 1;
866          state->rs_state.line_stipple_pattern = ps->rs->line.stipple.pattern;
867       }
868 
869       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE))
870          state->depth_bias.enabled = ps->rs->depth_bias.enable;
871       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
872          state->depth_bias.offset_units = ps->rs->depth_bias.constant;
873          state->depth_bias.offset_scale = ps->rs->depth_bias.slope;
874          state->depth_bias.offset_clamp = ps->rs->depth_bias.clamp;
875       }
876 
877       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_CULL_MODE))
878          state->rs_state.cull_face = vk_cull_to_pipe(ps->rs->cull_mode);
879 
880       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_FRONT_FACE))
881          state->rs_state.front_ccw = (ps->rs->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE);
882       state->rs_dirty = true;
883    }
884 
885    if (ps->ds) {
886       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE))
887          state->dsa_state.depth_enabled = ps->ds->depth.test_enable;
888       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE))
889          state->dsa_state.depth_writemask = ps->ds->depth.write_enable;
890       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP))
891          state->dsa_state.depth_func = ps->ds->depth.compare_op;
892       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE))
893          state->dsa_state.depth_bounds_test = ps->ds->depth.bounds_test.enable;
894 
895       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
896          state->dsa_state.depth_bounds_min = ps->ds->depth.bounds_test.min;
897          state->dsa_state.depth_bounds_max = ps->ds->depth.bounds_test.max;
898       }
899 
900       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE)) {
901          state->dsa_state.stencil[0].enabled = ps->ds->stencil.test_enable;
902          state->dsa_state.stencil[1].enabled = ps->ds->stencil.test_enable;
903       }
904 
905       const struct vk_stencil_test_face_state *front = &ps->ds->stencil.front;
906       const struct vk_stencil_test_face_state *back = &ps->ds->stencil.back;
907 
908       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP)) {
909          state->dsa_state.stencil[0].func = front->op.compare;
910          state->dsa_state.stencil[0].fail_op = vk_conv_stencil_op(front->op.fail);
911          state->dsa_state.stencil[0].zpass_op = vk_conv_stencil_op(front->op.pass);
912          state->dsa_state.stencil[0].zfail_op = vk_conv_stencil_op(front->op.depth_fail);
913 
914          state->dsa_state.stencil[1].func = back->op.compare;
915          state->dsa_state.stencil[1].fail_op = vk_conv_stencil_op(back->op.fail);
916          state->dsa_state.stencil[1].zpass_op = vk_conv_stencil_op(back->op.pass);
917          state->dsa_state.stencil[1].zfail_op = vk_conv_stencil_op(back->op.depth_fail);
918       }
919 
920       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK)) {
921          state->dsa_state.stencil[0].valuemask = front->compare_mask;
922          state->dsa_state.stencil[1].valuemask = back->compare_mask;
923       }
924 
925       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
926          state->dsa_state.stencil[0].writemask = front->write_mask;
927          state->dsa_state.stencil[1].writemask = back->write_mask;
928       }
929 
930       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
931          state->stencil_ref.ref_value[0] = front->reference;
932          state->stencil_ref.ref_value[1] = back->reference;
933          state->stencil_ref_dirty = true;
934       }
935       state->dsa_dirty = true;
936    }
937 
938    if (ps->cb) {
939       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE))
940          state->blend_state.logicop_enable = ps->cb->logic_op_enable;
941       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_LOGIC_OP))
942          state->blend_state.logicop_func = vk_logic_op_to_pipe(ps->cb->logic_op);
943 
944       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES))
945          state->color_write_disables = ~ps->cb->color_write_enables;
946 
947       for (unsigned i = 0; i < ps->cb->attachment_count; i++) {
948          const struct vk_color_blend_attachment_state *att = &ps->cb->attachments[i];
949          if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_WRITE_MASKS))
950             state->blend_state.rt[i].colormask = att->write_mask;
951          if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_BLEND_ENABLES))
952             state->blend_state.rt[i].blend_enable = att->blend_enable;
953 
954          if (!att->blend_enable) {
955             state->blend_state.rt[i].rgb_func = 0;
956             state->blend_state.rt[i].rgb_src_factor = 0;
957             state->blend_state.rt[i].rgb_dst_factor = 0;
958             state->blend_state.rt[i].alpha_func = 0;
959             state->blend_state.rt[i].alpha_src_factor = 0;
960             state->blend_state.rt[i].alpha_dst_factor = 0;
961          } else if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
962             state->blend_state.rt[i].rgb_func = vk_blend_op_to_pipe(att->color_blend_op);
963             state->blend_state.rt[i].rgb_src_factor = vk_blend_factor_to_pipe(att->src_color_blend_factor);
964             state->blend_state.rt[i].rgb_dst_factor = vk_blend_factor_to_pipe(att->dst_color_blend_factor);
965             state->blend_state.rt[i].alpha_func = vk_blend_op_to_pipe(att->alpha_blend_op);
966             state->blend_state.rt[i].alpha_src_factor = vk_blend_factor_to_pipe(att->src_alpha_blend_factor);
967             state->blend_state.rt[i].alpha_dst_factor = vk_blend_factor_to_pipe(att->dst_alpha_blend_factor);
968          }
969 
970          /* At least llvmpipe applies the blend factor prior to the blend function,
971           * regardless of which function is used (as i965 hardware does).
972           * This means for MIN/MAX the blend factors have to be stomped to ONE.
973           */
974          if (att->color_blend_op == VK_BLEND_OP_MIN ||
975              att->color_blend_op == VK_BLEND_OP_MAX) {
976             state->blend_state.rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
977             state->blend_state.rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
978          }
979 
980          if (att->alpha_blend_op == VK_BLEND_OP_MIN ||
981              att->alpha_blend_op == VK_BLEND_OP_MAX) {
982             state->blend_state.rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
983             state->blend_state.rt[i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
984          }
985       }
986       state->blend_dirty = true;
987       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
988          memcpy(state->blend_color.color, ps->cb->blend_constants, 4 * sizeof(float));
989          state->blend_color_dirty = true;
990       }
991    } else if (ps->rp->color_attachment_count == 0) {
992       memset(&state->blend_state, 0, sizeof(state->blend_state));
993       state->blend_state.rt[0].colormask = 0xf;
994       state->blend_dirty = true;
995    }
996 
997    if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_RS_LINE_MODE))
998       state->disable_multisample = pipeline->disable_multisample;
999    if (ps->ms) {
1000       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_MASK)) {
1001          state->sample_mask = ps->ms->sample_mask;
1002          state->sample_mask_dirty = true;
1003       }
1004       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE))
1005          state->blend_state.alpha_to_coverage = ps->ms->alpha_to_coverage_enable;
1006       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE))
1007          state->blend_state.alpha_to_one = ps->ms->alpha_to_one_enable;
1008       state->force_min_sample = pipeline->force_min_sample;
1009       state->sample_shading = ps->ms->sample_shading_enable;
1010       state->min_sample_shading = ps->ms->min_sample_shading;
1011       state->min_samples_dirty = true;
1012       state->blend_dirty = true;
1013       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES))
1014          update_samples(state, ps->ms->rasterization_samples);
1015    } else {
1016       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_MASK) &&
1017           !BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE))
1018          state->rs_state.multisample = false;
1019       state->sample_shading = false;
1020       state->force_min_sample = false;
1021       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_MASK)) {
1022          state->sample_mask_dirty = state->sample_mask != 0xffffffff;
1023          state->sample_mask = 0xffffffff;
1024          state->min_samples_dirty = !!state->min_samples;
1025          state->min_samples = 0;
1026       }
1027       state->blend_dirty |= state->blend_state.alpha_to_coverage || state->blend_state.alpha_to_one;
1028       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE))
1029          state->blend_state.alpha_to_coverage = false;
1030       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE))
1031          state->blend_state.alpha_to_one = false;
1032       state->rs_dirty = true;
1033    }
1034 
1035    if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_VI) && ps->vi) {
1036       u_foreach_bit(a, ps->vi->attributes_valid) {
1037          uint32_t b = ps->vi->attributes[a].binding;
1038          state->velem.velems[a].src_offset = ps->vi->attributes[a].offset;
1039          state->vertex_buffer_index[a] = b;
1040          state->velem.velems[a].src_format =
1041             lvp_vk_format_to_pipe_format(ps->vi->attributes[a].format);
1042          state->velem.velems[a].dual_slot = false;
1043 
1044          uint32_t d = ps->vi->bindings[b].divisor;
1045          switch (ps->vi->bindings[b].input_rate) {
1046          case VK_VERTEX_INPUT_RATE_VERTEX:
1047             state->velem.velems[a].instance_divisor = 0;
1048             break;
1049          case VK_VERTEX_INPUT_RATE_INSTANCE:
1050             state->velem.velems[a].instance_divisor = d ? d : UINT32_MAX;
1051             break;
1052          default:
1053             unreachable("Invalid vertex input rate");
1054          }
1055 
1056          if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES)) {
1057             state->vb_strides[b] = ps->vi->bindings[b].stride;
1058             state->vb_strides_dirty = true;
1059             state->ve_dirty = true;
1060          }
1061       }
1062 
1063       state->velem.count = util_last_bit(ps->vi->attributes_valid);
1064       state->vb_dirty = true;
1065       state->ve_dirty = true;
1066    }
1067 
1068    if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) && ps->ia) {
1069       state->info.mode = vk_conv_topology(ps->ia->primitive_topology);
1070       state->rs_dirty = true;
1071    }
1072    if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE) && ps->ia)
1073       state->info.primitive_restart = ps->ia->primitive_restart_enable;
1074 
1075    if (ps->ts && !BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS)) {
1076       if (state->patch_vertices != ps->ts->patch_control_points)
1077          state->pctx->set_patch_vertices(state->pctx, ps->ts->patch_control_points);
1078       state->patch_vertices = ps->ts->patch_control_points;
1079    }
1080 
1081    if (ps->vp) {
1082       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT)) {
1083          state->num_viewports = ps->vp->viewport_count;
1084          state->vp_dirty = true;
1085       }
1086       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT)) {
1087          state->num_scissors = ps->vp->scissor_count;
1088          state->scissor_dirty = true;
1089       }
1090 
1091       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_VP_VIEWPORTS)) {
1092          for (uint32_t i = 0; i < ps->vp->viewport_count; i++) {
1093             get_viewport_xform(state, &ps->vp->viewports[i], i);
1094             set_viewport_depth_xform(state, i);
1095          }
1096          state->vp_dirty = true;
1097       }
1098       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_VP_SCISSORS)) {
1099          for (uint32_t i = 0; i < ps->vp->scissor_count; i++) {
1100             const VkRect2D *ss = &ps->vp->scissors[i];
1101             state->scissors[i].minx = ss->offset.x;
1102             state->scissors[i].miny = ss->offset.y;
1103             state->scissors[i].maxx = ss->offset.x + ss->extent.width;
1104             state->scissors[i].maxy = ss->offset.y + ss->extent.height;
1105          }
1106          state->scissor_dirty = true;
1107       }
1108 
1109       if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) &&
1110           state->rs_state.clip_halfz != !ps->vp->depth_clip_negative_one_to_one) {
1111          state->rs_state.clip_halfz = !ps->vp->depth_clip_negative_one_to_one;
1112          state->rs_dirty = true;
1113          for (uint32_t i = 0; i < state->num_viewports; i++)
1114             set_viewport_depth_xform(state, i);
1115          state->vp_dirty = true;
1116       }
1117    }
1118 }
1119 
1120 static void handle_pipeline(struct vk_cmd_queue_entry *cmd,
1121                             struct rendering_state *state)
1122 {
1123    LVP_FROM_HANDLE(lvp_pipeline, pipeline, cmd->u.bind_pipeline.pipeline);
1124    pipeline->used = true;
1125    if (pipeline->type == LVP_PIPELINE_COMPUTE) {
1126       handle_compute_pipeline(cmd, state);
1127    } else if (pipeline->type == LVP_PIPELINE_RAY_TRACING) {
1128       handle_ray_tracing_pipeline(cmd, state);
1129    } else if (pipeline->type == LVP_PIPELINE_GRAPHICS) {
1130       handle_graphics_pipeline(pipeline, state);
1131    } else if (pipeline->type == LVP_PIPELINE_EXEC_GRAPH) {
1132       state->exec_graph = pipeline;
1133    }
1134    state->push_size[pipeline->type] = pipeline->layout->push_constant_size;
1135 }
1136 
1137 static void
1138 handle_graphics_pipeline_group(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
1139 {
1140    assert(cmd->u.bind_pipeline_shader_group_nv.pipeline_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
1141    LVP_FROM_HANDLE(lvp_pipeline, pipeline, cmd->u.bind_pipeline_shader_group_nv.pipeline);
1142    if (cmd->u.bind_pipeline_shader_group_nv.group_index)
1143       pipeline = lvp_pipeline_from_handle(pipeline->groups[cmd->u.bind_pipeline_shader_group_nv.group_index - 1]);
1144    handle_graphics_pipeline(pipeline, state);
1145    state->push_size[pipeline->type] = pipeline->layout->push_constant_size;
1146 }
1147 
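/* Bind a range of vertex buffers.  When an explicit, non-WHOLE_SIZE size is
 * given, the buffer is wrapped in a temporary resource clamped to that range;
 * strides, if provided, are recorded for the vertex-element update.
 */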
1148 static void handle_vertex_buffers2(struct vk_cmd_queue_entry *cmd,
1149                                    struct rendering_state *state)
1150 {
1151    struct vk_cmd_bind_vertex_buffers2 *vcb = &cmd->u.bind_vertex_buffers2;
1152 
1153    int i;
1154    for (i = 0; i < vcb->binding_count; i++) {
1155       int idx = i + vcb->first_binding;
1156 
1157       state->vb[idx].buffer_offset = vcb->offsets[i];
1158       if (state->vb_sizes[idx] != UINT32_MAX)
1159          pipe_resource_reference(&state->vb[idx].buffer.resource, NULL);
1160       state->vb[idx].buffer.resource = vcb->buffers[i] && (!vcb->sizes || vcb->sizes[i]) ? lvp_buffer_from_handle(vcb->buffers[i])->bo : NULL;
1161       if (state->vb[idx].buffer.resource && vcb->sizes) {
1162          if (vcb->sizes[i] == VK_WHOLE_SIZE || vcb->offsets[i] + vcb->sizes[i] >= state->vb[idx].buffer.resource->width0) {
1163             state->vb_sizes[idx] = UINT32_MAX;
1164          } else {
1165             struct pipe_transfer *xfer;
1166             uint8_t *mem = pipe_buffer_map(state->pctx, state->vb[idx].buffer.resource, 0, &xfer);
1167             state->pctx->buffer_unmap(state->pctx, xfer);
1168             state->vb[idx].buffer.resource = get_buffer_resource(state->pctx, mem);
1169             state->vb[idx].buffer.resource->width0 = MIN2(vcb->offsets[i] + vcb->sizes[i], state->vb[idx].buffer.resource->width0);
1170             state->vb_sizes[idx] = vcb->sizes[i];
1171          }
1172       } else {
1173          state->vb_sizes[idx] = UINT32_MAX;
1174       }
1175 
1176       if (vcb->strides) {
1177          state->vb_strides[idx] = vcb->strides[i];
1178          state->vb_strides_dirty = true;
1179       }
1180    }
1181    if (vcb->first_binding < state->start_vb)
1182       state->start_vb = vcb->first_binding;
1183    if (vcb->first_binding + vcb->binding_count >= state->num_vb)
1184       state->num_vb = vcb->first_binding + vcb->binding_count;
1185    state->vb_dirty = true;
1186 }
1187 
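/* Point one of a stage's constant-buffer slots at a descriptor set or
 * descriptor buffer BO; these are emitted at slot index + 1 since slot 0 is
 * reserved for push constants.
 */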
1188 static void
1189 handle_set_stage_buffer(struct rendering_state *state,
1190                         struct pipe_resource *bo,
1191                         size_t offset,
1192                         gl_shader_stage stage,
1193                         uint32_t index)
1194 {
1195    state->const_buffer[stage][index].buffer = bo;
1196    state->const_buffer[stage][index].buffer_offset = offset;
1197    state->const_buffer[stage][index].buffer_size = bo->width0;
1198    state->const_buffer[stage][index].user_buffer = NULL;
1199 
1200    state->constbuf_dirty[stage] = true;
1201 
1202    if (state->num_const_bufs[stage] <= index)
1203       state->num_const_bufs[stage] = index + 1;
1204 }
1205 
1206 static void handle_set_stage(struct rendering_state *state,
1207                              struct lvp_descriptor_set *set,
1208                              enum lvp_pipeline_type pipeline_type,
1209                              gl_shader_stage stage,
1210                              uint32_t index)
1211 {
1212    state->desc_sets[pipeline_type][index] = set;
1213    handle_set_stage_buffer(state, set->bo, 0, stage, index);
1214 }
1215 
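/* Apply dynamic UBO/SSBO offsets by cloning the descriptor set and patching
 * the buffer addresses of its dynamic bindings.  The clone is tracked in
 * push_desc_sets so it can be cleaned up later.
 */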
1216 static void
1217 apply_dynamic_offsets(struct lvp_descriptor_set **out_set, const uint32_t *offsets, uint32_t offset_count,
1218                       struct rendering_state *state)
1219 {
1220    if (!offset_count)
1221       return;
1222 
1223    struct lvp_descriptor_set *in_set = *out_set;
1224 
1225    struct lvp_descriptor_set *set;
1226    lvp_descriptor_set_create(state->device, in_set->layout, &set);
1227 
1228    util_dynarray_append(&state->push_desc_sets, struct lvp_descriptor_set *, set);
1229 
1230    memcpy(set->map, in_set->map, in_set->bo->width0);
1231 
1232    *out_set = set;
1233 
1234    for (uint32_t i = 0; i < set->layout->binding_count; i++) {
1235       const struct lvp_descriptor_set_binding_layout *binding = &set->layout->binding[i];
1236       if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
1237           binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
1238          continue;
1239 
1240       struct lp_descriptor *desc = set->map;
1241       desc += binding->descriptor_index;
1242 
1243       for (uint32_t j = 0; j < binding->array_size; j++) {
1244          uint32_t offset_index = binding->dynamic_index + j;
1245          if (offset_index >= offset_count)
1246             return;
1247 
1248          desc[j].buffer.u = (uint32_t *)((uint8_t *)desc[j].buffer.u + offsets[offset_index]);
1249       }
1250    }
1251 }
1252 
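/* vkCmdBindDescriptorSets2KHR: for every pipeline type covered by stageFlags,
 * apply dynamic offsets and bind each set's buffer to the stages it is
 * visible to; compute/exec-graph and ray-tracing collapse onto a single
 * stage slot. */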
1253 static void
1254 handle_descriptor_sets(VkBindDescriptorSetsInfoKHR *bds, struct rendering_state *state)
1255 {
1256    LVP_FROM_HANDLE(lvp_pipeline_layout, layout, bds->layout);
1257 
1258    uint32_t dynamic_offset_index = 0;
1259 
1260    uint32_t types = lvp_pipeline_types_from_shader_stages(bds->stageFlags);
1261    u_foreach_bit(pipeline_type, types) {
1262       for (uint32_t i = 0; i < bds->descriptorSetCount; i++) {
1263          if (state->desc_buffers[bds->firstSet + i]) {
1264             /* always unset descriptor buffers when binding sets */
1265             if (pipeline_type == LVP_PIPELINE_COMPUTE) {
1266                   bool changed = state->const_buffer[MESA_SHADER_COMPUTE][bds->firstSet + i].buffer == state->desc_buffers[bds->firstSet + i];
1267                   state->constbuf_dirty[MESA_SHADER_COMPUTE] |= changed;
1268             } else if (pipeline_type == LVP_PIPELINE_RAY_TRACING) {
1269                   bool changed = state->const_buffer[MESA_SHADER_RAYGEN][bds->firstSet + i].buffer == state->desc_buffers[bds->firstSet + i];
1270                   state->constbuf_dirty[MESA_SHADER_RAYGEN] |= changed;
1271             } else {
1272                lvp_forall_gfx_stage(j) {
1273                   bool changed = state->const_buffer[j][bds->firstSet + i].buffer == state->desc_buffers[bds->firstSet + i];
1274                   state->constbuf_dirty[j] |= changed;
1275                }
1276             }
1277          }
1278          if (!layout->vk.set_layouts[bds->firstSet + i])
1279             continue;
1280 
1281          struct lvp_descriptor_set *set = lvp_descriptor_set_from_handle(bds->pDescriptorSets[i]);
1282          if (!set)
1283             continue;
1284 
1285          apply_dynamic_offsets(&set, bds->pDynamicOffsets + dynamic_offset_index,
1286                               bds->dynamicOffsetCount - dynamic_offset_index, state);
1287 
1288          dynamic_offset_index += set->layout->dynamic_offset_count;
1289 
1290          if (pipeline_type == LVP_PIPELINE_COMPUTE || pipeline_type == LVP_PIPELINE_EXEC_GRAPH) {
1291             if (set->layout->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT)
1292                handle_set_stage(state, set, pipeline_type, MESA_SHADER_COMPUTE, bds->firstSet + i);
1293             continue;
1294          }
1295 
1296          if (pipeline_type == LVP_PIPELINE_RAY_TRACING) {
1297             if (set->layout->shader_stages & LVP_RAY_TRACING_STAGES)
1298                handle_set_stage(state, set, pipeline_type, MESA_SHADER_RAYGEN, bds->firstSet + i);
1299             continue;
1300          }
1301 
1302          if (set->layout->shader_stages & VK_SHADER_STAGE_VERTEX_BIT)
1303             handle_set_stage(state, set, pipeline_type, MESA_SHADER_VERTEX, bds->firstSet + i);
1304 
1305          if (set->layout->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT)
1306             handle_set_stage(state, set, pipeline_type, MESA_SHADER_GEOMETRY, bds->firstSet + i);
1307 
1308          if (set->layout->shader_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1309             handle_set_stage(state, set, pipeline_type, MESA_SHADER_TESS_CTRL, bds->firstSet + i);
1310 
1311          if (set->layout->shader_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1312             handle_set_stage(state, set, pipeline_type, MESA_SHADER_TESS_EVAL, bds->firstSet + i);
1313 
1314          if (set->layout->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT)
1315             handle_set_stage(state, set, pipeline_type, MESA_SHADER_FRAGMENT, bds->firstSet + i);
1316 
1317          if (set->layout->shader_stages & VK_SHADER_STAGE_TASK_BIT_EXT)
1318             handle_set_stage(state, set, pipeline_type, MESA_SHADER_TASK, bds->firstSet + i);
1319 
1320          if (set->layout->shader_stages & VK_SHADER_STAGE_MESH_BIT_EXT)
1321             handle_set_stage(state, set, pipeline_type, MESA_SHADER_MESH, bds->firstSet + i);
1322       }
1323    }
1324 }
1325 
1326 static void
1327 handle_descriptor_sets_cmd(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
1328 {
1329    VkBindDescriptorSetsInfoKHR *bds = cmd->u.bind_descriptor_sets2_khr.bind_descriptor_sets_info;
1330    handle_descriptor_sets(bds, state);
1331 }
1332 
1333 static struct pipe_surface *create_img_surface_bo(struct rendering_state *state,
1334                                                   VkImageSubresourceRange *range,
1335                                                   struct pipe_resource *bo,
1336                                                   enum pipe_format pformat,
1337                                                   int width,
1338                                                   int height,
1339                                                   int base_layer, int layer_count,
1340                                                   int level)
1341 {
1342    if (pformat == PIPE_FORMAT_NONE)
1343       return NULL;
1344 
1345    const struct pipe_surface template = {
1346       .format = pformat,
1347       .width = width,
1348       .height = height,
1349       .u.tex.first_layer = range->baseArrayLayer + base_layer,
1350       .u.tex.last_layer = range->baseArrayLayer + base_layer + layer_count - 1,
1351       .u.tex.level = range->baseMipLevel + level,
1352    };
1353 
1354    return state->pctx->create_surface(state->pctx,
1355                                       bo, &template);
1356 
1357 }
1358 static struct pipe_surface *create_img_surface(struct rendering_state *state,
1359                                                struct lvp_image_view *imgv,
1360                                                VkFormat format, int width,
1361                                                int height,
1362                                                int base_layer, int layer_count)
1363 {
1364    VkImageSubresourceRange imgv_subres =
1365       vk_image_view_subresource_range(&imgv->vk);
1366 
1367    return create_img_surface_bo(state, &imgv_subres, imgv->image->planes[0].bo,
1368                                 lvp_vk_format_to_pipe_format(format),
1369                                 width, height, base_layer, layer_count, 0);
1370 }
1371 
1372 static void add_img_view_surface(struct rendering_state *state,
1373                                  struct lvp_image_view *imgv, int width, int height,
1374                                  int layer_count)
1375 {
1376    if (imgv->surface) {
1377       if ((imgv->surface->u.tex.last_layer - imgv->surface->u.tex.first_layer) != (layer_count - 1))
1378          pipe_surface_reference(&imgv->surface, NULL);
1379    }
1380 
1381    if (!imgv->surface) {
1382       imgv->surface = create_img_surface(state, imgv, imgv->vk.format,
1383                                          width, height,
1384                                          0, layer_count);
1385    }
1386 }
1387 
1388 static bool
1389 render_needs_clear(struct rendering_state *state)
1390 {
1391    for (uint32_t i = 0; i < state->color_att_count; i++) {
1392       if (state->color_att[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
1393          return true;
1394    }
1395    if (state->depth_att.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
1396       return true;
1397    if (state->stencil_att.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
1398       return true;
1399    return false;
1400 }
1401 
1402 static void clear_attachment_layers(struct rendering_state *state,
1403                                     struct lvp_image_view *imgv,
1404                                     const VkRect2D *rect,
1405                                     unsigned base_layer, unsigned layer_count,
1406                                     unsigned ds_clear_flags, double dclear_val,
1407                                     uint32_t sclear_val,
1408                                     union pipe_color_union *col_val)
1409 {
1410    struct pipe_surface *clear_surf = create_img_surface(state,
1411                                                         imgv,
1412                                                         imgv->vk.format,
1413                                                         state->framebuffer.width,
1414                                                         state->framebuffer.height,
1415                                                         base_layer,
1416                                                         layer_count);
1417 
1418    if (ds_clear_flags) {
1419       state->pctx->clear_depth_stencil(state->pctx,
1420                                        clear_surf,
1421                                        ds_clear_flags,
1422                                        dclear_val, sclear_val,
1423                                        rect->offset.x, rect->offset.y,
1424                                        rect->extent.width, rect->extent.height,
1425                                        true);
1426    } else {
1427       state->pctx->clear_render_target(state->pctx, clear_surf,
1428                                        col_val,
1429                                        rect->offset.x, rect->offset.y,
1430                                        rect->extent.width, rect->extent.height,
1431                                        true);
1432    }
1433    state->pctx->surface_destroy(state->pctx, clear_surf);
1434 }
1435 
1436 static void render_clear(struct rendering_state *state)
1437 {
1438    for (uint32_t i = 0; i < state->color_att_count; i++) {
1439       if (state->color_att[i].load_op != VK_ATTACHMENT_LOAD_OP_CLEAR)
1440          continue;
1441 
1442       union pipe_color_union color_clear_val = { 0 };
1443       const VkClearValue value = state->color_att[i].clear_value;
1444       color_clear_val.ui[0] = value.color.uint32[0];
1445       color_clear_val.ui[1] = value.color.uint32[1];
1446       color_clear_val.ui[2] = value.color.uint32[2];
1447       color_clear_val.ui[3] = value.color.uint32[3];
1448 
1449       struct lvp_image_view *imgv = state->color_att[i].imgv;
1450       assert(imgv->surface);
1451 
1452       if (state->info.view_mask) {
1453          u_foreach_bit(i, state->info.view_mask)
1454             clear_attachment_layers(state, imgv, &state->render_area,
1455                                     i, 1, 0, 0, 0, &color_clear_val);
1456       } else {
1457          state->pctx->clear_render_target(state->pctx,
1458                                           imgv->surface,
1459                                           &color_clear_val,
1460                                           state->render_area.offset.x,
1461                                           state->render_area.offset.y,
1462                                           state->render_area.extent.width,
1463                                           state->render_area.extent.height,
1464                                           false);
1465       }
1466    }
1467 
1468    uint32_t ds_clear_flags = 0;
1469    double dclear_val = 0;
1470    if (state->depth_att.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1471       ds_clear_flags |= PIPE_CLEAR_DEPTH;
1472       dclear_val = state->depth_att.clear_value.depthStencil.depth;
1473    }
1474 
1475    uint32_t sclear_val = 0;
1476    if (state->stencil_att.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1477       ds_clear_flags |= PIPE_CLEAR_STENCIL;
1478       sclear_val = state->stencil_att.clear_value.depthStencil.stencil;
1479    }
1480 
1481    if (ds_clear_flags) {
1482       if (state->info.view_mask) {
1483          u_foreach_bit(i, state->info.view_mask)
1484             clear_attachment_layers(state, state->ds_imgv, &state->render_area,
1485                                     i, 1, ds_clear_flags, dclear_val, sclear_val, NULL);
1486       } else {
1487          state->pctx->clear_depth_stencil(state->pctx,
1488                                           state->ds_imgv->surface,
1489                                           ds_clear_flags,
1490                                           dclear_val, sclear_val,
1491                                           state->render_area.offset.x,
1492                                           state->render_area.offset.y,
1493                                           state->render_area.extent.width,
1494                                           state->render_area.extent.height,
1495                                           false);
1496       }
1497    }
1498 }
1499 
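/* Try to clear all attachments with a single pctx->clear(); fall back to the
 * per-attachment slow path when the clear is scissored, multiview,
 * conditional, or the color attachments disagree on the clear value. */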
1500 static void render_clear_fast(struct rendering_state *state)
1501 {
1502    /*
1503     * the state tracker clear interface only works if all the attachments have the same
1504     * clear color.
1505     */
1506    /* llvmpipe doesn't support scissored clears yet */
1507    if (state->render_area.offset.x || state->render_area.offset.y)
1508       goto slow_clear;
1509 
1510    if (state->render_area.extent.width != state->framebuffer.width ||
1511        state->render_area.extent.height != state->framebuffer.height)
1512       goto slow_clear;
1513 
1514    if (state->info.view_mask)
1515       goto slow_clear;
1516 
1517    if (state->render_cond)
1518       goto slow_clear;
1519 
1520    uint32_t buffers = 0;
1521    bool has_color_value = false;
1522    VkClearValue color_value = {0};
1523    for (uint32_t i = 0; i < state->color_att_count; i++) {
1524       if (state->color_att[i].load_op != VK_ATTACHMENT_LOAD_OP_CLEAR)
1525          continue;
1526 
1527       buffers |= (PIPE_CLEAR_COLOR0 << i);
1528 
1529       if (has_color_value) {
1530          if (memcmp(&color_value, &state->color_att[i].clear_value, sizeof(VkClearValue)))
1531             goto slow_clear;
1532       } else {
1533          memcpy(&color_value, &state->color_att[i].clear_value, sizeof(VkClearValue));
1534          has_color_value = true;
1535       }
1536    }
1537 
1538    double dclear_val = 0;
1539    if (state->depth_att.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1540       buffers |= PIPE_CLEAR_DEPTH;
1541       dclear_val = state->depth_att.clear_value.depthStencil.depth;
1542    }
1543 
1544    uint32_t sclear_val = 0;
1545    if (state->stencil_att.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1546       buffers |= PIPE_CLEAR_STENCIL;
1547       sclear_val = state->stencil_att.clear_value.depthStencil.stencil;
1548    }
1549 
1550    union pipe_color_union col_val;
1551    for (unsigned i = 0; i < 4; i++)
1552       col_val.ui[i] = color_value.color.uint32[i];
1553 
1554    state->pctx->clear(state->pctx, buffers,
1555                       NULL, &col_val,
1556                       dclear_val, sclear_val);
1557    return;
1558 
1559 slow_clear:
1560    render_clear(state);
1561 }
1562 
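/* Free the temporary multisampled image/view created by
 * create_multisample_surface() and return the original single-sampled view. */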
1563 static struct lvp_image_view *
1564 destroy_multisample_surface(struct rendering_state *state, struct lvp_image_view *imgv)
1565 {
1566    assert(imgv->image->vk.samples > 1);
1567    struct lvp_image_view *base = imgv->multisample;
1568    base->multisample = NULL;
1569    free((void*)imgv->image);
1570    pipe_surface_reference(&imgv->surface, NULL);
1571    free(imgv);
1572    return base;
1573 }
1574 
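/* Resolve the depth/stencil attachment. With multi==true this handles the
 * forced MSRTSS resolve back into the single-sampled view; otherwise it blits
 * into the app-provided resolve attachment. Depth and stencil get separate
 * blits when their resolve modes differ. */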
1575 static void
1576 resolve_ds(struct rendering_state *state, bool multi)
1577 {
1578    VkResolveModeFlagBits depth_resolve_mode = multi ? state->forced_depth_resolve_mode : state->depth_att.resolve_mode;
1579    VkResolveModeFlagBits stencil_resolve_mode = multi ? state->forced_stencil_resolve_mode : state->stencil_att.resolve_mode;
1580    if (!depth_resolve_mode && !stencil_resolve_mode)
1581       return;
1582 
1583    struct lvp_image_view *src_imgv = state->ds_imgv;
1584    if (multi && !src_imgv->multisample)
1585       return;
1586    if (!multi && src_imgv->image->vk.samples == 1)
1587       return;
1588 
1589    assert(state->depth_att.resolve_imgv == NULL ||
1590           state->stencil_att.resolve_imgv == NULL ||
1591           state->depth_att.resolve_imgv == state->stencil_att.resolve_imgv ||
1592           multi);
1593    struct lvp_image_view *dst_imgv =
1594       multi ? src_imgv->multisample :
1595       state->depth_att.resolve_imgv ? state->depth_att.resolve_imgv :
1596                                       state->stencil_att.resolve_imgv;
1597 
1598    unsigned num_blits = 1;
1599    if (depth_resolve_mode != stencil_resolve_mode)
1600       num_blits = 2;
1601 
1602    for (unsigned i = 0; i < num_blits; i++) {
1603       if (i == 0 && depth_resolve_mode == VK_RESOLVE_MODE_NONE)
1604          continue;
1605 
1606       if (i == 1 && stencil_resolve_mode == VK_RESOLVE_MODE_NONE)
1607          continue;
1608 
1609       struct pipe_blit_info info = {0};
1610 
1611       info.src.resource = src_imgv->image->planes[0].bo;
1612       info.dst.resource = dst_imgv->image->planes[0].bo;
1613       info.src.format = src_imgv->pformat;
1614       info.dst.format = dst_imgv->pformat;
1615       info.filter = PIPE_TEX_FILTER_NEAREST;
1616 
1617       if (num_blits == 1)
1618          info.mask = PIPE_MASK_ZS;
1619       else if (i == 0)
1620          info.mask = PIPE_MASK_Z;
1621       else
1622          info.mask = PIPE_MASK_S;
1623 
1624       if (i == 0 && depth_resolve_mode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT)
1625          info.sample0_only = true;
1626       if (i == 1 && stencil_resolve_mode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT)
1627          info.sample0_only = true;
1628 
1629       info.src.box.x = state->render_area.offset.x;
1630       info.src.box.y = state->render_area.offset.y;
1631       info.src.box.width = state->render_area.extent.width;
1632       info.src.box.height = state->render_area.extent.height;
1633       info.src.box.depth = state->framebuffer.layers;
1634 
1635       info.dst.box = info.src.box;
1636 
1637       state->pctx->blit(state->pctx, &info);
1638    }
1639    if (multi)
1640       state->ds_imgv = destroy_multisample_surface(state, state->ds_imgv);
1641 }
1642 
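/* Resolve color attachments, mirroring resolve_ds(); with multi==true the
 * temporary MSRTSS surfaces are resolved and then destroyed. */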
1643 static void
1644 resolve_color(struct rendering_state *state, bool multi)
1645 {
1646    for (uint32_t i = 0; i < state->color_att_count; i++) {
1647       if (!state->color_att[i].resolve_mode &&
1648           !(multi && state->forced_sample_count && state->color_att[i].imgv))
1649          continue;
1650 
1651       struct lvp_image_view *src_imgv = state->color_att[i].imgv;
1652       /* skip non-msrtss resolves during msrtss resolve */
1653       if (multi && !src_imgv->multisample)
1654          continue;
1655       struct lvp_image_view *dst_imgv = multi ? src_imgv->multisample : state->color_att[i].resolve_imgv;
1656 
1657       struct pipe_blit_info info = { 0 };
1658 
1659       info.src.resource = src_imgv->image->planes[0].bo;
1660       info.dst.resource = dst_imgv->image->planes[0].bo;
1661       info.src.format = src_imgv->pformat;
1662       info.dst.format = dst_imgv->pformat;
1663       info.filter = PIPE_TEX_FILTER_NEAREST;
1664       info.mask = PIPE_MASK_RGBA;
1665       info.src.box.x = state->render_area.offset.x;
1666       info.src.box.y = state->render_area.offset.y;
1667       info.src.box.width = state->render_area.extent.width;
1668       info.src.box.height = state->render_area.extent.height;
1669       info.src.box.depth = state->framebuffer.layers;
1670 
1671       info.dst.box = info.src.box;
1672       info.src.box.z = src_imgv->vk.base_array_layer;
1673       info.dst.box.z = dst_imgv->vk.base_array_layer;
1674 
1675       info.src.level = src_imgv->vk.base_mip_level;
1676       info.dst.level = dst_imgv->vk.base_mip_level;
1677 
1678       state->pctx->blit(state->pctx, &info);
1679    }
1680 
1681    if (!multi)
1682       return;
1683    for (uint32_t i = 0; i < state->color_att_count; i++) {
1684       struct lvp_image_view *src_imgv = state->color_att[i].imgv;
1685       if (src_imgv && src_imgv->multisample) //check if it has a msrtss view
1686          state->color_att[i].imgv = destroy_multisample_surface(state, src_imgv);
1687    }
1688 }
1689 
1690 static void render_resolve(struct rendering_state *state)
1691 {
1692    if (state->forced_sample_count) {
1693       resolve_ds(state, true);
1694       resolve_color(state, true);
1695    }
1696    resolve_ds(state, false);
1697    resolve_color(state, false);
1698 }
1699 
1700 static void
1701 replicate_attachment(struct rendering_state *state,
1702                      struct lvp_image_view *src,
1703                      struct lvp_image_view *dst)
1704 {
1705    unsigned level = dst->surface->u.tex.level;
1706    const struct pipe_box box = {
1707       .x = 0,
1708       .y = 0,
1709       .z = 0,
1710       .width = u_minify(dst->image->planes[0].bo->width0, level),
1711       .height = u_minify(dst->image->planes[0].bo->height0, level),
1712       .depth = u_minify(dst->image->planes[0].bo->depth0, level),
1713    };
1714    state->pctx->resource_copy_region(state->pctx, dst->image->planes[0].bo, level,
1715                                      0, 0, 0, src->image->planes[0].bo, level, &box);
1716 }
1717 
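/* Create a transient multisampled twin of a single-sampled attachment for
 * multisampled-render-to-single-sampled rendering; when replicate is set the
 * existing contents are copied in first (see att_needs_replicate), presumably
 * so loads and partial render areas still see valid data. */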
1718 static struct lvp_image_view *
1719 create_multisample_surface(struct rendering_state *state, struct lvp_image_view *imgv, uint32_t samples, bool replicate)
1720 {
1721    assert(!imgv->multisample);
1722 
1723    struct pipe_resource templ = *imgv->surface->texture;
1724    templ.nr_samples = samples;
1725    struct lvp_image *image = mem_dup(imgv->image, sizeof(struct lvp_image));
1726    image->vk.samples = samples;
1727    image->planes[0].pmem = NULL;
1728    image->planes[0].bo = state->pctx->screen->resource_create(state->pctx->screen, &templ);
1729 
1730    struct lvp_image_view *multi = mem_dup(imgv, sizeof(struct lvp_image_view));
1731    multi->image = image;
1732    multi->surface = state->pctx->create_surface(state->pctx, image->planes[0].bo, imgv->surface);
1733    struct pipe_resource *ref = image->planes[0].bo;
1734    pipe_resource_reference(&ref, NULL);
1735    imgv->multisample = multi;
1736    multi->multisample = imgv;
1737    if (replicate)
1738       replicate_attachment(state, imgv, multi);
1739    return multi;
1740 }
1741 
1742 static bool
1743 att_needs_replicate(const struct rendering_state *state,
1744                     const struct lvp_image_view *imgv,
1745                     VkAttachmentLoadOp load_op)
1746 {
1747    if (load_op == VK_ATTACHMENT_LOAD_OP_LOAD ||
1748        load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
1749       return true;
1750    if (state->render_area.offset.x || state->render_area.offset.y)
1751       return true;
1752    if (state->render_area.extent.width < imgv->image->vk.extent.width ||
1753        state->render_area.extent.height < imgv->image->vk.extent.height)
1754       return true;
1755    return false;
1756 }
1757 
1758 
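/* Translate a VkRenderingAttachmentInfo into an lvp_render_attachment. When
 * poison_mem is set, DONT_CARE loads are turned into clears with junk values
 * to help catch reads of undefined attachment contents. */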
1759 static void
1760 render_att_init(struct lvp_render_attachment* att,
1761                 const VkRenderingAttachmentInfo *vk_att,
1762                 bool poison_mem, bool stencil)
1763 {
1764    if (vk_att == NULL || vk_att->imageView == VK_NULL_HANDLE) {
1765       *att = (struct lvp_render_attachment) {
1766          .load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1767       };
1768       return;
1769    }
1770 
1771    *att = (struct lvp_render_attachment) {
1772       .imgv = lvp_image_view_from_handle(vk_att->imageView),
1773       .load_op = vk_att->loadOp,
1774       .store_op = vk_att->storeOp,
1775       .clear_value = vk_att->clearValue,
1776    };
1777    if (util_format_is_depth_or_stencil(att->imgv->pformat)) {
1778       if (stencil) {
1779          att->read_only =
1780             (vk_att->imageLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
1781              vk_att->imageLayout == VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL);
1782       } else {
1783          att->read_only =
1784             (vk_att->imageLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
1785              vk_att->imageLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL);
1786       }
1787    }
1788    if (poison_mem && !att->read_only && att->load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) {
1789       att->load_op = VK_ATTACHMENT_LOAD_OP_CLEAR;
1790       if (util_format_is_depth_or_stencil(att->imgv->pformat)) {
1791          att->clear_value.depthStencil.depth = 0.12351251;
1792          att->clear_value.depthStencil.stencil = rand() % UINT8_MAX;
1793       } else {
1794          memset(att->clear_value.color.uint32, rand() % UINT8_MAX,
1795                 sizeof(att->clear_value.color.uint32));
1796       }
1797    }
1798 
1799    if (vk_att->resolveImageView && vk_att->resolveMode) {
1800       att->resolve_imgv = lvp_image_view_from_handle(vk_att->resolveImageView);
1801       att->resolve_mode = vk_att->resolveMode;
1802    }
1803 }
1804 
1805 
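/* vkCmdBeginRendering: set up framebuffer state from the VkRenderingInfo,
 * including MSRTSS forced sample counts, then perform any LOAD_OP_CLEAR
 * clears unless this render pass instance is resuming. */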
1806 static void
1807 handle_begin_rendering(struct vk_cmd_queue_entry *cmd,
1808                        struct rendering_state *state)
1809 {
1810    const VkRenderingInfo *info = cmd->u.begin_rendering.rendering_info;
1811    bool resuming = (info->flags & VK_RENDERING_RESUMING_BIT) == VK_RENDERING_RESUMING_BIT;
1812    bool suspending = (info->flags & VK_RENDERING_SUSPENDING_BIT) == VK_RENDERING_SUSPENDING_BIT;
1813 
1814    state->fb_remapped = false;
1815    for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
1816       state->fb_map[i] = i;
1817 
1818    const VkMultisampledRenderToSingleSampledInfoEXT *ssi =
1819          vk_find_struct_const(info->pNext, MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT);
1820    if (ssi && ssi->multisampledRenderToSingleSampledEnable) {
1821       state->forced_sample_count = ssi->rasterizationSamples;
1822       state->forced_depth_resolve_mode = info->pDepthAttachment ? info->pDepthAttachment->resolveMode : 0;
1823       state->forced_stencil_resolve_mode = info->pStencilAttachment ? info->pStencilAttachment->resolveMode : 0;
1824    } else {
1825       state->forced_sample_count = 0;
1826       state->forced_depth_resolve_mode = 0;
1827       state->forced_stencil_resolve_mode = 0;
1828    }
1829 
1830    state->info.view_mask = info->viewMask;
1831    state->render_area = info->renderArea;
1832    state->suspending = suspending;
1833    state->framebuffer.width = info->renderArea.offset.x +
1834                               info->renderArea.extent.width;
1835    state->framebuffer.height = info->renderArea.offset.y +
1836                                info->renderArea.extent.height;
1837    state->framebuffer.layers = info->viewMask ? util_last_bit(info->viewMask) : info->layerCount;
1838    assert(info->colorAttachmentCount <= PIPE_MAX_COLOR_BUFS);
1839    state->framebuffer.nr_cbufs = info->colorAttachmentCount;
1840 
1841    state->color_att_count = info->colorAttachmentCount;
1842    memset(state->framebuffer.cbufs, 0, sizeof(state->framebuffer.cbufs));
1843    for (unsigned i = 0; i < info->colorAttachmentCount; i++) {
1844       render_att_init(&state->color_att[i], &info->pColorAttachments[i], state->poison_mem, false);
1845       if (state->color_att[i].imgv) {
1846          struct lvp_image_view *imgv = state->color_att[i].imgv;
1847          add_img_view_surface(state, imgv,
1848                               state->framebuffer.width, state->framebuffer.height,
1849                               state->framebuffer.layers);
1850          if (state->forced_sample_count && imgv->image->vk.samples == 1)
1851             state->color_att[i].imgv = create_multisample_surface(state, imgv, state->forced_sample_count,
1852                                                                   att_needs_replicate(state, imgv, state->color_att[i].load_op));
1853          state->framebuffer.cbufs[i] = state->color_att[i].imgv->surface;
1854          assert(state->render_area.offset.x + state->render_area.extent.width <= state->framebuffer.cbufs[i]->texture->width0);
1855          assert(state->render_area.offset.y + state->render_area.extent.height <= state->framebuffer.cbufs[i]->texture->height0);
1856       } else {
1857          state->framebuffer.cbufs[i] = NULL;
1858       }
1859    }
1860 
1861    render_att_init(&state->depth_att, info->pDepthAttachment, state->poison_mem, false);
1862    render_att_init(&state->stencil_att, info->pStencilAttachment, state->poison_mem, true);
1863    state->dsa_no_stencil = !state->stencil_att.imgv;
1864    state->dsa_dirty = true;
1865    if (state->depth_att.imgv || state->stencil_att.imgv) {
1866       assert(state->depth_att.imgv == NULL ||
1867              state->stencil_att.imgv == NULL ||
1868              state->depth_att.imgv == state->stencil_att.imgv);
1869       state->ds_imgv = state->depth_att.imgv ? state->depth_att.imgv :
1870                                                state->stencil_att.imgv;
1871       struct lvp_image_view *imgv = state->ds_imgv;
1872       add_img_view_surface(state, imgv,
1873                            state->framebuffer.width, state->framebuffer.height,
1874                            state->framebuffer.layers);
1875       if (state->forced_sample_count && imgv->image->vk.samples == 1) {
1876          VkAttachmentLoadOp load_op;
1877          if (state->depth_att.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR ||
1878              state->stencil_att.load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
1879             load_op = VK_ATTACHMENT_LOAD_OP_CLEAR;
1880          else if (state->depth_att.load_op == VK_ATTACHMENT_LOAD_OP_LOAD ||
1881                   state->stencil_att.load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
1882             load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
1883          else
1884             load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
1885          state->ds_imgv = create_multisample_surface(state, imgv, state->forced_sample_count,
1886                                                      att_needs_replicate(state, imgv, load_op));
1887       }
1888       state->framebuffer.zsbuf = state->ds_imgv->surface;
1889       assert(state->render_area.offset.x + state->render_area.extent.width <= state->framebuffer.zsbuf->texture->width0);
1890       assert(state->render_area.offset.y + state->render_area.extent.height <= state->framebuffer.zsbuf->texture->height0);
1891    } else {
1892       state->ds_imgv = NULL;
1893       state->framebuffer.zsbuf = NULL;
1894    }
1895 
1896    state->pctx->set_framebuffer_state(state->pctx,
1897                                       &state->framebuffer);
1898 
1899    if (!resuming && render_needs_clear(state))
1900       render_clear_fast(state);
1901 }
1902 
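/* vkCmdEndRendering: skip work when suspending, resolve attachments, and when
 * poison_mem is enabled overwrite STORE_OP_DONT_CARE attachments with junk. */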
1903 static void handle_end_rendering(struct vk_cmd_queue_entry *cmd,
1904                                  struct rendering_state *state)
1905 {
1906    if (state->suspending)
1907       return;
1908    render_resolve(state);
1909    if (!state->poison_mem)
1910       return;
1911 
1912    union pipe_color_union color_clear_val;
1913    memset(color_clear_val.ui, rand() % UINT8_MAX, sizeof(color_clear_val.ui));
1914 
1915    for (unsigned i = 0; i < state->framebuffer.nr_cbufs; i++) {
1916       if (state->color_att[i].imgv && state->color_att[i].store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE) {
1917          if (state->info.view_mask) {
1918             u_foreach_bit(i, state->info.view_mask)
1919                clear_attachment_layers(state, state->color_att[i].imgv, &state->render_area,
1920                                        i, 1, 0, 0, 0, &color_clear_val);
1921          } else {
1922             state->pctx->clear_render_target(state->pctx,
1923                                              state->color_att[i].imgv->surface,
1924                                              &color_clear_val,
1925                                              state->render_area.offset.x,
1926                                              state->render_area.offset.y,
1927                                              state->render_area.extent.width,
1928                                              state->render_area.extent.height,
1929                                              false);
1930          }
1931       }
1932    }
1933    uint32_t ds_clear_flags = 0;
1934    if (state->depth_att.imgv && !state->depth_att.read_only && state->depth_att.store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
1935       ds_clear_flags |= PIPE_CLEAR_DEPTH;
1936    if (state->stencil_att.imgv && !state->stencil_att.read_only && state->stencil_att.store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
1937       ds_clear_flags |= PIPE_CLEAR_STENCIL;
1938    double dclear_val = 0.2389234;
1939    uint32_t sclear_val = rand() % UINT8_MAX;
1940    if (ds_clear_flags) {
1941       if (state->info.view_mask) {
1942          u_foreach_bit(i, state->info.view_mask)
1943             clear_attachment_layers(state, state->ds_imgv, &state->render_area,
1944                                     i, 1, ds_clear_flags, dclear_val, sclear_val, NULL);
1945       } else {
1946          state->pctx->clear_depth_stencil(state->pctx,
1947                                           state->ds_imgv->surface,
1948                                           ds_clear_flags,
1949                                           dclear_val, sclear_val,
1950                                           state->render_area.offset.x,
1951                                           state->render_area.offset.y,
1952                                           state->render_area.extent.width,
1953                                           state->render_area.extent.height,
1954                                           false);
1955       }
1956    }
1957 }
1958 
1959 static void
1960 handle_rendering_attachment_locations(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
1961 {
1962    VkRenderingAttachmentLocationInfoKHR *set = cmd->u.set_rendering_attachment_locations_khr.location_info;
1963    state->fb_remapped = true;
1964    memset(state->fb_map, PIPE_MAX_COLOR_BUFS, sizeof(state->fb_map));
1965    assert(state->color_att_count == set->colorAttachmentCount);
1966    for (unsigned i = 0; i < state->color_att_count; i++) {
1967       if (set->pColorAttachmentLocations[i] == VK_ATTACHMENT_UNUSED)
1968          continue;
1969       state->fb_map[i] = set->pColorAttachmentLocations[i];
1970    }
1971    emit_fb_state(state);
1972 }
1973 
1974 static void
1975 handle_rendering_input_attachment_indices(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
1976 {
1977    /* do nothing */
1978 }
1979 
1980 static void handle_draw(struct vk_cmd_queue_entry *cmd,
1981                         struct rendering_state *state)
1982 {
1983    struct pipe_draw_start_count_bias draw;
1984 
1985    state->info.index_size = 0;
1986    state->info.index.resource = NULL;
1987    state->info.start_instance = cmd->u.draw.first_instance;
1988    state->info.instance_count = cmd->u.draw.instance_count;
1989 
1990    draw.start = cmd->u.draw.first_vertex;
1991    draw.count = cmd->u.draw.vertex_count;
1992    draw.index_bias = 0;
1993 
1994    state->pctx->draw_vbo(state->pctx, &state->info, 0, NULL, &draw, 1);
1995 }
1996 
1997 static void handle_draw_multi(struct vk_cmd_queue_entry *cmd,
1998                               struct rendering_state *state)
1999 {
2000    struct pipe_draw_start_count_bias *draws = calloc(cmd->u.draw_multi_ext.draw_count,
2001                                                      sizeof(*draws));
2002 
2003    state->info.index_size = 0;
2004    state->info.index.resource = NULL;
2005    state->info.start_instance = cmd->u.draw_multi_ext.first_instance;
2006    state->info.instance_count = cmd->u.draw_multi_ext.instance_count;
2007    if (cmd->u.draw_multi_ext.draw_count > 1)
2008       state->info.increment_draw_id = true;
2009 
2010    for (unsigned i = 0; i < cmd->u.draw_multi_ext.draw_count; i++) {
2011       draws[i].start = cmd->u.draw_multi_ext.vertex_info[i].firstVertex;
2012       draws[i].count = cmd->u.draw_multi_ext.vertex_info[i].vertexCount;
2013       draws[i].index_bias = 0;
2014    }
2015 
2016    if (cmd->u.draw_multi_ext.draw_count)
2017       state->pctx->draw_vbo(state->pctx, &state->info, 0, NULL, draws, cmd->u.draw_multi_ext.draw_count);
2018 
2019    free(draws);
2020 }
2021 
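/* first_viewport == UINT32_MAX is used by the *WithCount entrypoints to mean
 * "replace the whole viewport array starting at index 0". */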
2022 static void set_viewport(unsigned first_viewport, unsigned viewport_count,
2023                          const VkViewport* viewports,
2024                          struct rendering_state *state)
2025 {
2026    unsigned base = 0;
2027    if (first_viewport == UINT32_MAX)
2028       state->num_viewports = viewport_count;
2029    else
2030       base = first_viewport;
2031 
2032    for (unsigned i = 0; i < viewport_count; i++) {
2033       int idx = i + base;
2034       const VkViewport *vp = &viewports[i];
2035       get_viewport_xform(state, vp, idx);
2036       set_viewport_depth_xform(state, idx);
2037    }
2038    state->vp_dirty = true;
2039 }
2040 
2041 static void handle_set_viewport(struct vk_cmd_queue_entry *cmd,
2042                                 struct rendering_state *state)
2043 {
2044    set_viewport(cmd->u.set_viewport.first_viewport,
2045                 cmd->u.set_viewport.viewport_count,
2046                 cmd->u.set_viewport.viewports,
2047                 state);
2048 }
2049 
2050 static void handle_set_viewport_with_count(struct vk_cmd_queue_entry *cmd,
2051                                            struct rendering_state *state)
2052 {
2053    set_viewport(UINT32_MAX,
2054                 cmd->u.set_viewport_with_count.viewport_count,
2055                 cmd->u.set_viewport_with_count.viewports,
2056                 state);
2057 }
2058 
2059 static void set_scissor(unsigned first_scissor,
2060                         unsigned scissor_count,
2061                         const VkRect2D *scissors,
2062                         struct rendering_state *state)
2063 {
2064    unsigned base = 0;
2065    if (first_scissor == UINT32_MAX)
2066       state->num_scissors = scissor_count;
2067    else
2068       base = first_scissor;
2069 
2070    for (unsigned i = 0; i < scissor_count; i++) {
2071       unsigned idx = i + base;
2072       const VkRect2D *ss = &scissors[i];
2073       state->scissors[idx].minx = ss->offset.x;
2074       state->scissors[idx].miny = ss->offset.y;
2075       state->scissors[idx].maxx = ss->offset.x + ss->extent.width;
2076       state->scissors[idx].maxy = ss->offset.y + ss->extent.height;
2077    }
2078    state->scissor_dirty = true;
2079 }
2080 
2081 static void handle_set_scissor(struct vk_cmd_queue_entry *cmd,
2082                                struct rendering_state *state)
2083 {
2084    set_scissor(cmd->u.set_scissor.first_scissor,
2085                cmd->u.set_scissor.scissor_count,
2086                cmd->u.set_scissor.scissors,
2087                state);
2088 }
2089 
2090 static void handle_set_scissor_with_count(struct vk_cmd_queue_entry *cmd,
2091                                           struct rendering_state *state)
2092 {
2093    set_scissor(UINT32_MAX,
2094                cmd->u.set_scissor_with_count.scissor_count,
2095                cmd->u.set_scissor_with_count.scissors,
2096                state);
2097 }
2098 
2099 static void handle_set_line_width(struct vk_cmd_queue_entry *cmd,
2100                                   struct rendering_state *state)
2101 {
2102    state->rs_state.line_width = cmd->u.set_line_width.line_width;
2103    state->rs_dirty = true;
2104 }
2105 
2106 static void handle_set_depth_bias(struct vk_cmd_queue_entry *cmd,
2107                                   struct rendering_state *state)
2108 {
2109    state->depth_bias.offset_units = cmd->u.set_depth_bias.depth_bias_constant_factor;
2110    state->depth_bias.offset_scale = cmd->u.set_depth_bias.depth_bias_slope_factor;
2111    state->depth_bias.offset_clamp = cmd->u.set_depth_bias.depth_bias_clamp;
2112    state->rs_dirty = true;
2113 }
2114 
2115 static void handle_set_blend_constants(struct vk_cmd_queue_entry *cmd,
2116                                        struct rendering_state *state)
2117 {
2118    memcpy(state->blend_color.color, cmd->u.set_blend_constants.blend_constants, 4 * sizeof(float));
2119    state->blend_color_dirty = true;
2120 }
2121 
2122 static void handle_set_depth_bounds(struct vk_cmd_queue_entry *cmd,
2123                                     struct rendering_state *state)
2124 {
2125    state->dsa_dirty |= !DOUBLE_EQ(state->dsa_state.depth_bounds_min, cmd->u.set_depth_bounds.min_depth_bounds);
2126    state->dsa_dirty |= !DOUBLE_EQ(state->dsa_state.depth_bounds_max, cmd->u.set_depth_bounds.max_depth_bounds);
2127    state->dsa_state.depth_bounds_min = cmd->u.set_depth_bounds.min_depth_bounds;
2128    state->dsa_state.depth_bounds_max = cmd->u.set_depth_bounds.max_depth_bounds;
2129 }
2130 
2131 static void handle_set_stencil_compare_mask(struct vk_cmd_queue_entry *cmd,
2132                                             struct rendering_state *state)
2133 {
2134    if (cmd->u.set_stencil_compare_mask.face_mask & VK_STENCIL_FACE_FRONT_BIT)
2135       state->dsa_state.stencil[0].valuemask = cmd->u.set_stencil_compare_mask.compare_mask;
2136    if (cmd->u.set_stencil_compare_mask.face_mask & VK_STENCIL_FACE_BACK_BIT)
2137       state->dsa_state.stencil[1].valuemask = cmd->u.set_stencil_compare_mask.compare_mask;
2138    state->dsa_dirty = true;
2139 }
2140 
2141 static void handle_set_stencil_write_mask(struct vk_cmd_queue_entry *cmd,
2142                                           struct rendering_state *state)
2143 {
2144    if (cmd->u.set_stencil_write_mask.face_mask & VK_STENCIL_FACE_FRONT_BIT)
2145       state->dsa_state.stencil[0].writemask = cmd->u.set_stencil_write_mask.write_mask;
2146    if (cmd->u.set_stencil_write_mask.face_mask & VK_STENCIL_FACE_BACK_BIT)
2147       state->dsa_state.stencil[1].writemask = cmd->u.set_stencil_write_mask.write_mask;
2148    state->dsa_dirty = true;
2149 }
2150 
2151 static void handle_set_stencil_reference(struct vk_cmd_queue_entry *cmd,
2152                                          struct rendering_state *state)
2153 {
2154    if (cmd->u.set_stencil_reference.face_mask & VK_STENCIL_FACE_FRONT_BIT)
2155       state->stencil_ref.ref_value[0] = cmd->u.set_stencil_reference.reference;
2156    if (cmd->u.set_stencil_reference.face_mask & VK_STENCIL_FACE_BACK_BIT)
2157       state->stencil_ref.ref_value[1] = cmd->u.set_stencil_reference.reference;
2158    state->stencil_ref_dirty = true;
2159 }
2160 
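/* Copy/convert one 2D rect between depth/stencil formats, using the
 * u_format_zs pack/unpack helpers to split or merge the Z and S planes when
 * the source and destination formats differ. */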
2161 static void
2162 copy_depth_rect(uint8_t * dst,
2163                 enum pipe_format dst_format,
2164                 unsigned dst_stride,
2165                 unsigned dst_x,
2166                 unsigned dst_y,
2167                 unsigned width,
2168                 unsigned height,
2169                 const uint8_t * src,
2170                 enum pipe_format src_format,
2171                 int src_stride,
2172                 unsigned src_x,
2173                 unsigned src_y)
2174 {
2175    int src_stride_pos = src_stride < 0 ? -src_stride : src_stride;
2176    int src_blocksize = util_format_get_blocksize(src_format);
2177    int src_blockwidth = util_format_get_blockwidth(src_format);
2178    int src_blockheight = util_format_get_blockheight(src_format);
2179    int dst_blocksize = util_format_get_blocksize(dst_format);
2180    int dst_blockwidth = util_format_get_blockwidth(dst_format);
2181    int dst_blockheight = util_format_get_blockheight(dst_format);
2182 
2183    assert(src_blocksize > 0);
2184    assert(src_blockwidth > 0);
2185    assert(src_blockheight > 0);
2186 
2187    dst_x /= dst_blockwidth;
2188    dst_y /= dst_blockheight;
2189    width = (width + src_blockwidth - 1)/src_blockwidth;
2190    height = (height + src_blockheight - 1)/src_blockheight;
2191    src_x /= src_blockwidth;
2192    src_y /= src_blockheight;
2193 
2194    dst += dst_x * dst_blocksize;
2195    src += src_x * src_blocksize;
2196    dst += dst_y * dst_stride;
2197    src += src_y * src_stride_pos;
2198 
2199    if (dst_format == PIPE_FORMAT_S8_UINT) {
2200       if (src_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
2201          util_format_z32_float_s8x24_uint_unpack_s_8uint(dst, dst_stride,
2202                                                          src, src_stride,
2203                                                          width, height);
2204       } else if (src_format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
2205          util_format_z24_unorm_s8_uint_unpack_s_8uint(dst, dst_stride,
2206                                                       src, src_stride,
2207                                                       width, height);
2208       } else {
2209       }
2210    } else if (dst_format == PIPE_FORMAT_Z24X8_UNORM) {
2211       util_format_z24_unorm_s8_uint_unpack_z24(dst, dst_stride,
2212                                                src, src_stride,
2213                                                width, height);
2214    } else if (dst_format == PIPE_FORMAT_Z32_FLOAT) {
2215       if (src_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
2216          util_format_z32_float_s8x24_uint_unpack_z_float((float *)dst, dst_stride,
2217                                                          src, src_stride,
2218                                                          width, height);
2219       }
2220    } else if (dst_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
2221       if (src_format == PIPE_FORMAT_Z32_FLOAT)
2222          util_format_z32_float_s8x24_uint_pack_z_float(dst, dst_stride,
2223                                                        (float *)src, src_stride,
2224                                                        width, height);
2225       else if (src_format == PIPE_FORMAT_S8_UINT)
2226          util_format_z32_float_s8x24_uint_pack_s_8uint(dst, dst_stride,
2227                                                        src, src_stride,
2228                                                        width, height);
2229    } else if (dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
2230       if (src_format == PIPE_FORMAT_S8_UINT)
2231          util_format_z24_unorm_s8_uint_pack_s_8uint(dst, dst_stride,
2232                                                     src, src_stride,
2233                                                     width, height);
2234       if (src_format == PIPE_FORMAT_Z24X8_UNORM)
2235          util_format_z24_unorm_s8_uint_pack_z24(dst, dst_stride,
2236                                                 src, src_stride,
2237                                                 width, height);
2238    }
2239 }
2240 
2241 static void
2242 copy_depth_box(uint8_t *dst,
2243                enum pipe_format dst_format,
2244                unsigned dst_stride, uint64_t dst_slice_stride,
2245                unsigned dst_x, unsigned dst_y, unsigned dst_z,
2246                unsigned width, unsigned height, unsigned depth,
2247                const uint8_t * src,
2248                enum pipe_format src_format,
2249                int src_stride, uint64_t src_slice_stride,
2250                unsigned src_x, unsigned src_y, unsigned src_z)
2251 {
2252    dst += dst_z * dst_slice_stride;
2253    src += src_z * src_slice_stride;
2254    for (unsigned z = 0; z < depth; ++z) {
2255       copy_depth_rect(dst,
2256                       dst_format,
2257                       dst_stride,
2258                       dst_x, dst_y,
2259                       width, height,
2260                       src,
2261                       src_format,
2262                       src_stride,
2263                       src_x, src_y);
2264 
2265       dst += dst_slice_stride;
2266       src += src_slice_stride;
2267    }
2268 }
2269 
2270 static unsigned
2271 subresource_layercount(const struct lvp_image *image, const VkImageSubresourceLayers *sub)
2272 {
2273    if (sub->layerCount != VK_REMAINING_ARRAY_LAYERS)
2274       return sub->layerCount;
2275    return image->vk.array_layers - sub->baseArrayLayer;
2276 }
2277 
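/* vkCmdCopyImageToBuffer2: map the image level and the destination buffer,
 * then copy region by region; depth/stencil aspects go through copy_depth_box
 * so only the requested aspect is written to the buffer. */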
2278 static void handle_copy_image_to_buffer2(struct vk_cmd_queue_entry *cmd,
2279                                              struct rendering_state *state)
2280 {
2281    const struct VkCopyImageToBufferInfo2 *copycmd = cmd->u.copy_image_to_buffer2.copy_image_to_buffer_info;
2282    LVP_FROM_HANDLE(lvp_image, src_image, copycmd->srcImage);
2283    struct pipe_box box, dbox;
2284    struct pipe_transfer *src_t, *dst_t;
2285    uint8_t *src_data, *dst_data;
2286 
2287    for (uint32_t i = 0; i < copycmd->regionCount; i++) {
2288       const VkBufferImageCopy2 *region = &copycmd->pRegions[i];
2289       const VkImageAspectFlagBits aspects = copycmd->pRegions[i].imageSubresource.aspectMask;
2290       uint8_t plane = lvp_image_aspects_to_plane(src_image, aspects);
2291 
2292       box.x = region->imageOffset.x;
2293       box.y = region->imageOffset.y;
2294       box.z = src_image->vk.image_type == VK_IMAGE_TYPE_3D ? region->imageOffset.z : region->imageSubresource.baseArrayLayer;
2295       box.width = region->imageExtent.width;
2296       box.height = region->imageExtent.height;
2297       box.depth = src_image->vk.image_type == VK_IMAGE_TYPE_3D ? region->imageExtent.depth : subresource_layercount(src_image, &region->imageSubresource);
2298 
2299       src_data = state->pctx->texture_map(state->pctx,
2300                                            src_image->planes[plane].bo,
2301                                            region->imageSubresource.mipLevel,
2302                                            PIPE_MAP_READ,
2303                                            &box,
2304                                            &src_t);
2305 
2306       dbox.x = region->bufferOffset;
2307       dbox.y = 0;
2308       dbox.z = 0;
2309       dbox.width = lvp_buffer_from_handle(copycmd->dstBuffer)->bo->width0 - region->bufferOffset;
2310       dbox.height = 1;
2311       dbox.depth = 1;
2312       dst_data = state->pctx->buffer_map(state->pctx,
2313                                            lvp_buffer_from_handle(copycmd->dstBuffer)->bo,
2314                                            0,
2315                                            PIPE_MAP_WRITE,
2316                                            &dbox,
2317                                            &dst_t);
2318 
2319       enum pipe_format src_format = src_image->planes[plane].bo->format;
2320       enum pipe_format dst_format = src_format;
2321       if (util_format_is_depth_or_stencil(src_format)) {
2322          if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
2323             dst_format = util_format_get_depth_only(src_format);
2324          } else if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
2325             dst_format = PIPE_FORMAT_S8_UINT;
2326          }
2327       }
2328 
2329       const struct vk_image_buffer_layout buffer_layout =
2330          vk_image_buffer_copy_layout(&src_image->vk, &copycmd->pRegions[i]);
2331       if (src_format != dst_format) {
2332          copy_depth_box(dst_data, dst_format,
2333                         buffer_layout.row_stride_B,
2334                         buffer_layout.image_stride_B,
2335                         0, 0, 0,
2336                         region->imageExtent.width,
2337                         region->imageExtent.height,
2338                         box.depth,
2339                         src_data, src_format, src_t->stride, src_t->layer_stride, 0, 0, 0);
2340       } else {
2341          util_copy_box((uint8_t *)dst_data, src_format,
2342                        buffer_layout.row_stride_B,
2343                        buffer_layout.image_stride_B,
2344                        0, 0, 0,
2345                        region->imageExtent.width,
2346                        region->imageExtent.height,
2347                        box.depth,
2348                        src_data, src_t->stride, src_t->layer_stride, 0, 0, 0);
2349       }
2350       state->pctx->texture_unmap(state->pctx, src_t);
2351       state->pctx->buffer_unmap(state->pctx, dst_t);
2352    }
2353 }
2354 
2355 static void handle_copy_buffer_to_image(struct vk_cmd_queue_entry *cmd,
2356                                         struct rendering_state *state)
2357 {
2358    const struct VkCopyBufferToImageInfo2 *copycmd = cmd->u.copy_buffer_to_image2.copy_buffer_to_image_info;
2359    LVP_FROM_HANDLE(lvp_image, dst_image, copycmd->dstImage);
2360 
2361    for (uint32_t i = 0; i < copycmd->regionCount; i++) {
2362       const VkBufferImageCopy2 *region = &copycmd->pRegions[i];
2363       struct pipe_box box, sbox;
2364       struct pipe_transfer *src_t, *dst_t;
2365       void *src_data, *dst_data;
2366       const VkImageAspectFlagBits aspects = copycmd->pRegions[i].imageSubresource.aspectMask;
2367       uint8_t plane = lvp_image_aspects_to_plane(dst_image, aspects);
2368 
2369       sbox.x = region->bufferOffset;
2370       sbox.y = 0;
2371       sbox.z = 0;
2372       sbox.width = lvp_buffer_from_handle(copycmd->srcBuffer)->bo->width0;
2373       sbox.height = 1;
2374       sbox.depth = 1;
2375       src_data = state->pctx->buffer_map(state->pctx,
2376                                            lvp_buffer_from_handle(copycmd->srcBuffer)->bo,
2377                                            0,
2378                                            PIPE_MAP_READ,
2379                                            &sbox,
2380                                            &src_t);
2381 
2382 
2383       box.x = region->imageOffset.x;
2384       box.y = region->imageOffset.y;
2385       box.z = dst_image->vk.image_type == VK_IMAGE_TYPE_3D ? region->imageOffset.z : region->imageSubresource.baseArrayLayer;
2386       box.width = region->imageExtent.width;
2387       box.height = region->imageExtent.height;
2388       box.depth = dst_image->vk.image_type == VK_IMAGE_TYPE_3D ? region->imageExtent.depth : subresource_layercount(dst_image, &region->imageSubresource);
2389 
2390       dst_data = state->pctx->texture_map(state->pctx,
2391                                            dst_image->planes[plane].bo,
2392                                            region->imageSubresource.mipLevel,
2393                                            PIPE_MAP_WRITE,
2394                                            &box,
2395                                            &dst_t);
2396 
2397       enum pipe_format dst_format = dst_image->planes[plane].bo->format;
2398       enum pipe_format src_format = dst_format;
2399       if (util_format_is_depth_or_stencil(dst_format)) {
2400          if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
2401             src_format = util_format_get_depth_only(dst_image->planes[plane].bo->format);
2402          } else if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
2403             src_format = PIPE_FORMAT_S8_UINT;
2404          }
2405       }
2406 
2407       const struct vk_image_buffer_layout buffer_layout =
2408          vk_image_buffer_copy_layout(&dst_image->vk, &copycmd->pRegions[i]);
2409       if (src_format != dst_format) {
2410          copy_depth_box(dst_data, dst_format,
2411                         dst_t->stride, dst_t->layer_stride,
2412                         0, 0, 0,
2413                         region->imageExtent.width,
2414                         region->imageExtent.height,
2415                         box.depth,
2416                         src_data, src_format,
2417                         buffer_layout.row_stride_B,
2418                         buffer_layout.image_stride_B,
2419                         0, 0, 0);
2420       } else {
2421          util_copy_box(dst_data, dst_format,
2422                        dst_t->stride, dst_t->layer_stride,
2423                        0, 0, 0,
2424                        region->imageExtent.width,
2425                        region->imageExtent.height,
2426                        box.depth,
2427                        src_data,
2428                        buffer_layout.row_stride_B,
2429                        buffer_layout.image_stride_B,
2430                        0, 0, 0);
2431       }
2432       state->pctx->buffer_unmap(state->pctx, src_t);
2433       state->pctx->texture_unmap(state->pctx, dst_t);
2434    }
2435 }
2436 
2437 static void handle_copy_image(struct vk_cmd_queue_entry *cmd,
2438                               struct rendering_state *state)
2439 {
2440    const struct VkCopyImageInfo2 *copycmd = cmd->u.copy_image2.copy_image_info;
2441    LVP_FROM_HANDLE(lvp_image, src_image, copycmd->srcImage);
2442    LVP_FROM_HANDLE(lvp_image, dst_image, copycmd->dstImage);
2443 
2444    for (uint32_t i = 0; i < copycmd->regionCount; i++) {
2445       const VkImageCopy2 *region = &copycmd->pRegions[i];
2446       const VkImageAspectFlagBits src_aspects =
2447          copycmd->pRegions[i].srcSubresource.aspectMask;
2448       uint8_t src_plane = lvp_image_aspects_to_plane(src_image, src_aspects);
2449       const VkImageAspectFlagBits dst_aspects =
2450          copycmd->pRegions[i].dstSubresource.aspectMask;
2451       uint8_t dst_plane = lvp_image_aspects_to_plane(dst_image, dst_aspects);
2452       struct pipe_box src_box;
2453       src_box.x = region->srcOffset.x;
2454       src_box.y = region->srcOffset.y;
2455       src_box.width = region->extent.width;
2456       src_box.height = region->extent.height;
2457       if (src_image->planes[src_plane].bo->target == PIPE_TEXTURE_3D) {
2458          src_box.depth = region->extent.depth;
2459          src_box.z = region->srcOffset.z;
2460       } else {
2461          src_box.depth = subresource_layercount(src_image, &region->srcSubresource);
2462          src_box.z = region->srcSubresource.baseArrayLayer;
2463       }
2464 
2465       unsigned dstz = dst_image->planes[dst_plane].bo->target == PIPE_TEXTURE_3D ?
2466                       region->dstOffset.z :
2467                       region->dstSubresource.baseArrayLayer;
2468       state->pctx->resource_copy_region(state->pctx, dst_image->planes[dst_plane].bo,
2469                                         region->dstSubresource.mipLevel,
2470                                         region->dstOffset.x,
2471                                         region->dstOffset.y,
2472                                         dstz,
2473                                         src_image->planes[src_plane].bo,
2474                                         region->srcSubresource.mipLevel,
2475                                         &src_box);
2476    }
2477 }
2478 
2479 static void handle_copy_buffer(struct vk_cmd_queue_entry *cmd,
2480                                struct rendering_state *state)
2481 {
2482    const VkCopyBufferInfo2 *copycmd = cmd->u.copy_buffer2.copy_buffer_info;
2483 
2484    for (uint32_t i = 0; i < copycmd->regionCount; i++) {
2485       const VkBufferCopy2 *region = &copycmd->pRegions[i];
2486       struct pipe_box box = { 0 };
2487       u_box_1d(region->srcOffset, region->size, &box);
2488       state->pctx->resource_copy_region(state->pctx, lvp_buffer_from_handle(copycmd->dstBuffer)->bo, 0,
2489                                         region->dstOffset, 0, 0,
2490                                         lvp_buffer_from_handle(copycmd->srcBuffer)->bo, 0, &box);
2491    }
2492 }
2493 
2494 static void handle_blit_image(struct vk_cmd_queue_entry *cmd,
2495                               struct rendering_state *state)
2496 {
2497    VkBlitImageInfo2 *blitcmd = cmd->u.blit_image2.blit_image_info;
2498    LVP_FROM_HANDLE(lvp_image, src_image, blitcmd->srcImage);
2499    LVP_FROM_HANDLE(lvp_image, dst_image, blitcmd->dstImage);
2500 
2501    struct pipe_blit_info info = {
2502       .src.resource = src_image->planes[0].bo,
2503       .dst.resource = dst_image->planes[0].bo,
2504       .src.format = src_image->planes[0].bo->format,
2505       .dst.format = dst_image->planes[0].bo->format,
2506       .mask = util_format_is_depth_or_stencil(src_image->planes[0].bo->format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA,
2507       .filter = blitcmd->filter == VK_FILTER_NEAREST ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR,
2508    };
2509 
2510    for (uint32_t i = 0; i < blitcmd->regionCount; i++) {
2511       int srcX0, srcX1, srcY0, srcY1, srcZ0, srcZ1;
2512       unsigned dstX0, dstX1, dstY0, dstY1, dstZ0, dstZ1;
2513 
2514       srcX0 = blitcmd->pRegions[i].srcOffsets[0].x;
2515       srcX1 = blitcmd->pRegions[i].srcOffsets[1].x;
2516       srcY0 = blitcmd->pRegions[i].srcOffsets[0].y;
2517       srcY1 = blitcmd->pRegions[i].srcOffsets[1].y;
2518       srcZ0 = blitcmd->pRegions[i].srcOffsets[0].z;
2519       srcZ1 = blitcmd->pRegions[i].srcOffsets[1].z;
2520 
2521       dstX0 = blitcmd->pRegions[i].dstOffsets[0].x;
2522       dstX1 = blitcmd->pRegions[i].dstOffsets[1].x;
2523       dstY0 = blitcmd->pRegions[i].dstOffsets[0].y;
2524       dstY1 = blitcmd->pRegions[i].dstOffsets[1].y;
2525       dstZ0 = blitcmd->pRegions[i].dstOffsets[0].z;
2526       dstZ1 = blitcmd->pRegions[i].dstOffsets[1].z;
2527 
2528       if (dstX0 < dstX1) {
2529          info.dst.box.x = dstX0;
2530          info.src.box.x = srcX0;
2531          info.dst.box.width = dstX1 - dstX0;
2532          info.src.box.width = srcX1 - srcX0;
2533       } else {
2534          info.dst.box.x = dstX1;
2535          info.src.box.x = srcX1;
2536          info.dst.box.width = dstX0 - dstX1;
2537          info.src.box.width = srcX0 - srcX1;
2538       }
2539 
2540       if (dstY0 < dstY1) {
2541          info.dst.box.y = dstY0;
2542          info.src.box.y = srcY0;
2543          info.dst.box.height = dstY1 - dstY0;
2544          info.src.box.height = srcY1 - srcY0;
2545       } else {
2546          info.dst.box.y = dstY1;
2547          info.src.box.y = srcY1;
2548          info.dst.box.height = dstY0 - dstY1;
2549          info.src.box.height = srcY0 - srcY1;
2550       }
2551 
2552       assert_subresource_layers(info.src.resource, src_image, &blitcmd->pRegions[i].srcSubresource, blitcmd->pRegions[i].srcOffsets);
2553       assert_subresource_layers(info.dst.resource, dst_image, &blitcmd->pRegions[i].dstSubresource, blitcmd->pRegions[i].dstOffsets);
2554       if (src_image->planes[0].bo->target == PIPE_TEXTURE_3D) {
2555          if (dstZ0 < dstZ1) {
2556             info.dst.box.z = dstZ0;
2557             info.src.box.z = srcZ0;
2558             info.dst.box.depth = dstZ1 - dstZ0;
2559             info.src.box.depth = srcZ1 - srcZ0;
2560          } else {
2561             info.dst.box.z = dstZ1;
2562             info.src.box.z = srcZ1;
2563             info.dst.box.depth = dstZ0 - dstZ1;
2564             info.src.box.depth = srcZ0 - srcZ1;
2565          }
2566       } else {
2567          info.src.box.z = blitcmd->pRegions[i].srcSubresource.baseArrayLayer;
2568          info.dst.box.z = blitcmd->pRegions[i].dstSubresource.baseArrayLayer;
2569          info.src.box.depth = subresource_layercount(src_image, &blitcmd->pRegions[i].srcSubresource);
2570          info.dst.box.depth = subresource_layercount(dst_image, &blitcmd->pRegions[i].dstSubresource);
2571       }
2572 
2573       info.src.level = blitcmd->pRegions[i].srcSubresource.mipLevel;
2574       info.dst.level = blitcmd->pRegions[i].dstSubresource.mipLevel;
2575       state->pctx->blit(state->pctx, &info);
2576    }
2577 }
2578 
2579 static void handle_fill_buffer(struct vk_cmd_queue_entry *cmd,
2580                                struct rendering_state *state)
2581 {
2582    struct vk_cmd_fill_buffer *fillcmd = &cmd->u.fill_buffer;
2583    uint32_t size = fillcmd->size;
2584    struct lvp_buffer *dst = lvp_buffer_from_handle(fillcmd->dst_buffer);
2585 
2586    size = vk_buffer_range(&dst->vk, fillcmd->dst_offset, fillcmd->size);
2587    if (fillcmd->size == VK_WHOLE_SIZE)
2588       size = ROUND_DOWN_TO(size, 4);
2589 
2590    state->pctx->clear_buffer(state->pctx,
2591                              dst->bo,
2592                              fillcmd->dst_offset,
2593                              size,
2594                              &fillcmd->data,
2595                              4);
2596 }
2597 
2598 static void handle_update_buffer(struct vk_cmd_queue_entry *cmd,
2599                                  struct rendering_state *state)
2600 {
2601    struct vk_cmd_update_buffer *updcmd = &cmd->u.update_buffer;
2602    uint32_t *dst;
2603    struct pipe_transfer *dst_t;
2604    struct pipe_box box;
2605 
2606    u_box_1d(updcmd->dst_offset, updcmd->data_size, &box);
2607    dst = state->pctx->buffer_map(state->pctx,
2608                                    lvp_buffer_from_handle(updcmd->dst_buffer)->bo,
2609                                    0,
2610                                    PIPE_MAP_WRITE,
2611                                    &box,
2612                                    &dst_t);
2613 
2614    memcpy(dst, updcmd->data, updcmd->data_size);
2615    state->pctx->buffer_unmap(state->pctx, dst_t);
2616 }
2617 
2618 static void handle_draw_indexed(struct vk_cmd_queue_entry *cmd,
2619                                 struct rendering_state *state)
2620 {
2621    struct pipe_draw_start_count_bias draw = {0};
2622 
2623    state->info.index_bounds_valid = false;
2624    state->info.min_index = 0;
2625    state->info.max_index = ~0U;
2626    state->info.index_size = state->index_size;
2627    state->info.index.resource = state->index_buffer;
2628    state->info.start_instance = cmd->u.draw_indexed.first_instance;
2629    state->info.instance_count = cmd->u.draw_indexed.instance_count;
2630 
2631    if (state->info.primitive_restart)
2632       state->info.restart_index = util_prim_restart_index_from_size(state->info.index_size);
2633 
2634    draw.count = MIN2(cmd->u.draw_indexed.index_count, state->index_buffer_size / state->index_size);
2635    draw.index_bias = cmd->u.draw_indexed.vertex_offset;
2636    /* TODO: avoid calculating multiple times if cmdbuf is submitted again */
2637    draw.start = util_clamped_uadd(state->index_offset / state->index_size,
2638                                   cmd->u.draw_indexed.first_index);
2639 
2640    state->info.index_bias_varies = !cmd->u.draw_indexed.vertex_offset;
2641    state->pctx->draw_vbo(state->pctx, &state->info, 0, NULL, &draw, 1);
2642 }
2643 
2644 static void handle_draw_multi_indexed(struct vk_cmd_queue_entry *cmd,
2645                                       struct rendering_state *state)
2646 {
2647    struct pipe_draw_start_count_bias *draws = calloc(cmd->u.draw_multi_indexed_ext.draw_count,
2648                                                      sizeof(*draws));
2649 
2650    state->info.index_bounds_valid = false;
2651    state->info.min_index = 0;
2652    state->info.max_index = ~0U;
2653    state->info.index_size = state->index_size;
2654    state->info.index.resource = state->index_buffer;
2655    state->info.start_instance = cmd->u.draw_multi_indexed_ext.first_instance;
2656    state->info.instance_count = cmd->u.draw_multi_indexed_ext.instance_count;
2657    if (cmd->u.draw_multi_indexed_ext.draw_count > 1)
2658       state->info.increment_draw_id = true;
2659 
2660    if (state->info.primitive_restart)
2661       state->info.restart_index = util_prim_restart_index_from_size(state->info.index_size);
2662 
2663    unsigned size = cmd->u.draw_multi_indexed_ext.draw_count * sizeof(struct pipe_draw_start_count_bias);
2664    memcpy(draws, cmd->u.draw_multi_indexed_ext.index_info, size);
2665    if (state->index_buffer_size != UINT32_MAX) {
2666       for (unsigned i = 0; i < cmd->u.draw_multi_indexed_ext.draw_count; i++)
2667          draws[i].count = MIN2(draws[i].count, state->index_buffer_size / state->index_size - draws[i].start);
2668    }
2669 
2670    /* only the first member is read if index_bias_varies is true */
2671    if (cmd->u.draw_multi_indexed_ext.draw_count &&
2672        cmd->u.draw_multi_indexed_ext.vertex_offset)
2673       draws[0].index_bias = *cmd->u.draw_multi_indexed_ext.vertex_offset;
2674 
2675    /* TODO: avoid calculating multiple times if cmdbuf is submitted again */
2676    for (unsigned i = 0; i < cmd->u.draw_multi_indexed_ext.draw_count; i++)
2677       draws[i].start = util_clamped_uadd(state->index_offset / state->index_size,
2678                                          draws[i].start);
2679 
2680    state->info.index_bias_varies = !cmd->u.draw_multi_indexed_ext.vertex_offset;
2681 
2682    if (cmd->u.draw_multi_indexed_ext.draw_count)
2683       state->pctx->draw_vbo(state->pctx, &state->info, 0, NULL, draws, cmd->u.draw_multi_indexed_ext.draw_count);
2684 
2685    free(draws);
2686 }
2687 
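/* Indirect draws take their parameters from a GPU buffer, so a non-zero index
 * buffer offset or a bounded index buffer size cannot be folded into the draw
 * parameters; instead the bound index buffer is rewrapped as a user resource
 * starting at the offset and clamped to the bound size.
 */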
2688 static void handle_draw_indirect(struct vk_cmd_queue_entry *cmd,
2689                                  struct rendering_state *state, bool indexed)
2690 {
2691    struct pipe_draw_start_count_bias draw = {0};
2692    struct pipe_resource *index = NULL;
2693    if (indexed) {
2694       state->info.index_bounds_valid = false;
2695       state->info.index_size = state->index_size;
2696       state->info.index.resource = state->index_buffer;
2697       state->info.max_index = ~0U;
2698       if (state->info.primitive_restart)
2699          state->info.restart_index = util_prim_restart_index_from_size(state->info.index_size);
2700       if (state->index_offset || state->index_buffer_size != UINT32_MAX) {
2701          struct pipe_transfer *xfer;
2702          uint8_t *mem = pipe_buffer_map(state->pctx, state->index_buffer, 0, &xfer);
2703          state->pctx->buffer_unmap(state->pctx, xfer);
2704          index = get_buffer_resource(state->pctx, mem + state->index_offset);
2705          index->width0 = MIN2(state->index_buffer->width0 - state->index_offset, state->index_buffer_size);
2706          state->info.index.resource = index;
2707       }
2708    } else
2709       state->info.index_size = 0;
2710    state->indirect_info.offset = cmd->u.draw_indirect.offset;
2711    state->indirect_info.stride = cmd->u.draw_indirect.stride;
2712    state->indirect_info.draw_count = cmd->u.draw_indirect.draw_count;
2713    state->indirect_info.buffer = lvp_buffer_from_handle(cmd->u.draw_indirect.buffer)->bo;
2714 
2715    state->pctx->draw_vbo(state->pctx, &state->info, 0, &state->indirect_info, &draw, 1);
2716    pipe_resource_reference(&index, NULL);
2717 }
2718 
2719 static void handle_index_buffer(struct vk_cmd_queue_entry *cmd,
2720                                 struct rendering_state *state)
2721 {
2722    struct vk_cmd_bind_index_buffer *ib = &cmd->u.bind_index_buffer;
2723    state->index_size = vk_index_type_to_bytes(ib->index_type);
2724    state->index_buffer_size = UINT32_MAX;
2725 
2726    if (ib->buffer) {
2727       state->index_offset = ib->offset;
2728       state->index_buffer = lvp_buffer_from_handle(ib->buffer)->bo;
2729    } else {
2730       state->index_offset = 0;
2731       state->index_buffer = state->device->zero_buffer;
2732    }
2733 
2734    state->ib_dirty = true;
2735 }
2736 
2737 static void handle_index_buffer2(struct vk_cmd_queue_entry *cmd,
2738                                  struct rendering_state *state)
2739 {
2740    struct vk_cmd_bind_index_buffer2_khr *ib = &cmd->u.bind_index_buffer2_khr;
2741 
2742    if (ib->buffer) {
2743       state->index_size = vk_index_type_to_bytes(ib->index_type);
2744       state->index_buffer_size = ib->size;
2745       state->index_offset = ib->offset;
2746       state->index_buffer = lvp_buffer_from_handle(ib->buffer)->bo;
2747    } else {
2748       state->index_size = 4;
2749       state->index_buffer_size = UINT32_MAX;
2750       state->index_offset = 0;
2751       state->index_buffer = state->device->zero_buffer;
2752    }
2753 
2754    state->ib_dirty = true;
2755 }
2756 
2757 static void handle_dispatch(struct vk_cmd_queue_entry *cmd,
2758                             struct rendering_state *state)
2759 {
2760    state->dispatch_info.grid[0] = cmd->u.dispatch.group_count_x;
2761    state->dispatch_info.grid[1] = cmd->u.dispatch.group_count_y;
2762    state->dispatch_info.grid[2] = cmd->u.dispatch.group_count_z;
2763    state->dispatch_info.grid_base[0] = 0;
2764    state->dispatch_info.grid_base[1] = 0;
2765    state->dispatch_info.grid_base[2] = 0;
2766    state->dispatch_info.indirect = NULL;
2767    state->pctx->launch_grid(state->pctx, &state->dispatch_info);
2768 }
2769 
2770 static void handle_dispatch_base(struct vk_cmd_queue_entry *cmd,
2771                                  struct rendering_state *state)
2772 {
2773    state->dispatch_info.grid[0] = cmd->u.dispatch_base.group_count_x;
2774    state->dispatch_info.grid[1] = cmd->u.dispatch_base.group_count_y;
2775    state->dispatch_info.grid[2] = cmd->u.dispatch_base.group_count_z;
2776    state->dispatch_info.grid_base[0] = cmd->u.dispatch_base.base_group_x;
2777    state->dispatch_info.grid_base[1] = cmd->u.dispatch_base.base_group_y;
2778    state->dispatch_info.grid_base[2] = cmd->u.dispatch_base.base_group_z;
2779    state->dispatch_info.indirect = NULL;
2780    state->pctx->launch_grid(state->pctx, &state->dispatch_info);
2781 }
2782 
2783 static void handle_dispatch_indirect(struct vk_cmd_queue_entry *cmd,
2784                                      struct rendering_state *state)
2785 {
2786    state->dispatch_info.indirect = lvp_buffer_from_handle(cmd->u.dispatch_indirect.buffer)->bo;
2787    state->dispatch_info.indirect_offset = cmd->u.dispatch_indirect.offset;
2788    state->pctx->launch_grid(state->pctx, &state->dispatch_info);
2789 }
2790 
2791 static void handle_push_constants(struct vk_cmd_queue_entry *cmd,
2792                                   struct rendering_state *state)
2793 {
2794    VkPushConstantsInfoKHR *pci = cmd->u.push_constants2_khr.push_constants_info;
2795    memcpy(state->push_constants + pci->offset, pci->pValues, pci->size);
2796 
2797    VkShaderStageFlags stage_flags = pci->stageFlags;
2798    state->pcbuf_dirty[MESA_SHADER_VERTEX] |= (stage_flags & VK_SHADER_STAGE_VERTEX_BIT) > 0;
2799    state->pcbuf_dirty[MESA_SHADER_FRAGMENT] |= (stage_flags & VK_SHADER_STAGE_FRAGMENT_BIT) > 0;
2800    state->pcbuf_dirty[MESA_SHADER_GEOMETRY] |= (stage_flags & VK_SHADER_STAGE_GEOMETRY_BIT) > 0;
2801    state->pcbuf_dirty[MESA_SHADER_TESS_CTRL] |= (stage_flags & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) > 0;
2802    state->pcbuf_dirty[MESA_SHADER_TESS_EVAL] |= (stage_flags & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) > 0;
2803    state->pcbuf_dirty[MESA_SHADER_COMPUTE] |= (stage_flags & VK_SHADER_STAGE_COMPUTE_BIT) > 0;
2804    state->pcbuf_dirty[MESA_SHADER_TASK] |= (stage_flags & VK_SHADER_STAGE_TASK_BIT_EXT) > 0;
2805    state->pcbuf_dirty[MESA_SHADER_MESH] |= (stage_flags & VK_SHADER_STAGE_MESH_BIT_EXT) > 0;
2806    state->pcbuf_dirty[MESA_SHADER_RAYGEN] |= (stage_flags & LVP_RAY_TRACING_STAGES) > 0;
2807    state->inlines_dirty[MESA_SHADER_VERTEX] |= (stage_flags & VK_SHADER_STAGE_VERTEX_BIT) > 0;
2808    state->inlines_dirty[MESA_SHADER_FRAGMENT] |= (stage_flags & VK_SHADER_STAGE_FRAGMENT_BIT) > 0;
2809    state->inlines_dirty[MESA_SHADER_GEOMETRY] |= (stage_flags & VK_SHADER_STAGE_GEOMETRY_BIT) > 0;
2810    state->inlines_dirty[MESA_SHADER_TESS_CTRL] |= (stage_flags & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) > 0;
2811    state->inlines_dirty[MESA_SHADER_TESS_EVAL] |= (stage_flags & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) > 0;
2812    state->inlines_dirty[MESA_SHADER_COMPUTE] |= (stage_flags & VK_SHADER_STAGE_COMPUTE_BIT) > 0;
2813    state->inlines_dirty[MESA_SHADER_TASK] |= (stage_flags & VK_SHADER_STAGE_TASK_BIT_EXT) > 0;
2814    state->inlines_dirty[MESA_SHADER_MESH] |= (stage_flags & VK_SHADER_STAGE_MESH_BIT_EXT) > 0;
2815 }
2816 
2817 static void lvp_execute_cmd_buffer(struct list_head *cmds,
2818                                    struct rendering_state *state, bool print_cmds);
2819 
2820 static void handle_execute_commands(struct vk_cmd_queue_entry *cmd,
2821                                     struct rendering_state *state, bool print_cmds)
2822 {
2823    for (unsigned i = 0; i < cmd->u.execute_commands.command_buffer_count; i++) {
2824       LVP_FROM_HANDLE(lvp_cmd_buffer, secondary_buf, cmd->u.execute_commands.command_buffers[i]);
2825       lvp_execute_cmd_buffer(&secondary_buf->vk.cmd_queue.cmds, state, print_cmds);
2826    }
2827 }
2828 
2829 static void handle_event_set2(struct vk_cmd_queue_entry *cmd,
2830                              struct rendering_state *state)
2831 {
2832    LVP_FROM_HANDLE(lvp_event, event, cmd->u.set_event2.event);
2833 
2834    VkPipelineStageFlags2 src_stage_mask = 0;
2835 
2836    for (uint32_t i = 0; i < cmd->u.set_event2.dependency_info->memoryBarrierCount; i++)
2837       src_stage_mask |= cmd->u.set_event2.dependency_info->pMemoryBarriers[i].srcStageMask;
2838    for (uint32_t i = 0; i < cmd->u.set_event2.dependency_info->bufferMemoryBarrierCount; i++)
2839       src_stage_mask |= cmd->u.set_event2.dependency_info->pBufferMemoryBarriers[i].srcStageMask;
2840    for (uint32_t i = 0; i < cmd->u.set_event2.dependency_info->imageMemoryBarrierCount; i++)
2841       src_stage_mask |= cmd->u.set_event2.dependency_info->pImageMemoryBarriers[i].srcStageMask;
2842 
2843    if (src_stage_mask & VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)
2844       state->pctx->flush(state->pctx, NULL, 0);
2845    event->event_storage = 1;
2846 }
2847 
2848 static void handle_event_reset2(struct vk_cmd_queue_entry *cmd,
2849                                struct rendering_state *state)
2850 {
2851    LVP_FROM_HANDLE(lvp_event, event, cmd->u.reset_event2.event);
2852 
2853    if (cmd->u.reset_event2.stage_mask == VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)
2854       state->pctx->flush(state->pctx, NULL, 0);
2855    event->event_storage = 0;
2856 }
2857 
2858 static void handle_wait_events2(struct vk_cmd_queue_entry *cmd,
2859                                struct rendering_state *state)
2860 {
2861    finish_fence(state);
2862    for (unsigned i = 0; i < cmd->u.wait_events2.event_count; i++) {
2863       LVP_FROM_HANDLE(lvp_event, event, cmd->u.wait_events2.events[i]);
2864 
2865       while (event->event_storage != true);
2866    }
2867 }
2868 
2869 static void handle_pipeline_barrier(struct vk_cmd_queue_entry *cmd,
2870                                     struct rendering_state *state)
2871 {
2872    finish_fence(state);
2873 }
2874 
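/* With multiview enabled, a query is begun for every active view; queries for
 * views past the first are ended again immediately, so only the first view's
 * query accumulates results.
 */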
2875 static void handle_begin_query(struct vk_cmd_queue_entry *cmd,
2876                                struct rendering_state *state)
2877 {
2878    struct vk_cmd_begin_query *qcmd = &cmd->u.begin_query;
2879    LVP_FROM_HANDLE(lvp_query_pool, pool, qcmd->query_pool);
2880 
2881    if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS &&
2882        pool->pipeline_stats & VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
2883       emit_compute_state(state);
2884 
2885    emit_state(state);
2886 
2887    uint32_t count = util_bitcount(state->info.view_mask ? state->info.view_mask : BITFIELD_BIT(0));
2888    for (unsigned idx = 0; idx < count; idx++) {
2889       if (!pool->queries[qcmd->query + idx]) {
2890          enum pipe_query_type qtype = pool->base_type;
2891          pool->queries[qcmd->query + idx] = state->pctx->create_query(state->pctx,
2892                                                                qtype, 0);
2893       }
2894 
2895       state->pctx->begin_query(state->pctx, pool->queries[qcmd->query + idx]);
2896       if (idx)
2897          state->pctx->end_query(state->pctx, pool->queries[qcmd->query + idx]);
2898    }
2899 }
2900 
2901 static void handle_end_query(struct vk_cmd_queue_entry *cmd,
2902                              struct rendering_state *state)
2903 {
2904    struct vk_cmd_end_query *qcmd = &cmd->u.end_query;
2905    LVP_FROM_HANDLE(lvp_query_pool, pool, qcmd->query_pool);
2906    assert(pool->queries[qcmd->query]);
2907 
2908    state->pctx->end_query(state->pctx, pool->queries[qcmd->query]);
2909 }
2910 
2911 
2912 static void handle_begin_query_indexed_ext(struct vk_cmd_queue_entry *cmd,
2913                                            struct rendering_state *state)
2914 {
2915    struct vk_cmd_begin_query_indexed_ext *qcmd = &cmd->u.begin_query_indexed_ext;
2916    LVP_FROM_HANDLE(lvp_query_pool, pool, qcmd->query_pool);
2917 
2918    if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS &&
2919        pool->pipeline_stats & VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
2920       emit_compute_state(state);
2921 
2922    emit_state(state);
2923 
2924    uint32_t count = util_bitcount(state->info.view_mask ? state->info.view_mask : BITFIELD_BIT(0));
2925    for (unsigned idx = 0; idx < count; idx++) {
2926       if (!pool->queries[qcmd->query + idx]) {
2927          enum pipe_query_type qtype = pool->base_type;
2928          pool->queries[qcmd->query + idx] = state->pctx->create_query(state->pctx,
2929                                                                       qtype, qcmd->index);
2930       }
2931 
2932       state->pctx->begin_query(state->pctx, pool->queries[qcmd->query + idx]);
2933       if (idx)
2934          state->pctx->end_query(state->pctx, pool->queries[qcmd->query + idx]);
2935    }
2936 }
2937 
2938 static void handle_end_query_indexed_ext(struct vk_cmd_queue_entry *cmd,
2939                                          struct rendering_state *state)
2940 {
2941    struct vk_cmd_end_query_indexed_ext *qcmd = &cmd->u.end_query_indexed_ext;
2942    LVP_FROM_HANDLE(lvp_query_pool, pool, qcmd->query_pool);
2943    assert(pool->queries[qcmd->query]);
2944 
2945    state->pctx->end_query(state->pctx, pool->queries[qcmd->query]);
2946 }
2947 
2948 static void handle_reset_query_pool(struct vk_cmd_queue_entry *cmd,
2949                                     struct rendering_state *state)
2950 {
2951    struct vk_cmd_reset_query_pool *qcmd = &cmd->u.reset_query_pool;
2952    LVP_FROM_HANDLE(lvp_query_pool, pool, qcmd->query_pool);
2953    for (unsigned i = qcmd->first_query; i < qcmd->first_query + qcmd->query_count; i++) {
2954       if (pool->queries[i]) {
2955          state->pctx->destroy_query(state->pctx, pool->queries[i]);
2956          pool->queries[i] = NULL;
2957       }
2958    }
2959 }
2960 
2961 static void handle_write_timestamp2(struct vk_cmd_queue_entry *cmd,
2962                                     struct rendering_state *state)
2963 {
2964    struct vk_cmd_write_timestamp2 *qcmd = &cmd->u.write_timestamp2;
2965    LVP_FROM_HANDLE(lvp_query_pool, pool, qcmd->query_pool);
2966 
2967    if (!(qcmd->stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT))
2968       state->pctx->flush(state->pctx, NULL, 0);
2969 
2970    uint32_t count = util_bitcount(state->info.view_mask ? state->info.view_mask : BITFIELD_BIT(0));
2971    for (unsigned idx = 0; idx < count; idx++) {
2972       if (!pool->queries[qcmd->query + idx]) {
2973          pool->queries[qcmd->query + idx] = state->pctx->create_query(state->pctx, PIPE_QUERY_TIMESTAMP, 0);
2974       }
2975 
2976       state->pctx->end_query(state->pctx, pool->queries[qcmd->query + idx]);
2977    }
2978 }
2979 
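/* Emulated query types (pool->base_type >= PIPE_QUERY_TYPES) are copied from
 * the pool's CPU-side data array; real pipe queries are written with
 * get_query_result_resource, one result per pipeline-statistics bit.
 * Queries that were never begun only zero the destination when availability
 * is requested.
 */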
2980 static void handle_copy_query_pool_results(struct vk_cmd_queue_entry *cmd,
2981                                            struct rendering_state *state)
2982 {
2983    struct vk_cmd_copy_query_pool_results *copycmd = &cmd->u.copy_query_pool_results;
2984    LVP_FROM_HANDLE(lvp_query_pool, pool, copycmd->query_pool);
2985    enum pipe_query_flags flags = (copycmd->flags & VK_QUERY_RESULT_WAIT_BIT) ? PIPE_QUERY_WAIT : 0;
2986 
2987    if (copycmd->flags & VK_QUERY_RESULT_PARTIAL_BIT)
2988       flags |= PIPE_QUERY_PARTIAL;
2989    unsigned result_size = copycmd->flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
2990    for (unsigned i = copycmd->first_query; i < copycmd->first_query + copycmd->query_count; i++) {
2991       unsigned offset = copycmd->dst_offset + (copycmd->stride * (i - copycmd->first_query));
2992 
2993       if (pool->base_type >= PIPE_QUERY_TYPES) {
2994          struct pipe_transfer *transfer;
2995          uint8_t *map = pipe_buffer_map(state->pctx, lvp_buffer_from_handle(copycmd->dst_buffer)->bo, PIPE_MAP_WRITE, &transfer);
2996          map += offset;
2997 
2998          if (copycmd->flags & VK_QUERY_RESULT_64_BIT) {
2999             uint64_t *dst = (uint64_t *)map;
3000             uint64_t *src = (uint64_t *)pool->data;
3001             *dst = src[i];
3002          } else {
3003             uint32_t *dst = (uint32_t *)map;
3004             uint64_t *src = (uint64_t *)pool->data;
3005             *dst = (uint32_t) (src[i] & UINT32_MAX);
3006          }
3007 
3008          state->pctx->buffer_unmap(state->pctx, transfer);
3009 
3010          continue;
3011       }
3012 
3013       if (pool->queries[i]) {
3014          unsigned num_results = 0;
3015          if (copycmd->flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
3016             if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
3017                num_results = util_bitcount(pool->pipeline_stats);
3018             } else
3019                num_results = pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ? 2 : 1;
3020             state->pctx->get_query_result_resource(state->pctx,
3021                                                    pool->queries[i],
3022                                                    flags,
3023                                                    copycmd->flags & VK_QUERY_RESULT_64_BIT ? PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32,
3024                                                    -1,
3025                                                    lvp_buffer_from_handle(copycmd->dst_buffer)->bo,
3026                                                    offset + num_results * result_size);
3027          }
3028          if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
3029             num_results = 0;
3030             u_foreach_bit(bit, pool->pipeline_stats)
3031                state->pctx->get_query_result_resource(state->pctx,
3032                                                       pool->queries[i],
3033                                                       flags,
3034                                                       copycmd->flags & VK_QUERY_RESULT_64_BIT ? PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32,
3035                                                       bit,
3036                                                       lvp_buffer_from_handle(copycmd->dst_buffer)->bo,
3037                                                       offset + num_results++ * result_size);
3038          } else {
3039             state->pctx->get_query_result_resource(state->pctx,
3040                                                    pool->queries[i],
3041                                                    flags,
3042                                                    copycmd->flags & VK_QUERY_RESULT_64_BIT ? PIPE_QUERY_TYPE_U64 : PIPE_QUERY_TYPE_U32,
3043                                                    0,
3044                                                    lvp_buffer_from_handle(copycmd->dst_buffer)->bo,
3045                                                    offset);
3046          }
3047       } else {
3048          /* if no queries emitted yet, just reset the buffer to 0 so avail is reported correctly */
3049          if (copycmd->flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
3050             struct pipe_transfer *src_t;
3051             uint32_t *map;
3052 
3053             struct pipe_box box = {0};
3054             box.x = offset;
3055             box.width = copycmd->stride;
3056             box.height = 1;
3057             box.depth = 1;
3058             map = state->pctx->buffer_map(state->pctx,
3059                                             lvp_buffer_from_handle(copycmd->dst_buffer)->bo, 0, PIPE_MAP_WRITE, &box,
3060                                             &src_t);
3061 
3062             memset(map, 0, box.width);
3063             state->pctx->buffer_unmap(state->pctx, src_t);
3064          }
3065       }
3066    }
3067 }
3068 
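/* The clear color is packed into the image's pipe format once, then every
 * requested mip level / array layer range is cleared with clear_texture.
 */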
3069 static void handle_clear_color_image(struct vk_cmd_queue_entry *cmd,
3070                                      struct rendering_state *state)
3071 {
3072    LVP_FROM_HANDLE(lvp_image, image, cmd->u.clear_color_image.image);
3073    union util_color uc;
3074    uint32_t *col_val = uc.ui;
3075    util_pack_color_union(image->planes[0].bo->format, &uc, (void*)cmd->u.clear_color_image.color);
3076    for (unsigned i = 0; i < cmd->u.clear_color_image.range_count; i++) {
3077       VkImageSubresourceRange *range = &cmd->u.clear_color_image.ranges[i];
3078       struct pipe_box box;
3079       box.x = 0;
3080       box.y = 0;
3081       box.z = 0;
3082 
3083       uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
3084       for (unsigned j = range->baseMipLevel; j < range->baseMipLevel + level_count; j++) {
3085          box.width = u_minify(image->planes[0].bo->width0, j);
3086          box.height = u_minify(image->planes[0].bo->height0, j);
3087          box.depth = 1;
3088          if (image->planes[0].bo->target == PIPE_TEXTURE_3D) {
3089             box.depth = u_minify(image->planes[0].bo->depth0, j);
3090          } else if (image->planes[0].bo->target == PIPE_TEXTURE_1D_ARRAY) {
3091             box.y = range->baseArrayLayer;
3092             box.height = vk_image_subresource_layer_count(&image->vk, range);
3093             box.depth = 1;
3094          } else {
3095             box.z = range->baseArrayLayer;
3096             box.depth = vk_image_subresource_layer_count(&image->vk, range);
3097          }
3098 
3099          state->pctx->clear_texture(state->pctx, image->planes[0].bo,
3100                                     j, &box, (void *)col_val);
3101       }
3102    }
3103 }
3104 
3105 static void handle_clear_ds_image(struct vk_cmd_queue_entry *cmd,
3106                                   struct rendering_state *state)
3107 {
3108    LVP_FROM_HANDLE(lvp_image, image, cmd->u.clear_depth_stencil_image.image);
3109    for (unsigned i = 0; i < cmd->u.clear_depth_stencil_image.range_count; i++) {
3110       VkImageSubresourceRange *range = &cmd->u.clear_depth_stencil_image.ranges[i];
3111       uint32_t ds_clear_flags = 0;
3112       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
3113          ds_clear_flags |= PIPE_CLEAR_DEPTH;
3114       if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
3115          ds_clear_flags |= PIPE_CLEAR_STENCIL;
3116 
3117       uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
3118       for (unsigned j = 0; j < level_count; j++) {
3119          struct pipe_surface *surf;
3120          unsigned width, height, depth;
3121          width = u_minify(image->planes[0].bo->width0, range->baseMipLevel + j);
3122          height = u_minify(image->planes[0].bo->height0, range->baseMipLevel + j);
3123 
3124          if (image->planes[0].bo->target == PIPE_TEXTURE_3D) {
3125             depth = u_minify(image->planes[0].bo->depth0, range->baseMipLevel + j);
3126          } else {
3127             depth = vk_image_subresource_layer_count(&image->vk, range);
3128          }
3129 
3130          surf = create_img_surface_bo(state, range,
3131                                       image->planes[0].bo, image->planes[0].bo->format,
3132                                       width, height,
3133                                       0, depth, j);
3134 
3135          state->pctx->clear_depth_stencil(state->pctx,
3136                                           surf,
3137                                           ds_clear_flags,
3138                                           cmd->u.clear_depth_stencil_image.depth_stencil->depth,
3139                                           cmd->u.clear_depth_stencil_image.depth_stencil->stencil,
3140                                           0, 0,
3141                                           width, height, false);
3142          state->pctx->surface_destroy(state->pctx, surf);
3143       }
3144    }
3145 }
3146 
3147 static void handle_clear_attachments(struct vk_cmd_queue_entry *cmd,
3148                                      struct rendering_state *state)
3149 {
3150    for (uint32_t a = 0; a < cmd->u.clear_attachments.attachment_count; a++) {
3151       VkClearAttachment *att = &cmd->u.clear_attachments.attachments[a];
3152       struct lvp_image_view *imgv;
3153 
3154       if (att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
3155          imgv = state->color_att[att->colorAttachment].imgv;
3156       } else {
3157          imgv = state->ds_imgv;
3158       }
3159       if (!imgv)
3160          continue;
3161 
3162       union pipe_color_union col_val;
3163       double dclear_val = 0;
3164       uint32_t sclear_val = 0;
3165       uint32_t ds_clear_flags = 0;
3166       if (att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
3167          ds_clear_flags |= PIPE_CLEAR_DEPTH;
3168          dclear_val = att->clearValue.depthStencil.depth;
3169       }
3170       if (att->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
3171          ds_clear_flags |= PIPE_CLEAR_STENCIL;
3172          sclear_val = att->clearValue.depthStencil.stencil;
3173       }
3174       if (att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
3175          for (unsigned i = 0; i < 4; i++)
3176             col_val.ui[i] = att->clearValue.color.uint32[i];
3177       }
3178 
3179       for (uint32_t r = 0; r < cmd->u.clear_attachments.rect_count; r++) {
3180 
3181          VkClearRect *rect = &cmd->u.clear_attachments.rects[r];
3182          /* avoid crashing on spec violations */
3183          rect->rect.offset.x = MAX2(rect->rect.offset.x, 0);
3184          rect->rect.offset.y = MAX2(rect->rect.offset.y, 0);
3185          rect->rect.extent.width = MIN2(rect->rect.extent.width, state->framebuffer.width - rect->rect.offset.x);
3186          rect->rect.extent.height = MIN2(rect->rect.extent.height, state->framebuffer.height - rect->rect.offset.y);
3187          if (state->info.view_mask) {
3188             u_foreach_bit(i, state->info.view_mask)
3189                clear_attachment_layers(state, imgv, &rect->rect,
3190                                        i, 1,
3191                                        ds_clear_flags, dclear_val, sclear_val,
3192                                        &col_val);
3193          } else
3194             clear_attachment_layers(state, imgv, &rect->rect,
3195                                     rect->baseArrayLayer, rect->layerCount,
3196                                     ds_clear_flags, dclear_val, sclear_val,
3197                                     &col_val);
3198       }
3199    }
3200 }
3201 
3202 static void handle_resolve_image(struct vk_cmd_queue_entry *cmd,
3203                                  struct rendering_state *state)
3204 {
3205    VkResolveImageInfo2 *resolvecmd = cmd->u.resolve_image2.resolve_image_info;
3206    LVP_FROM_HANDLE(lvp_image, src_image, resolvecmd->srcImage);
3207    LVP_FROM_HANDLE(lvp_image, dst_image, resolvecmd->dstImage);
3208 
3209    struct pipe_blit_info info = {0};
3210    info.src.resource = src_image->planes[0].bo;
3211    info.dst.resource = dst_image->planes[0].bo;
3212    info.src.format = src_image->planes[0].bo->format;
3213    info.dst.format = dst_image->planes[0].bo->format;
3214    info.mask = util_format_is_depth_or_stencil(info.src.format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA;
3215    info.filter = PIPE_TEX_FILTER_NEAREST;
3216 
3217    for (uint32_t i = 0; i < resolvecmd->regionCount; i++) {
3218       int srcX0, srcY0;
3219       unsigned dstX0, dstY0;
3220 
3221       srcX0 = resolvecmd->pRegions[i].srcOffset.x;
3222       srcY0 = resolvecmd->pRegions[i].srcOffset.y;
3223 
3224       dstX0 = resolvecmd->pRegions[i].dstOffset.x;
3225       dstY0 = resolvecmd->pRegions[i].dstOffset.y;
3226 
3227       info.dst.box.x = dstX0;
3228       info.dst.box.y = dstY0;
3229       info.src.box.x = srcX0;
3230       info.src.box.y = srcY0;
3231 
3232       info.dst.box.width = resolvecmd->pRegions[i].extent.width;
3233       info.src.box.width = resolvecmd->pRegions[i].extent.width;
3234       info.dst.box.height = resolvecmd->pRegions[i].extent.height;
3235       info.src.box.height = resolvecmd->pRegions[i].extent.height;
3236 
3237       info.dst.box.depth = subresource_layercount(dst_image, &resolvecmd->pRegions[i].dstSubresource);
3238       info.src.box.depth = subresource_layercount(src_image, &resolvecmd->pRegions[i].srcSubresource);
3239 
3240       info.src.level = resolvecmd->pRegions[i].srcSubresource.mipLevel;
3241       info.src.box.z = resolvecmd->pRegions[i].srcOffset.z + resolvecmd->pRegions[i].srcSubresource.baseArrayLayer;
3242 
3243       info.dst.level = resolvecmd->pRegions[i].dstSubresource.mipLevel;
3244       info.dst.box.z = resolvecmd->pRegions[i].dstOffset.z + resolvecmd->pRegions[i].dstSubresource.baseArrayLayer;
3245 
3246       state->pctx->blit(state->pctx, &info);
3247    }
3248 }
3249 
3250 static void handle_draw_indirect_count(struct vk_cmd_queue_entry *cmd,
3251                                        struct rendering_state *state, bool indexed)
3252 {
3253    struct pipe_draw_start_count_bias draw = {0};
3254    struct pipe_resource *index = NULL;
3255    if (indexed) {
3256       state->info.index_bounds_valid = false;
3257       state->info.index_size = state->index_size;
3258       state->info.index.resource = state->index_buffer;
3259       state->info.max_index = ~0U;
3260       if (state->index_offset || state->index_buffer_size != UINT32_MAX) {
3261          struct pipe_transfer *xfer;
3262          uint8_t *mem = pipe_buffer_map(state->pctx, state->index_buffer, 0, &xfer);
3263          state->pctx->buffer_unmap(state->pctx, xfer);
3264          index = get_buffer_resource(state->pctx, mem + state->index_offset);
3265          index->width0 = MIN2(state->index_buffer->width0 - state->index_offset, state->index_buffer_size);
3266          state->info.index.resource = index;
3267       }
3268    } else
3269       state->info.index_size = 0;
3270    state->indirect_info.offset = cmd->u.draw_indirect_count.offset;
3271    state->indirect_info.stride = cmd->u.draw_indirect_count.stride;
3272    state->indirect_info.draw_count = cmd->u.draw_indirect_count.max_draw_count;
3273    state->indirect_info.buffer = lvp_buffer_from_handle(cmd->u.draw_indirect_count.buffer)->bo;
3274    state->indirect_info.indirect_draw_count_offset = cmd->u.draw_indirect_count.count_buffer_offset;
3275    state->indirect_info.indirect_draw_count = lvp_buffer_from_handle(cmd->u.draw_indirect_count.count_buffer)->bo;
3276 
3277    state->pctx->draw_vbo(state->pctx, &state->info, 0, &state->indirect_info, &draw, 1);
3278    pipe_resource_reference(&index, NULL);
3279 }
3280 
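/* Push descriptors are emulated with a transient descriptor set: a fresh set
 * is allocated, seeded from the currently bound set at the same index,
 * updated with the pushed writes, and then bound through the regular
 * descriptor-set path.
 */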
3281 static void handle_push_descriptor_set(struct vk_cmd_queue_entry *cmd,
3282                                        struct rendering_state *state)
3283 {
3284    VkPushDescriptorSetInfoKHR *pds = cmd->u.push_descriptor_set2_khr.push_descriptor_set_info;
3285    LVP_FROM_HANDLE(lvp_pipeline_layout, layout, pds->layout);
3286    struct lvp_descriptor_set_layout *set_layout = (struct lvp_descriptor_set_layout *)layout->vk.set_layouts[pds->set];
3287 
3288    struct lvp_descriptor_set *set;
3289    lvp_descriptor_set_create(state->device, set_layout, &set);
3290 
3291    util_dynarray_append(&state->push_desc_sets, struct lvp_descriptor_set *, set);
3292 
3293    uint32_t types = lvp_pipeline_types_from_shader_stages(pds->stageFlags);
3294    u_foreach_bit(pipeline_type, types) {
3295       struct lvp_descriptor_set *base = state->desc_sets[pipeline_type][pds->set];
3296       if (base)
3297          memcpy(set->map, base->map, MIN2(set->bo->width0, base->bo->width0));
3298 
3299       VkDescriptorSet set_handle = lvp_descriptor_set_to_handle(set);
3300 
3301       VkWriteDescriptorSet *writes = (void*)pds->pDescriptorWrites;
3302       for (uint32_t i = 0; i < pds->descriptorWriteCount; i++)
3303          writes[i].dstSet = set_handle;
3304 
3305       lvp_UpdateDescriptorSets(lvp_device_to_handle(state->device), pds->descriptorWriteCount, pds->pDescriptorWrites, 0, NULL);
3306 
3307       VkBindDescriptorSetsInfoKHR bind_info = {
3308          .stageFlags = pds->stageFlags,
3309          .layout = pds->layout,
3310          .firstSet = pds->set,
3311          .descriptorSetCount = 1,
3312          .pDescriptorSets = &set_handle,
3313       };
3314       handle_descriptor_sets(&bind_info, state);
3315    }
3316 }
3317 
3318 static void handle_push_descriptor_set_with_template(struct vk_cmd_queue_entry *cmd,
3319                                                      struct rendering_state *state)
3320 {
3321    VkPushDescriptorSetWithTemplateInfoKHR *pds = cmd->u.push_descriptor_set_with_template2_khr.push_descriptor_set_with_template_info;
3322    LVP_FROM_HANDLE(vk_descriptor_update_template, templ, pds->descriptorUpdateTemplate);
3323    LVP_FROM_HANDLE(lvp_pipeline_layout, layout, pds->layout);
3324    struct lvp_descriptor_set_layout *set_layout = (struct lvp_descriptor_set_layout *)layout->vk.set_layouts[pds->set];
3325 
3326    struct lvp_descriptor_set *set;
3327    lvp_descriptor_set_create(state->device, set_layout, &set);
3328 
3329    util_dynarray_append(&state->push_desc_sets, struct lvp_descriptor_set *, set);
3330 
3331    struct lvp_descriptor_set *base = state->desc_sets[lvp_pipeline_type_from_bind_point(templ->bind_point)][pds->set];
3332    if (base)
3333       memcpy(set->map, base->map, MIN2(set->bo->width0, base->bo->width0));
3334 
3335    VkDescriptorSet set_handle = lvp_descriptor_set_to_handle(set);
3336    lvp_descriptor_set_update_with_template(lvp_device_to_handle(state->device), set_handle,
3337                                            pds->descriptorUpdateTemplate, pds->pData);
3338 
3339    VkBindDescriptorSetsInfoKHR bind_cmd = {
3340       .stageFlags = vk_shader_stages_from_bind_point(templ->bind_point),
3341       .layout = pds->layout,
3342       .firstSet = pds->set,
3343       .descriptorSetCount = 1,
3344       .pDescriptorSets = &set_handle,
3345    };
3346    handle_descriptor_sets(&bind_cmd, state);
3347 }
3348 
3349 static void handle_bind_transform_feedback_buffers(struct vk_cmd_queue_entry *cmd,
3350                                                    struct rendering_state *state)
3351 {
3352    struct vk_cmd_bind_transform_feedback_buffers_ext *btfb = &cmd->u.bind_transform_feedback_buffers_ext;
3353 
3354    for (unsigned i = 0; i < btfb->binding_count; i++) {
3355       int idx = i + btfb->first_binding;
3356       uint32_t size;
3357       struct lvp_buffer *buf = lvp_buffer_from_handle(btfb->buffers[i]);
3358 
3359       size = vk_buffer_range(&buf->vk, btfb->offsets[i], btfb->sizes ? btfb->sizes[i] : VK_WHOLE_SIZE);
3360 
3361       if (state->so_targets[idx])
3362          state->pctx->stream_output_target_destroy(state->pctx, state->so_targets[idx]);
3363 
3364       state->so_targets[idx] = state->pctx->create_stream_output_target(state->pctx,
3365                                                                         lvp_buffer_from_handle(btfb->buffers[i])->bo,
3366                                                                         btfb->offsets[i],
3367                                                                         size);
3368    }
3369    state->num_so_targets = btfb->first_binding + btfb->binding_count;
3370 }
3371 
3372 static void handle_begin_transform_feedback(struct vk_cmd_queue_entry *cmd,
3373                                             struct rendering_state *state)
3374 {
3375    struct vk_cmd_begin_transform_feedback_ext *btf = &cmd->u.begin_transform_feedback_ext;
3376    uint32_t offsets[4] = {0};
3377 
3378    for (unsigned i = 0; btf->counter_buffers && i < btf->counter_buffer_count; i++) {
3379       if (!btf->counter_buffers[i])
3380          continue;
3381 
3382       pipe_buffer_read(state->pctx,
3383                        btf->counter_buffers ? lvp_buffer_from_handle(btf->counter_buffers[i])->bo : NULL,
3384                        btf->counter_buffer_offsets ? btf->counter_buffer_offsets[i] : 0,
3385                        4,
3386                        &offsets[i]);
3387    }
3388    state->pctx->set_stream_output_targets(state->pctx, state->num_so_targets,
3389                                           state->so_targets, offsets);
3390 }
3391 
3392 static void handle_end_transform_feedback(struct vk_cmd_queue_entry *cmd,
3393                                           struct rendering_state *state)
3394 {
3395    struct vk_cmd_end_transform_feedback_ext *etf = &cmd->u.end_transform_feedback_ext;
3396 
3397    if (etf->counter_buffer_count) {
3398       for (unsigned i = 0; etf->counter_buffers && i < etf->counter_buffer_count; i++) {
3399          if (!etf->counter_buffers[i])
3400             continue;
3401 
3402          uint32_t offset;
3403          offset = state->pctx->stream_output_target_offset(state->so_targets[i]);
3404 
3405          pipe_buffer_write(state->pctx,
3406                            etf->counter_buffers ? lvp_buffer_from_handle(etf->counter_buffers[i])->bo : NULL,
3407                            etf->counter_buffer_offsets ? etf->counter_buffer_offsets[i] : 0,
3408                            4,
3409                            &offset);
3410       }
3411    }
3412    state->pctx->set_stream_output_targets(state->pctx, 0, NULL, NULL);
3413 }
3414 
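/* Draw-indirect-byte-count: the byte counter written when transform feedback
 * ended is read back and divided by vertex_stride to recover the vertex count.
 */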
3415 static void handle_draw_indirect_byte_count(struct vk_cmd_queue_entry *cmd,
3416                                             struct rendering_state *state)
3417 {
3418    struct vk_cmd_draw_indirect_byte_count_ext *dibc = &cmd->u.draw_indirect_byte_count_ext;
3419    struct pipe_draw_start_count_bias draw = {0};
3420 
3421    pipe_buffer_read(state->pctx,
3422                     lvp_buffer_from_handle(dibc->counter_buffer)->bo,
3423                     dibc->counter_buffer_offset,
3424                     4, &draw.count);
3425 
3426    state->info.start_instance = cmd->u.draw_indirect_byte_count_ext.first_instance;
3427    state->info.instance_count = cmd->u.draw_indirect_byte_count_ext.instance_count;
3428    state->info.index_size = 0;
3429 
3430    draw.count /= cmd->u.draw_indirect_byte_count_ext.vertex_stride;
3431    state->pctx->draw_vbo(state->pctx, &state->info, 0, NULL, &draw, 1);
3432 }
3433 
3434 static void handle_begin_conditional_rendering(struct vk_cmd_queue_entry *cmd,
3435                                                struct rendering_state *state)
3436 {
3437    struct VkConditionalRenderingBeginInfoEXT *bcr = cmd->u.begin_conditional_rendering_ext.conditional_rendering_begin;
3438    state->render_cond = true;
3439    state->pctx->render_condition_mem(state->pctx,
3440                                      lvp_buffer_from_handle(bcr->buffer)->bo,
3441                                      bcr->offset,
3442                                      bcr->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT);
3443 }
3444 
3445 static void handle_end_conditional_rendering(struct rendering_state *state)
3446 {
3447    state->render_cond = false;
3448    state->pctx->render_condition_mem(state->pctx, NULL, 0, false);
3449 }
3450 
3451 static void handle_set_vertex_input(struct vk_cmd_queue_entry *cmd,
3452                                     struct rendering_state *state)
3453 {
3454    const struct vk_cmd_set_vertex_input_ext *vertex_input = &cmd->u.set_vertex_input_ext;
3455    const struct VkVertexInputBindingDescription2EXT *bindings = vertex_input->vertex_binding_descriptions;
3456    const struct VkVertexInputAttributeDescription2EXT *attrs = vertex_input->vertex_attribute_descriptions;
3457    int max_location = -1;
3458    for (unsigned i = 0; i < vertex_input->vertex_attribute_description_count; i++) {
3459       const struct VkVertexInputBindingDescription2EXT *binding = NULL;
3460       unsigned location = attrs[i].location;
3461 
3462       for (unsigned j = 0; j < vertex_input->vertex_binding_description_count; j++) {
3463          const struct VkVertexInputBindingDescription2EXT *b = &bindings[j];
3464          if (b->binding == attrs[i].binding) {
3465             binding = b;
3466             break;
3467          }
3468       }
3469       assert(binding);
3470       state->velem.velems[location].src_offset = attrs[i].offset;
3471       state->vertex_buffer_index[location] = attrs[i].binding;
3472       state->velem.velems[location].src_format = lvp_vk_format_to_pipe_format(attrs[i].format);
3473       state->velem.velems[location].src_stride = binding->stride;
3474       uint32_t d = binding->divisor;
3475       switch (binding->inputRate) {
3476       case VK_VERTEX_INPUT_RATE_VERTEX:
3477          state->velem.velems[location].instance_divisor = 0;
3478          break;
3479       case VK_VERTEX_INPUT_RATE_INSTANCE:
3480          state->velem.velems[location].instance_divisor = d ? d : UINT32_MAX;
3481          break;
3482       default:
3483          assert(0);
3484          break;
3485       }
3486 
3487       if ((int)location > max_location)
3488          max_location = location;
3489    }
3490    state->velem.count = max_location + 1;
3491    state->vb_strides_dirty = false;
3492    state->vb_dirty = true;
3493    state->ve_dirty = true;
3494 }
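/* Illustrative note (added, not in the upstream source): with
 * VK_VERTEX_INPUT_RATE_INSTANCE a divisor of 0 means "all instances read the
 * same element", which gallium has no direct encoding for, hence the
 * UINT32_MAX divisor above. For a hypothetical binding such as
 *
 *    VkVertexInputBindingDescription2EXT b = {
 *       .binding = 0, .stride = 16,
 *       .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE, .divisor = 0,
 *    };
 *
 * the loop produces instance_divisor = UINT32_MAX, so the attribute
 * effectively never advances across instances.
 */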
3495 
3496 static void handle_set_cull_mode(struct vk_cmd_queue_entry *cmd,
3497                                  struct rendering_state *state)
3498 {
3499    state->rs_state.cull_face = vk_cull_to_pipe(cmd->u.set_cull_mode.cull_mode);
3500    state->rs_dirty = true;
3501 }
3502 
3503 static void handle_set_front_face(struct vk_cmd_queue_entry *cmd,
3504                                   struct rendering_state *state)
3505 {
3506    state->rs_state.front_ccw = (cmd->u.set_front_face.front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE);
3507    state->rs_dirty = true;
3508 }
3509 
3510 static void handle_set_primitive_topology(struct vk_cmd_queue_entry *cmd,
3511                                           struct rendering_state *state)
3512 {
3513    state->info.mode = vk_conv_topology(cmd->u.set_primitive_topology.primitive_topology);
3514    state->rs_dirty = true;
3515 }
3516 
3517 static void handle_set_depth_test_enable(struct vk_cmd_queue_entry *cmd,
3518                                          struct rendering_state *state)
3519 {
3520    state->dsa_dirty |= state->dsa_state.depth_enabled != cmd->u.set_depth_test_enable.depth_test_enable;
3521    state->dsa_state.depth_enabled = cmd->u.set_depth_test_enable.depth_test_enable;
3522 }
3523 
3524 static void handle_set_depth_write_enable(struct vk_cmd_queue_entry *cmd,
3525                                           struct rendering_state *state)
3526 {
3527    state->dsa_dirty |= state->dsa_state.depth_writemask != cmd->u.set_depth_write_enable.depth_write_enable;
3528    state->dsa_state.depth_writemask = cmd->u.set_depth_write_enable.depth_write_enable;
3529 }
3530 
3531 static void handle_set_depth_compare_op(struct vk_cmd_queue_entry *cmd,
3532                                         struct rendering_state *state)
3533 {
3534    state->dsa_dirty |= state->dsa_state.depth_func != cmd->u.set_depth_compare_op.depth_compare_op;
3535    state->dsa_state.depth_func = cmd->u.set_depth_compare_op.depth_compare_op;
3536 }
3537 
3538 static void handle_set_depth_bounds_test_enable(struct vk_cmd_queue_entry *cmd,
3539                                                 struct rendering_state *state)
3540 {
3541    state->dsa_dirty |= state->dsa_state.depth_bounds_test != cmd->u.set_depth_bounds_test_enable.depth_bounds_test_enable;
3542    state->dsa_state.depth_bounds_test = cmd->u.set_depth_bounds_test_enable.depth_bounds_test_enable;
3543 }
3544 
3545 static void handle_set_stencil_test_enable(struct vk_cmd_queue_entry *cmd,
3546                                            struct rendering_state *state)
3547 {
3548    state->dsa_dirty |= state->dsa_state.stencil[0].enabled != cmd->u.set_stencil_test_enable.stencil_test_enable ||
3549                        state->dsa_state.stencil[1].enabled != cmd->u.set_stencil_test_enable.stencil_test_enable;
3550    state->dsa_state.stencil[0].enabled = cmd->u.set_stencil_test_enable.stencil_test_enable;
3551    state->dsa_state.stencil[1].enabled = cmd->u.set_stencil_test_enable.stencil_test_enable;
3552 }
3553 
3554 static void handle_set_stencil_op(struct vk_cmd_queue_entry *cmd,
3555                                   struct rendering_state *state)
3556 {
3557    if (cmd->u.set_stencil_op.face_mask & VK_STENCIL_FACE_FRONT_BIT) {
3558       state->dsa_state.stencil[0].func = cmd->u.set_stencil_op.compare_op;
3559       state->dsa_state.stencil[0].fail_op = vk_conv_stencil_op(cmd->u.set_stencil_op.fail_op);
3560       state->dsa_state.stencil[0].zpass_op = vk_conv_stencil_op(cmd->u.set_stencil_op.pass_op);
3561       state->dsa_state.stencil[0].zfail_op = vk_conv_stencil_op(cmd->u.set_stencil_op.depth_fail_op);
3562    }
3563 
3564    if (cmd->u.set_stencil_op.face_mask & VK_STENCIL_FACE_BACK_BIT) {
3565       state->dsa_state.stencil[1].func = cmd->u.set_stencil_op.compare_op;
3566       state->dsa_state.stencil[1].fail_op = vk_conv_stencil_op(cmd->u.set_stencil_op.fail_op);
3567       state->dsa_state.stencil[1].zpass_op = vk_conv_stencil_op(cmd->u.set_stencil_op.pass_op);
3568       state->dsa_state.stencil[1].zfail_op = vk_conv_stencil_op(cmd->u.set_stencil_op.depth_fail_op);
3569    }
3570    state->dsa_dirty = true;
3571 }
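/* Note (added for clarity, not in the upstream source): the compare op is
 * copied through unconverted, presumably because the VK_COMPARE_OP_* and
 * PIPE_FUNC_* enums share the same numeric values (NEVER=0 .. ALWAYS=7),
 * whereas the stencil ops do not line up and go through vk_conv_stencil_op().
 */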
3572 
3573 static void handle_set_line_stipple(struct vk_cmd_queue_entry *cmd,
3574                                     struct rendering_state *state)
3575 {
3576    state->rs_state.line_stipple_factor = cmd->u.set_line_stipple_khr.line_stipple_factor - 1;
3577    state->rs_state.line_stipple_pattern = cmd->u.set_line_stipple_khr.line_stipple_pattern;
3578    state->rs_dirty = true;
3579 }
3580 
3581 static void handle_set_depth_bias_enable(struct vk_cmd_queue_entry *cmd,
3582                                          struct rendering_state *state)
3583 {
3584    state->rs_dirty |= state->depth_bias.enabled != cmd->u.set_depth_bias_enable.depth_bias_enable;
3585    state->depth_bias.enabled = cmd->u.set_depth_bias_enable.depth_bias_enable;
3586 }
3587 
3588 static void handle_set_logic_op(struct vk_cmd_queue_entry *cmd,
3589                                 struct rendering_state *state)
3590 {
3591    unsigned op = vk_logic_op_to_pipe(cmd->u.set_logic_op_ext.logic_op);
3592    state->rs_dirty |= state->blend_state.logicop_func != op;
3593    state->blend_state.logicop_func = op;
3594 }
3595 
3596 static void handle_set_patch_control_points(struct vk_cmd_queue_entry *cmd,
3597                                             struct rendering_state *state)
3598 {
3599    if (state->patch_vertices != cmd->u.set_patch_control_points_ext.patch_control_points)
3600       state->pctx->set_patch_vertices(state->pctx, cmd->u.set_patch_control_points_ext.patch_control_points);
3601    state->patch_vertices = cmd->u.set_patch_control_points_ext.patch_control_points;
3602 }
3603 
3604 static void handle_set_primitive_restart_enable(struct vk_cmd_queue_entry *cmd,
3605                                                 struct rendering_state *state)
3606 {
3607    state->info.primitive_restart = cmd->u.set_primitive_restart_enable.primitive_restart_enable;
3608 }
3609 
3610 static void handle_set_rasterizer_discard_enable(struct vk_cmd_queue_entry *cmd,
3611                                                  struct rendering_state *state)
3612 {
3613    state->rs_dirty |= state->rs_state.rasterizer_discard != cmd->u.set_rasterizer_discard_enable.rasterizer_discard_enable;
3614    state->rs_state.rasterizer_discard = cmd->u.set_rasterizer_discard_enable.rasterizer_discard_enable;
3615 }
3616 
3617 static void handle_set_color_write_enable(struct vk_cmd_queue_entry *cmd,
3618                                           struct rendering_state *state)
3619 {
3620    uint8_t disable_mask = 0; //PIPE_MAX_COLOR_BUFS is max attachment count
3621 
3622    for (unsigned i = 0; i < cmd->u.set_color_write_enable_ext.attachment_count; i++) {
3623       /* this is inverted because cmdbufs are zero-initialized, meaning only 'true'
3624        * can be detected with a bool, and the default is to enable color writes
3625        */
3626       if (cmd->u.set_color_write_enable_ext.color_write_enables[i] != VK_TRUE)
3627          disable_mask |= BITFIELD_BIT(i);
3628    }
3629 
3630    state->blend_dirty |= state->color_write_disables != disable_mask;
3631    state->color_write_disables = disable_mask;
3632 }
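/* Illustrative sketch (added, not in the upstream source): with
 * attachment_count = 3 and color_write_enables = { VK_TRUE, VK_FALSE, VK_TRUE },
 * only bit 1 ends up set:
 *
 *    disable_mask == BITFIELD_BIT(1)
 *
 * so writes to attachment 1 can be masked off when the blend state is
 * re-emitted, while unset trailing attachments keep their default (enabled).
 */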
3633 
3634 static void handle_set_polygon_mode(struct vk_cmd_queue_entry *cmd,
3635                                     struct rendering_state *state)
3636 {
3637    unsigned polygon_mode = vk_polygon_mode_to_pipe(cmd->u.set_polygon_mode_ext.polygon_mode);
3638    if (state->rs_state.fill_front != polygon_mode)
3639       state->rs_dirty = true;
3640    state->rs_state.fill_front = polygon_mode;
3641    if (state->rs_state.fill_back != polygon_mode)
3642       state->rs_dirty = true;
3643    state->rs_state.fill_back = polygon_mode;
3644 }
3645 
3646 static void handle_set_tessellation_domain_origin(struct vk_cmd_queue_entry *cmd,
3647                                                   struct rendering_state *state)
3648 {
3649    bool tess_ccw = cmd->u.set_tessellation_domain_origin_ext.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT;
3650    if (tess_ccw == state->tess_ccw)
3651       return;
3652    state->tess_ccw = tess_ccw;
3653    if (state->tess_states[state->tess_ccw])
3654       state->pctx->bind_tes_state(state->pctx, state->tess_states[state->tess_ccw]);
3655 }
3656 
3657 static void handle_set_depth_clamp_enable(struct vk_cmd_queue_entry *cmd,
3658                                           struct rendering_state *state)
3659 {
3660    state->rs_dirty |= state->rs_state.depth_clamp != cmd->u.set_depth_clamp_enable_ext.depth_clamp_enable;
3661    state->rs_state.depth_clamp = !!cmd->u.set_depth_clamp_enable_ext.depth_clamp_enable;
3662    if (state->depth_clamp_sets_clip)
3663       state->rs_state.depth_clip_near = state->rs_state.depth_clip_far = !state->rs_state.depth_clamp;
3664 }
3665 
3666 static void handle_set_depth_clip_enable(struct vk_cmd_queue_entry *cmd,
3667                                          struct rendering_state *state)
3668 {
3669    state->rs_dirty |= state->rs_state.depth_clip_far != !!cmd->u.set_depth_clip_enable_ext.depth_clip_enable;
3670    state->rs_state.depth_clip_near = state->rs_state.depth_clip_far = !!cmd->u.set_depth_clip_enable_ext.depth_clip_enable;
3671 }
3672 
3673 static void handle_set_logic_op_enable(struct vk_cmd_queue_entry *cmd,
3674                                          struct rendering_state *state)
3675 {
3676    state->blend_dirty |= state->blend_state.logicop_enable != !!cmd->u.set_logic_op_enable_ext.logic_op_enable;
3677    state->blend_state.logicop_enable = !!cmd->u.set_logic_op_enable_ext.logic_op_enable;
3678 }
3679 
3680 static void handle_set_sample_mask(struct vk_cmd_queue_entry *cmd,
3681                                    struct rendering_state *state)
3682 {
3683    unsigned mask = cmd->u.set_sample_mask_ext.sample_mask ? cmd->u.set_sample_mask_ext.sample_mask[0] : 0xffffffff;
3684    state->sample_mask_dirty |= state->sample_mask != mask;
3685    state->sample_mask = mask;
3686 }
3687 
3688 static void handle_set_samples(struct vk_cmd_queue_entry *cmd,
3689                                struct rendering_state *state)
3690 {
3691    update_samples(state, cmd->u.set_rasterization_samples_ext.rasterization_samples);
3692 }
3693 
3694 static void handle_set_alpha_to_coverage(struct vk_cmd_queue_entry *cmd,
3695                                          struct rendering_state *state)
3696 {
3697    state->blend_dirty |=
3698       state->blend_state.alpha_to_coverage != !!cmd->u.set_alpha_to_coverage_enable_ext.alpha_to_coverage_enable;
3699    state->blend_state.alpha_to_coverage = !!cmd->u.set_alpha_to_coverage_enable_ext.alpha_to_coverage_enable;
3700 }
3701 
3702 static void handle_set_alpha_to_one(struct vk_cmd_queue_entry *cmd,
3703                                          struct rendering_state *state)
3704 {
3705    state->blend_dirty |=
3706       state->blend_state.alpha_to_one != !!cmd->u.set_alpha_to_one_enable_ext.alpha_to_one_enable;
3707    state->blend_state.alpha_to_one = !!cmd->u.set_alpha_to_one_enable_ext.alpha_to_one_enable;
3708    if (state->blend_state.alpha_to_one)
3709       state->rs_state.multisample = true;
3710 }
3711 
3712 static void handle_set_halfz(struct vk_cmd_queue_entry *cmd,
3713                              struct rendering_state *state)
3714 {
3715    if (state->rs_state.clip_halfz == !cmd->u.set_depth_clip_negative_one_to_one_ext.negative_one_to_one)
3716       return;
3717    state->rs_dirty = true;
3718    state->rs_state.clip_halfz = !cmd->u.set_depth_clip_negative_one_to_one_ext.negative_one_to_one;
3719    /* handle dynamic state: convert from one transform to the other */
3720    for (unsigned i = 0; i < state->num_viewports; i++)
3721       set_viewport_depth_xform(state, i);
3722    state->vp_dirty = true;
3723 }
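/* Background (added comment): clip_halfz selects the [0,1] (Vulkan-style)
 * versus [-1,1] (GL-style) clip-space Z range, so every already-programmed
 * viewport needs its depth transform recomputed. Roughly, for near/far
 * depths n and f:
 *
 *    halfz:  scale = f - n        translate = n
 *    !halfz: scale = (f - n) / 2  translate = (f + n) / 2
 *
 * (a sketch only; the exact math lives in set_viewport_depth_xform()).
 */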
3724 
3725 static void handle_set_line_rasterization_mode(struct vk_cmd_queue_entry *cmd,
3726                                                struct rendering_state *state)
3727 {
3728    VkLineRasterizationModeKHR lineRasterizationMode = cmd->u.set_line_rasterization_mode_ext.line_rasterization_mode;
3729    /* not even going to bother trying dirty tracking on this */
3730    state->rs_dirty = true;
3731    state->rs_state.line_smooth = lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
3732    state->rs_state.line_rectangular = lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
3733    state->disable_multisample = lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR ||
3734                                 lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
3735 }
3736 
3737 static void handle_set_line_stipple_enable(struct vk_cmd_queue_entry *cmd,
3738                                            struct rendering_state *state)
3739 {
3740    state->rs_dirty |= state->rs_state.line_stipple_enable != !!cmd->u.set_line_stipple_enable_ext.stippled_line_enable;
3741    state->rs_state.line_stipple_enable = cmd->u.set_line_stipple_enable_ext.stippled_line_enable;
3742 }
3743 
3744 static void handle_set_provoking_vertex_mode(struct vk_cmd_queue_entry *cmd,
3745                                              struct rendering_state *state)
3746 {
3747    bool flatshade_first = cmd->u.set_provoking_vertex_mode_ext.provoking_vertex_mode != VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
3748    state->rs_dirty |= state->rs_state.flatshade_first != flatshade_first;
3749    state->rs_state.flatshade_first = flatshade_first;
3750 }
3751 
3752 static void handle_set_color_blend_enable(struct vk_cmd_queue_entry *cmd,
3753                                           struct rendering_state *state)
3754 {
3755    for (unsigned i = 0; i < cmd->u.set_color_blend_enable_ext.attachment_count; i++) {
3756       if (state->blend_state.rt[cmd->u.set_color_blend_enable_ext.first_attachment + i].blend_enable != !!cmd->u.set_color_blend_enable_ext.color_blend_enables[i]) {
3757          state->blend_dirty = true;
3758       }
3759       state->blend_state.rt[cmd->u.set_color_blend_enable_ext.first_attachment + i].blend_enable = !!cmd->u.set_color_blend_enable_ext.color_blend_enables[i];
3760    }
3761 }
3762 
3763 static void handle_set_color_write_mask(struct vk_cmd_queue_entry *cmd,
3764                                         struct rendering_state *state)
3765 {
3766    for (unsigned i = 0; i < cmd->u.set_color_write_mask_ext.attachment_count; i++) {
3767       if (state->blend_state.rt[cmd->u.set_color_write_mask_ext.first_attachment + i].colormask != cmd->u.set_color_write_mask_ext.color_write_masks[i])
3768          state->blend_dirty = true;
3769       state->blend_state.rt[cmd->u.set_color_write_mask_ext.first_attachment + i].colormask = cmd->u.set_color_write_mask_ext.color_write_masks[i];
3770    }
3771 }
3772 
3773 static void handle_set_color_blend_equation(struct vk_cmd_queue_entry *cmd,
3774                                             struct rendering_state *state)
3775 {
3776    const VkColorBlendEquationEXT *cb = cmd->u.set_color_blend_equation_ext.color_blend_equations;
3777    state->blend_dirty = true;
3778    for (unsigned i = 0; i < cmd->u.set_color_blend_equation_ext.attachment_count; i++) {
3779       state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_func = vk_blend_op_to_pipe(cb[i].colorBlendOp);
3780       state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_src_factor = vk_blend_factor_to_pipe(cb[i].srcColorBlendFactor);
3781       state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_dst_factor = vk_blend_factor_to_pipe(cb[i].dstColorBlendFactor);
3782       state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_func = vk_blend_op_to_pipe(cb[i].alphaBlendOp);
3783       state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_src_factor = vk_blend_factor_to_pipe(cb[i].srcAlphaBlendFactor);
3784       state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_dst_factor = vk_blend_factor_to_pipe(cb[i].dstAlphaBlendFactor);
3785 
3786       /* At least llvmpipe applies the blend factor prior to the blend function,
3787        * regardless of which function is used (as i965 hardware does).
3788        * This means that for MIN/MAX the blend factors have to be stomped to ONE.
3789        */
3790       if (cb[i].colorBlendOp == VK_BLEND_OP_MIN ||
3791           cb[i].colorBlendOp == VK_BLEND_OP_MAX) {
3792          state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
3793          state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
3794       }
3795 
3796       if (cb[i].alphaBlendOp == VK_BLEND_OP_MIN ||
3797           cb[i].alphaBlendOp == VK_BLEND_OP_MAX) {
3798          state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
3799          state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
3800       }
3801    }
3802 }
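/* Worked example (added, not in the upstream source): Vulkan defines
 * VK_BLEND_OP_MIN as min(Cs, Cd) with the blend factors ignored. A backend
 * that applies factors first would instead compute
 *
 *    min(Cs * srcFactor, Cd * dstFactor)
 *
 * so stomping both factors to PIPE_BLENDFACTOR_ONE above makes the two
 * formulations agree.
 */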
3803 
3804 static void
3805 handle_shaders(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
3806 {
3807    struct vk_cmd_bind_shaders_ext *bind = &cmd->u.bind_shaders_ext;
3808 
3809    bool gfx = false;
3810    VkShaderStageFlagBits vkstages = 0;
3811    unsigned new_stages = 0;
3812    unsigned null_stages = 0;
3813    for (unsigned i = 0; i < bind->stage_count; i++) {
3814       gl_shader_stage stage = vk_to_mesa_shader_stage(bind->stages[i]);
3815       assert(stage != MESA_SHADER_NONE && stage <= MESA_SHADER_MESH);
3816       LVP_FROM_HANDLE(lvp_shader, shader, bind->shaders ? bind->shaders[i] : VK_NULL_HANDLE);
3817       if (stage == MESA_SHADER_FRAGMENT) {
3818          if (shader) {
3819             state->force_min_sample = shader->pipeline_nir->nir->info.fs.uses_sample_shading;
3820             state->sample_shading = state->force_min_sample;
3821             update_samples(state, state->rast_samples);
3822          } else {
3823             state->force_min_sample = false;
3824             state->sample_shading = false;
3825          }
3826       }
3827       if (shader) {
3828          vkstages |= bind->stages[i];
3829          new_stages |= BITFIELD_BIT(stage);
3830          state->shaders[stage] = shader;
3831       } else {
3832          if (state->shaders[stage])
3833             null_stages |= bind->stages[i];
3834       }
3835 
3836       if (stage != MESA_SHADER_COMPUTE) {
3837          state->gfx_push_sizes[stage] = shader ? shader->layout->push_constant_size : 0;
3838          gfx = true;
3839       } else {
3840          state->push_size[1] = shader ? shader->layout->push_constant_size : 0;
3841       }
3842    }
3843 
3844    if ((new_stages | null_stages) & LVP_STAGE_MASK_GFX) {
3845       VkShaderStageFlags all_gfx = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT;
3846       unbind_graphics_stages(state, null_stages & all_gfx);
3847       handle_graphics_stages(state, vkstages & all_gfx, true);
3848       u_foreach_bit(i, new_stages) {
3849          handle_graphics_layout(state, i, state->shaders[i]->layout);
3850       }
3851    }
3852    /* ignore compute unbinds */
3853    if (new_stages & BITFIELD_BIT(MESA_SHADER_COMPUTE)) {
3854       handle_compute_shader(state, state->shaders[MESA_SHADER_COMPUTE], state->shaders[MESA_SHADER_COMPUTE]->layout);
3855    }
3856 
3857    if (gfx) {
3858       state->push_size[0] = 0;
3859       for (unsigned i = 0; i < ARRAY_SIZE(state->gfx_push_sizes); i++)
3860          state->push_size[0] += state->gfx_push_sizes[i];
3861    }
3862 }
3863 
3864 static void
3865 update_mesh_state(struct rendering_state *state)
3866 {
3867    if (state->shaders[MESA_SHADER_TASK]) {
3868       state->dispatch_info.block[0] = state->shaders[MESA_SHADER_TASK]->pipeline_nir->nir->info.workgroup_size[0];
3869       state->dispatch_info.block[1] = state->shaders[MESA_SHADER_TASK]->pipeline_nir->nir->info.workgroup_size[1];
3870       state->dispatch_info.block[2] = state->shaders[MESA_SHADER_TASK]->pipeline_nir->nir->info.workgroup_size[2];
3871    } else {
3872       state->dispatch_info.block[0] = state->shaders[MESA_SHADER_MESH]->pipeline_nir->nir->info.workgroup_size[0];
3873       state->dispatch_info.block[1] = state->shaders[MESA_SHADER_MESH]->pipeline_nir->nir->info.workgroup_size[1];
3874       state->dispatch_info.block[2] = state->shaders[MESA_SHADER_MESH]->pipeline_nir->nir->info.workgroup_size[2];
3875    }
3876 }
3877 
3878 static void handle_draw_mesh_tasks(struct vk_cmd_queue_entry *cmd,
3879                                    struct rendering_state *state)
3880 {
3881    update_mesh_state(state);
3882    state->dispatch_info.grid[0] = cmd->u.draw_mesh_tasks_ext.group_count_x;
3883    state->dispatch_info.grid[1] = cmd->u.draw_mesh_tasks_ext.group_count_y;
3884    state->dispatch_info.grid[2] = cmd->u.draw_mesh_tasks_ext.group_count_z;
3885    state->dispatch_info.grid_base[0] = 0;
3886    state->dispatch_info.grid_base[1] = 0;
3887    state->dispatch_info.grid_base[2] = 0;
3888    state->dispatch_info.draw_count = 1;
3889    state->dispatch_info.indirect = NULL;
3890    state->pctx->draw_mesh_tasks(state->pctx, 0, &state->dispatch_info);
3891 }
3892 
3893 static void handle_draw_mesh_tasks_indirect(struct vk_cmd_queue_entry *cmd,
3894                                             struct rendering_state *state)
3895 {
3896    update_mesh_state(state);
3897    state->dispatch_info.indirect = lvp_buffer_from_handle(cmd->u.draw_mesh_tasks_indirect_ext.buffer)->bo;
3898    state->dispatch_info.indirect_offset = cmd->u.draw_mesh_tasks_indirect_ext.offset;
3899    state->dispatch_info.indirect_stride = cmd->u.draw_mesh_tasks_indirect_ext.stride;
3900    state->dispatch_info.draw_count = cmd->u.draw_mesh_tasks_indirect_ext.draw_count;
3901    state->pctx->draw_mesh_tasks(state->pctx, 0, &state->dispatch_info);
3902 }
3903 
3904 static void handle_draw_mesh_tasks_indirect_count(struct vk_cmd_queue_entry *cmd,
3905                                                   struct rendering_state *state)
3906 {
3907    update_mesh_state(state);
3908    state->dispatch_info.indirect = lvp_buffer_from_handle(cmd->u.draw_mesh_tasks_indirect_count_ext.buffer)->bo;
3909    state->dispatch_info.indirect_offset = cmd->u.draw_mesh_tasks_indirect_count_ext.offset;
3910    state->dispatch_info.indirect_stride = cmd->u.draw_mesh_tasks_indirect_count_ext.stride;
3911    state->dispatch_info.draw_count = cmd->u.draw_mesh_tasks_indirect_count_ext.max_draw_count;
3912    state->dispatch_info.indirect_draw_count_offset = cmd->u.draw_mesh_tasks_indirect_count_ext.count_buffer_offset;
3913    state->dispatch_info.indirect_draw_count = lvp_buffer_from_handle(cmd->u.draw_mesh_tasks_indirect_count_ext.count_buffer)->bo;
3914    state->pctx->draw_mesh_tasks(state->pctx, 0, &state->dispatch_info);
3915 }
3916 
3917 static VkBuffer
3918 get_buffer(struct rendering_state *state, const uint8_t *ptr, size_t *offset)
3919 {
3920    simple_mtx_lock(&state->device->bda_lock);
3921    hash_table_foreach(&state->device->bda, he) {
3922       const uint8_t *bda = he->key;
3923       if (ptr < bda)
3924          continue;
3925       struct lvp_buffer *buffer = he->data;
3926       if (bda + buffer->vk.size > ptr) {
3927          *offset = ptr - bda;
3928          simple_mtx_unlock(&state->device->bda_lock);
3929          return lvp_buffer_to_handle(buffer);
3930       }
3931    }
3932    fprintf(stderr, "unrecognized BDA!\n");
3933    abort();
3934 }
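/* Usage sketch (added): DGC tokens reference buffers by raw VkDeviceAddress,
 * so the driver has to reverse-map the address. Given a captured address
 * `addr`, the lookup above finds the buffer whose [bda, bda + size) range
 * contains it:
 *
 *    size_t offset;
 *    VkBuffer buf = get_buffer(state, (const uint8_t *)(uintptr_t)addr, &offset);
 *    // offset == addr - base device address of buf
 *
 * Unrecognized addresses abort, since there is nothing sensible to bind.
 */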
3935 
3936 static size_t
3937 process_sequence(struct rendering_state *state,
3938                  VkPipeline pipeline, struct lvp_indirect_command_layout_nv *dlayout,
3939                  struct list_head *list, uint8_t *pbuf, size_t max_size,
3940                  uint8_t **map_streams, const VkIndirectCommandsStreamNV *pstreams, uint32_t seq, bool print_cmds)
3941 {
3942    size_t size = 0;
3943 
3944    for (uint32_t t = 0; t < dlayout->token_count; t++){
3945       const VkIndirectCommandsLayoutTokenNV *token = &dlayout->tokens[t];
3946       uint32_t stride = dlayout->stream_strides[token->stream];
3947       uint8_t *stream = map_streams[token->stream];
3948       uint32_t offset = stride * seq + token->offset;
3949       uint32_t draw_offset = offset + pstreams[token->stream].offset;
3950       void *input = stream + offset;
3951 
3952       struct vk_cmd_queue_entry *cmd = (struct vk_cmd_queue_entry*)(pbuf + size);
3953       size_t cmd_size = vk_cmd_queue_type_sizes[lvp_nv_dgc_token_to_cmd_type(token)];
3954       uint8_t *cmdptr = (void*)(pbuf + size + cmd_size);
3955 
3956       if (print_cmds)
3957          fprintf(stderr, "DGC %s\n", vk_IndirectCommandsTokenTypeNV_to_str(token->tokenType));
3958 
3959       if (max_size < size + cmd_size)
3960          abort();
3961       cmd->type = lvp_nv_dgc_token_to_cmd_type(token);
3962 
3963       switch (token->tokenType) {
3964       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV: {
3965          VkBindShaderGroupIndirectCommandNV *bind = input;
3966          cmd->u.bind_pipeline_shader_group_nv.pipeline_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
3967          cmd->u.bind_pipeline_shader_group_nv.pipeline = pipeline;
3968          cmd->u.bind_pipeline_shader_group_nv.group_index = bind->groupIndex;
3969          break;
3970       }
3971       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV: {
3972          VkSetStateFlagsIndirectCommandNV *state = input;
3973          if (token->indirectStateFlags & VK_INDIRECT_STATE_FLAG_FRONTFACE_BIT_NV) {
3974             if (state->data & BITFIELD_BIT(VK_FRONT_FACE_CLOCKWISE)) {
3975                cmd->u.set_front_face.front_face = VK_FRONT_FACE_CLOCKWISE;
3976             } else {
3977                cmd->u.set_front_face.front_face = VK_FRONT_FACE_COUNTER_CLOCKWISE;
3978             }
3979          } else {
3980             /* skip this if unrecognized state flag */
3981             continue;
3982          }
3983          break;
3984       }
3985       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: {
3986          uint32_t *data = input;
3987          cmd_size += token->pushconstantSize + sizeof(VkPushConstantsInfoKHR);
3988          if (max_size < size + cmd_size)
3989             abort();
3990          cmd->u.push_constants2_khr.push_constants_info = (void*)cmdptr;
3991          VkPushConstantsInfoKHR *pci = cmd->u.push_constants2_khr.push_constants_info;
3992          pci->layout = token->pushconstantPipelineLayout;
3993          pci->stageFlags = token->pushconstantShaderStageFlags;
3994          pci->offset = token->pushconstantOffset;
3995          pci->size = token->pushconstantSize;
3996          pci->pValues = (void*)((uint8_t*)cmdptr + sizeof(VkPushConstantsInfoKHR));
3997          memcpy((void*)pci->pValues, data, token->pushconstantSize);
3998          break;
3999       }
4000       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: {
4001          VkBindIndexBufferIndirectCommandNV *data = input;
4002          cmd->u.bind_index_buffer.offset = 0;
4003          if (data->bufferAddress)
4004             cmd->u.bind_index_buffer.buffer = get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_index_buffer.offset);
4005          else
4006             cmd->u.bind_index_buffer.buffer = VK_NULL_HANDLE;
4007          cmd->u.bind_index_buffer.index_type = data->indexType;
4008          for (unsigned i = 0; i < token->indexTypeCount; i++) {
4009             if (data->indexType == token->pIndexTypeValues[i]) {
4010                cmd->u.bind_index_buffer.index_type = token->pIndexTypes[i];
4011                break;
4012             }
4013          }
4014          break;
4015       }
4016       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: {
4017          VkBindVertexBufferIndirectCommandNV *data = input;
4018          cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.buffers) + sizeof(*cmd->u.bind_vertex_buffers2.offsets);
4019          cmd_size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
4020          if (max_size < size + cmd_size)
4021             abort();
4022 
4023          cmd->u.bind_vertex_buffers2.first_binding = token->vertexBindingUnit;
4024          cmd->u.bind_vertex_buffers2.binding_count = 1;
4025 
4026          cmd->u.bind_vertex_buffers2.buffers = (void*)cmdptr;
4027          uint32_t alloc_offset = sizeof(*cmd->u.bind_vertex_buffers2.buffers);
4028 
4029          cmd->u.bind_vertex_buffers2.offsets = (void*)(cmdptr + alloc_offset);
4030          alloc_offset += sizeof(*cmd->u.bind_vertex_buffers2.offsets);
4031 
4032          cmd->u.bind_vertex_buffers2.sizes = (void*)(cmdptr + alloc_offset);
4033          alloc_offset += sizeof(*cmd->u.bind_vertex_buffers2.sizes);
4034 
4035          cmd->u.bind_vertex_buffers2.offsets[0] = 0;
4036          cmd->u.bind_vertex_buffers2.buffers[0] = data->bufferAddress ? get_buffer(state, (void*)(uintptr_t)data->bufferAddress, (size_t*)&cmd->u.bind_vertex_buffers2.offsets[0]) : VK_NULL_HANDLE;
4037          cmd->u.bind_vertex_buffers2.sizes[0] = data->size;
4038 
4039          if (token->vertexDynamicStride) {
4040             cmd->u.bind_vertex_buffers2.strides = (void*)(cmdptr + alloc_offset);
4041             cmd->u.bind_vertex_buffers2.strides[0] = data->stride;
4042          } else {
4043             cmd->u.bind_vertex_buffers2.strides = NULL;
4044          }
4045          break;
4046       }
4047       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV: {
4048          cmd->u.draw_indexed_indirect.buffer = pstreams[token->stream].buffer;
4049          cmd->u.draw_indexed_indirect.offset = draw_offset;
4050          cmd->u.draw_indexed_indirect.draw_count = 1;
4051          cmd->u.draw_indexed_indirect.stride = 0;
4052          break;
4053       }
4054       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV: {
4055          cmd->u.draw_indirect.buffer = pstreams[token->stream].buffer;
4056          cmd->u.draw_indirect.offset = draw_offset;
4057          cmd->u.draw_indirect.draw_count = 1;
4058          cmd->u.draw_indirect.stride = 0;
4059          break;
4060       }
4061       // only available if VK_EXT_mesh_shader is supported
4062       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV: {
4063          cmd->u.draw_mesh_tasks_indirect_ext.buffer = pstreams[token->stream].buffer;
4064          cmd->u.draw_mesh_tasks_indirect_ext.offset = draw_offset;
4065          cmd->u.draw_mesh_tasks_indirect_ext.draw_count = 1;
4066          cmd->u.draw_mesh_tasks_indirect_ext.stride = 0;
4067          break;
4068       }
4069       // only available if VK_NV_mesh_shader is supported
4070       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_TASKS_NV:
4071          unreachable("NV_mesh_shader unsupported!");
4072       default:
4073          unreachable("unknown token type");
4074          break;
4075       }
4076       size += cmd_size;
4077       list_addtail(&cmd->cmd_link, list);
4078    }
4079    return size;
4080 }
4081 
4082 static void
4083 handle_preprocess_generated_commands(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, bool print_cmds)
4084 {
4085    VkGeneratedCommandsInfoNV *pre = cmd->u.preprocess_generated_commands_nv.generated_commands_info;
4086    VK_FROM_HANDLE(lvp_indirect_command_layout_nv, dlayout, pre->indirectCommandsLayout);
4087    struct pipe_transfer *stream_maps[16];
4088    uint8_t *streams[16];
4089    for (unsigned i = 0; i < pre->streamCount; i++) {
4090       struct lvp_buffer *buf = lvp_buffer_from_handle(pre->pStreams[i].buffer);
4091       streams[i] = pipe_buffer_map(state->pctx, buf->bo, PIPE_MAP_READ, &stream_maps[i]);
4092       streams[i] += pre->pStreams[i].offset;
4093    }
4094    LVP_FROM_HANDLE(lvp_buffer, pbuf, pre->preprocessBuffer);
4095    LVP_FROM_HANDLE(lvp_buffer, seqc, pre->sequencesCountBuffer);
4096    LVP_FROM_HANDLE(lvp_buffer, seqi, pre->sequencesIndexBuffer);
4097 
4098    unsigned seq_count = pre->sequencesCount;
4099    if (seqc) {
4100       unsigned count = 0;
4101       pipe_buffer_read(state->pctx, seqc->bo, pre->sequencesCountOffset, sizeof(uint32_t), &count);
4102       seq_count = MIN2(count, seq_count);
4103    }
4104    uint32_t *seq = NULL;
4105    struct pipe_transfer *seq_map = NULL;
4106    if (seqi) {
4107       seq = pipe_buffer_map(state->pctx, seqi->bo, PIPE_MAP_READ, &seq_map);
4108       seq = (uint32_t*)(((uint8_t*)seq) + pre->sequencesIndexOffset);
4109    }
4110 
4111    struct pipe_transfer *pmap;
4112    uint8_t *p = pipe_buffer_map(state->pctx, pbuf->bo, PIPE_MAP_WRITE, &pmap);
4113    p += pre->preprocessOffset;
4114    struct list_head *list = (void*)p;
4115    size_t size = sizeof(struct list_head);
4116    size_t max_size = pre->preprocessSize;
4117    if (size > max_size)
4118       abort();
4119    list_inithead(list);
4120 
4121    size_t offset = size;
4122    for (unsigned i = 0; i < seq_count; i++) {
4123       uint32_t s = seq ? seq[i] : i;
4124       offset += process_sequence(state, pre->pipeline, dlayout, list, p + offset, max_size, streams, pre->pStreams, s, print_cmds);
4125    }
4126 
4127    /* vk_cmd_queue will copy the binary and break the list, so null the tail pointer */
4128    list->prev->next = NULL;
4129 
4130    for (unsigned i = 0; i < pre->streamCount; i++)
4131       state->pctx->buffer_unmap(state->pctx, stream_maps[i]);
4132    state->pctx->buffer_unmap(state->pctx, pmap);
4133    if (seq_map)
4134       state->pctx->buffer_unmap(state->pctx, seq_map);
4135 }
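/* Summary (added comment): preprocessing writes a linked list of
 * vk_cmd_queue_entry structs straight into the app-provided preprocess
 * buffer; each sequence expands into one entry per layout token via
 * process_sequence(). Execution later simply walks that list, which is why
 * the list head lives at preprocessOffset and the tail pointer is NULLed
 * above.
 */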
4136 
4137 static void
4138 handle_execute_generated_commands(struct vk_cmd_queue_entry *cmd, struct rendering_state *state, bool print_cmds)
4139 {
4140    VkGeneratedCommandsInfoNV *gen = cmd->u.execute_generated_commands_nv.generated_commands_info;
4141    struct vk_cmd_execute_generated_commands_nv *exec = &cmd->u.execute_generated_commands_nv;
4142    if (!exec->is_preprocessed) {
4143       struct vk_cmd_queue_entry pre;
4144       pre.u.preprocess_generated_commands_nv.generated_commands_info = exec->generated_commands_info;
4145       handle_preprocess_generated_commands(&pre, state, print_cmds);
4146    }
4147    LVP_FROM_HANDLE(lvp_buffer, pbuf, gen->preprocessBuffer);
4148    struct pipe_transfer *pmap;
4149    uint8_t *p = pipe_buffer_map(state->pctx, pbuf->bo, PIPE_MAP_WRITE, &pmap);
4150    p += gen->preprocessOffset;
4151    struct list_head *list = (void*)p;
4152 
4153    lvp_execute_cmd_buffer(list, state, print_cmds);
4154 
4155    state->pctx->buffer_unmap(state->pctx, pmap);
4156 }
4157 
4158 static void
4159 handle_descriptor_buffers(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4160 {
4161    const struct vk_cmd_bind_descriptor_buffers_ext *bind = &cmd->u.bind_descriptor_buffers_ext;
4162    for (unsigned i = 0; i < bind->buffer_count; i++) {
4163       struct pipe_resource *pres = get_buffer_resource(state->pctx, (void *)(uintptr_t)bind->binding_infos[i].address);
4164       state->desc_buffer_addrs[i] = (void *)(uintptr_t)bind->binding_infos[i].address;
4165       pipe_resource_reference(&state->desc_buffers[i], pres);
4166       /* leave only one ref on rendering_state */
4167       pipe_resource_reference(&pres, NULL);
4168    }
4169 }
4170 
4171 static bool
4172 descriptor_layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
4173 {
4174    const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
4175    uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
4176    uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
4177    /* base equal */
4178    if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
4179       return false;
4180 
4181    /* bindings equal */
4182    if (a->binding_count != b->binding_count)
4183       return false;
4184    size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
4185    const struct lvp_descriptor_set_binding_layout *la = a->binding;
4186    const struct lvp_descriptor_set_binding_layout *lb = b->binding;
4187    if (memcmp(la, lb, binding_size)) {
4188       for (unsigned i = 0; i < a->binding_count; i++) {
4189          if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
4190             return false;
4191       }
4192    }
4193 
4194    /* immutable sampler equal */
4195    if (a->immutable_sampler_count != b->immutable_sampler_count)
4196       return false;
4197    if (a->immutable_sampler_count) {
4198       size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
4199       if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
4200          struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
4201          struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
4202          for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
4203             if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
4204                return false;
4205          }
4206       }
4207    }
4208    return true;
4209 }
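/* Note (added): the raw memcmp()s are fast-path checks. The binding and
 * immutable-sampler arrays contain pointers that can differ between
 * otherwise identical layouts, so on a mismatch the code falls back to
 * comparing the pointed-to contents (excluding the pointer fields) element
 * by element.
 */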
4210 
4211 static void
4212 bind_db_samplers(struct rendering_state *state, enum lvp_pipeline_type pipeline_type, unsigned set)
4213 {
4214    const struct lvp_descriptor_set_layout *set_layout = state->desc_buffer_offsets[pipeline_type][set].sampler_layout;
4215    if (!set_layout)
4216       return;
4217    unsigned buffer_index = state->desc_buffer_offsets[pipeline_type][set].buffer_index;
4218    if (!state->desc_buffer_addrs[buffer_index]) {
4219       if (set_layout->immutable_set) {
4220          state->desc_sets[pipeline_type][set] = set_layout->immutable_set;
4221          if (pipeline_type == LVP_PIPELINE_RAY_TRACING) {
4222             handle_set_stage_buffer(state, set_layout->immutable_set->bo, 0, MESA_SHADER_RAYGEN, set);
4223          } else {
4224             u_foreach_bit(stage, set_layout->shader_stages)
4225                handle_set_stage_buffer(state, set_layout->immutable_set->bo, 0, vk_to_mesa_shader_stage(1<<stage), set);
4226          }
4227       }
4228       return;
4229    }
4230    uint8_t *db = state->desc_buffer_addrs[buffer_index] + state->desc_buffer_offsets[pipeline_type][set].offset;
4231    uint8_t did_update = 0;
4232    for (uint32_t binding_index = 0; binding_index < set_layout->binding_count; binding_index++) {
4233       const struct lvp_descriptor_set_binding_layout *bind_layout = &set_layout->binding[binding_index];
4234       if (!bind_layout->immutable_samplers)
4235          continue;
4236 
4237       struct lp_descriptor *desc = (void*)db;
4238       desc += bind_layout->descriptor_index;
4239 
4240       for (uint32_t sampler_index = 0; sampler_index < bind_layout->array_size; sampler_index++) {
4241          if (bind_layout->immutable_samplers[sampler_index]) {
4242             struct lp_descriptor *immutable_desc = &bind_layout->immutable_samplers[sampler_index]->desc;
4243             desc[sampler_index].sampler = immutable_desc->sampler;
4244             desc[sampler_index].texture.sampler_index = immutable_desc->texture.sampler_index;
4245             if (pipeline_type == LVP_PIPELINE_RAY_TRACING) {
4246                did_update |= BITFIELD_BIT(MESA_SHADER_RAYGEN);
4247             } else {
4248                u_foreach_bit(stage, set_layout->shader_stages)
4249                   did_update |= BITFIELD_BIT(vk_to_mesa_shader_stage(1<<stage));
4250             }
4251          }
4252       }
4253    }
4254    u_foreach_bit(stage, did_update)
4255       state->constbuf_dirty[stage] = true;
4256 }
4257 
4258 static void
4259 handle_descriptor_buffer_embedded_samplers(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4260 {
4261    const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *bind = cmd->u.bind_descriptor_buffer_embedded_samplers2_ext.bind_descriptor_buffer_embedded_samplers_info;
4262    LVP_FROM_HANDLE(lvp_pipeline_layout, layout, bind->layout);
4263 
4264    if (!layout->vk.set_layouts[bind->set])
4265       return;
4266 
4267    const struct lvp_descriptor_set_layout *set_layout = get_set_layout(layout, bind->set);
4268    if (!set_layout->immutable_sampler_count)
4269       return;
4270    uint32_t types = lvp_pipeline_types_from_shader_stages(bind->stageFlags);
4271    u_foreach_bit(pipeline_type, types) {
4272       state->desc_buffer_offsets[pipeline_type][bind->set].sampler_layout = set_layout;
4273       bind_db_samplers(state, pipeline_type, bind->set);
4274    }
4275 }
4276 
4277 static void
4278 handle_descriptor_buffer_offsets(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4279 {
4280    VkSetDescriptorBufferOffsetsInfoEXT *dbo = cmd->u.set_descriptor_buffer_offsets2_ext.set_descriptor_buffer_offsets_info;
4281    uint32_t types = lvp_pipeline_types_from_shader_stages(dbo->stageFlags);
4282    u_foreach_bit(pipeline_type, types) {
4283       for (unsigned i = 0; i < dbo->setCount; i++) {
4284          LVP_FROM_HANDLE(lvp_pipeline_layout, layout, dbo->layout);
4285          unsigned idx = dbo->firstSet + i;
4286          state->desc_buffer_offsets[pipeline_type][idx].buffer_index = dbo->pBufferIndices[i];
4287          state->desc_buffer_offsets[pipeline_type][idx].offset = dbo->pOffsets[i];
4288          const struct lvp_descriptor_set_layout *set_layout = get_set_layout(layout, idx);
4289 
4290          if (pipeline_type == LVP_PIPELINE_RAY_TRACING) {
4291             handle_set_stage_buffer(state, state->desc_buffers[dbo->pBufferIndices[i]], dbo->pOffsets[i], MESA_SHADER_RAYGEN, idx);
4292          } else {
4293             /* set for all stages */
4294             u_foreach_bit(stage, set_layout->shader_stages) {
4295                gl_shader_stage pstage = vk_to_mesa_shader_stage(1<<stage);
4296                handle_set_stage_buffer(state, state->desc_buffers[dbo->pBufferIndices[i]], dbo->pOffsets[i], pstage, idx);
4297             }
4298          }
4299          bind_db_samplers(state, pipeline_type, idx);
4300       }
4301    }
4302 }
4303 
4304 static void *
4305 lvp_push_internal_buffer(struct rendering_state *state, gl_shader_stage stage, uint32_t size)
4306 {
4307    if (!size)
4308       return NULL;
4309 
4310    struct pipe_shader_buffer buffer = {
4311       .buffer_size = size,
4312    };
4313 
4314    uint8_t *mem;
4315    u_upload_alloc(state->uploader, 0, size, 64, &buffer.buffer_offset, &buffer.buffer, (void**)&mem);
4316 
4317    state->pctx->set_shader_buffers(state->pctx, stage, 0, 1, &buffer, 0x1);
4318 
4319    util_dynarray_append(&state->internal_buffers, struct pipe_resource *, buffer.buffer);
4320 
4321    return mem;
4322 }
4323 
4324 #ifdef VK_ENABLE_BETA_EXTENSIONS
4325 
4326 static void
4327 dispatch_graph(struct rendering_state *state, const VkDispatchGraphInfoAMDX *info, void *scratch)
4328 {
4329    VK_FROM_HANDLE(lvp_pipeline, pipeline, state->exec_graph->groups[info->nodeIndex]);
4330    struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_COMPUTE];
4331    nir_shader *nir = shader->pipeline_nir->nir;
4332 
4333    VkPipelineShaderStageNodeCreateInfoAMDX enqueue_node_info = {
4334       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX,
4335       .pName = pipeline->exec_graph.next_name,
4336    };
4337 
4338    for (uint32_t i = 0; i < info->payloadCount; i++) {
4339       const void *payload = (const void *)((const uint8_t *)info->payloads.hostAddress + i * info->payloadStride);
4340 
4341       /* The spec doesn't specify any useful limits for enqueued payloads.
4342        * Since we allocate them in scratch memory (provided to the dispatch entrypoint),
4343        * we need to execute recursive shaders one to keep scratch requirements finite.
4344        * we need to execute recursive shaders one workgroup at a time to keep scratch requirements finite.
4345       VkDispatchIndirectCommand dispatch = *(const VkDispatchIndirectCommand *)payload;
4346       if (nir->info.cs.workgroup_count[0]) {
4347          dispatch.x = nir->info.cs.workgroup_count[0];
4348          dispatch.y = nir->info.cs.workgroup_count[1];
4349          dispatch.z = nir->info.cs.workgroup_count[2];
4350       }
4351 
4352       state->dispatch_info.indirect = NULL;
4353       state->dispatch_info.grid[0] = 1;
4354       state->dispatch_info.grid[1] = 1;
4355       state->dispatch_info.grid[2] = 1;
4356 
4357       for (uint32_t z = 0; z < dispatch.z; z++) {
4358          for (uint32_t y = 0; y < dispatch.y; y++) {
4359             for (uint32_t x = 0; x < dispatch.x; x++) {
4360                handle_compute_shader(state, shader, pipeline->layout);
4361                emit_compute_state(state);
4362 
4363                state->dispatch_info.grid_base[0] = x;
4364                state->dispatch_info.grid_base[1] = y;
4365                state->dispatch_info.grid_base[2] = z;
4366 
4367                struct lvp_exec_graph_internal_data *internal_data =
4368                   lvp_push_internal_buffer(state, MESA_SHADER_COMPUTE, sizeof(struct lvp_exec_graph_internal_data));
4369                internal_data->payload_in = (void *)payload;
4370                internal_data->payloads = (void *)scratch;
4371 
4372                state->pctx->launch_grid(state->pctx, &state->dispatch_info);
4373 
4374                /* Amazing performance. */
4375                finish_fence(state);
4376 
4377                for (uint32_t enqueue = 0; enqueue < ARRAY_SIZE(internal_data->outputs); enqueue++) {
4378                   struct lvp_exec_graph_shader_output *output = &internal_data->outputs[enqueue];
4379                   if (!output->payload_count)
4380                      continue;
4381 
4382                   VkDispatchGraphInfoAMDX enqueue_info = {
4383                      .payloadCount = output->payload_count,
4384                      .payloads.hostAddress = (uint8_t *)scratch + enqueue * nir->info.cs.node_payloads_size,
4385                      .payloadStride = nir->info.cs.node_payloads_size,
4386                   };
4387 
4388                   enqueue_node_info.index = output->node_index;
4389 
4390                   ASSERTED VkResult result = lvp_GetExecutionGraphPipelineNodeIndexAMDX(
4391                      lvp_device_to_handle(state->device), lvp_pipeline_to_handle(state->exec_graph),
4392                      &enqueue_node_info, &enqueue_info.nodeIndex);
4393                   assert(result == VK_SUCCESS);
4394 
4395                   dispatch_graph(state, &enqueue_info, (uint8_t *)scratch + pipeline->exec_graph.scratch_size);
4396                }
4397             }
4398          }
4399       }
4400    }
4401 }
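/* Note (added): launching one workgroup at a time (grid = 1x1x1 with a
 * moving grid_base) and fencing after each launch lets the shader's outputs
 * be read back and any recursively enqueued nodes be dispatched immediately,
 * keeping the scratch footprint bounded at the cost of fully serializing the
 * graph.
 */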
4402 
4403 static void
4404 handle_dispatch_graph(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4405 {
4406    const struct vk_cmd_dispatch_graph_amdx *dispatch = &cmd->u.dispatch_graph_amdx;
4407 
4408    for (uint32_t i = 0; i < dispatch->count_info->count; i++) {
4409       const VkDispatchGraphInfoAMDX *info = (const void *)((const uint8_t *)dispatch->count_info->infos.hostAddress +
4410                                                            i * dispatch->count_info->stride);
4411 
4412       dispatch_graph(state, info, (void *)(uintptr_t)dispatch->scratch);
4413    }
4414 }
4415 #endif
4416 
4417 static struct pipe_resource *
4418 get_buffer_pipe(struct rendering_state *state, const void *ptr)
4419 {
4420    size_t offset;
4421    VK_FROM_HANDLE(lvp_buffer, buffer, get_buffer(state, ptr, &offset));
4422    return buffer->bo;
4423 }
4424 
4425 static void
4426 handle_copy_acceleration_structure(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4427 {
4428    struct vk_cmd_copy_acceleration_structure_khr *copy = &cmd->u.copy_acceleration_structure_khr;
4429 
4430    VK_FROM_HANDLE(vk_acceleration_structure, src, copy->info->src);
4431    VK_FROM_HANDLE(vk_acceleration_structure, dst, copy->info->dst);
4432 
4433    struct pipe_box box = { 0 };
4434    u_box_1d(src->offset, MIN2(src->size, dst->size), &box);
4435    state->pctx->resource_copy_region(state->pctx, lvp_buffer_from_handle(dst->buffer)->bo, 0,
4436                                      dst->offset, 0, 0,
4437                                      lvp_buffer_from_handle(src->buffer)->bo, 0, &box);
4438 }
4439 
4440 static void
4441 handle_copy_memory_to_acceleration_structure(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4442 {
4443    struct vk_cmd_copy_memory_to_acceleration_structure_khr *copy = &cmd->u.copy_memory_to_acceleration_structure_khr;
4444 
4445    VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, copy->info->dst);
4446 
4447    struct lvp_bvh_header *dst = (void *)(uintptr_t)vk_acceleration_structure_get_va(accel_struct);
4448    const struct lvp_accel_struct_serialization_header *src = copy->info->src.hostAddress;
4449 
4450    memcpy(dst, &src->instances[src->instance_count], src->compacted_size);
4451 
4452    for (uint32_t i = 0; i < src->instance_count; i++) {
4453       uint8_t *leaf_nodes = (uint8_t *)dst;
4454       leaf_nodes += dst->leaf_nodes_offset;
4455       struct lvp_bvh_instance_node *node = (struct lvp_bvh_instance_node *)leaf_nodes;
4456       node[i].bvh_ptr = src->instances[i];
4457    }
4458 }
4459 
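/* vkCmdCopyAccelerationStructureToMemoryKHR: write the serialization header
 * (UUIDs, sizes, instance pointers) followed by the raw BVH contents.
 */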
4460 static void
4461 handle_copy_acceleration_structure_to_memory(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4462 {
4463    struct vk_cmd_copy_acceleration_structure_to_memory_khr *copy = &cmd->u.copy_acceleration_structure_to_memory_khr;
4464 
4465    VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, copy->info->src);
4466 
4467    struct lvp_bvh_header *src = (void *)(uintptr_t)vk_acceleration_structure_get_va(accel_struct);
4468    struct lvp_accel_struct_serialization_header *dst = copy->info->dst.hostAddress;
4469 
4470    lvp_device_get_cache_uuid(dst->driver_uuid);
4471    lvp_device_get_cache_uuid(dst->accel_struct_compat);
4472    dst->serialization_size = src->serialization_size;
4473    dst->compacted_size = accel_struct->size;
4474    dst->instance_count = src->instance_count;
4475 
4476    for (uint32_t i = 0; i < src->instance_count; i++) {
4477       uint8_t *leaf_nodes = (uint8_t *)src;
4478       leaf_nodes += src->leaf_nodes_offset;
4479       struct lvp_bvh_instance_node *node = (struct lvp_bvh_instance_node *)leaf_nodes;
4480       dst->instances[i] = node[i].bvh_ptr;
4481    }
4482 
4483    memcpy(&dst->instances[dst->instance_count], src, accel_struct->size);
4484 }
4485 
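/* vkCmdBuildAccelerationStructuresKHR: builds run immediately on the host,
 * one info/range pair at a time.
 */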
4486 static void
4487 handle_build_acceleration_structures(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4488 {
4489    struct vk_cmd_build_acceleration_structures_khr *build = &cmd->u.build_acceleration_structures_khr;
4490 
4491    for (uint32_t i = 0; i < build->info_count; i++)
4492       lvp_build_acceleration_structure(&build->infos[i], build->pp_build_range_infos[i]);
4493 }
4494 
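/* vkCmdWriteAccelerationStructuresPropertiesKHR: write one 64-bit result per
 * structure directly into the query pool data, starting at first_query.
 */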
4495 static void
4496 handle_write_acceleration_structures_properties(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4497 {
4498    struct vk_cmd_write_acceleration_structures_properties_khr *write = &cmd->u.write_acceleration_structures_properties_khr;
4499 
4500    VK_FROM_HANDLE(lvp_query_pool, pool, write->query_pool);
4501 
4502    uint64_t *dst = pool->data;
4503    dst += write->first_query;
4504 
4505    for (uint32_t i = 0; i < write->acceleration_structure_count; i++) {
4506       VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, write->acceleration_structures[i]);
4507 
4508       switch ((uint32_t)pool->base_type) {
4509       case LVP_QUERY_ACCELERATION_STRUCTURE_COMPACTED_SIZE:
4510          dst[i] = accel_struct->size;
4511          break;
4512       case LVP_QUERY_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE: {
4513          struct lvp_bvh_header *header = (void *)(uintptr_t)vk_acceleration_structure_get_va(accel_struct);
4514          dst[i] = header->serialization_size;
4515          break;
4516       }
4517       case LVP_QUERY_ACCELERATION_STRUCTURE_SIZE:
4518          dst[i] = accel_struct->size;
4519          break;
4520       case LVP_QUERY_ACCELERATION_STRUCTURE_INSTANCE_COUNT: {
4521          struct lvp_bvh_header *header = (void *)(uintptr_t)vk_acceleration_structure_get_va(accel_struct);
4522          dst[i] = header->instance_count;
4523          break;
4524       }
4525       default:
4526          unreachable("Unsupported query type");
4527       }
4528    }
4529 }
4530 
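/* Bind the ray-tracing dispatch state: raygen push constants and constant
 * buffers are routed through the compute slots, and the raygen shader is
 * bound as the compute shader.  Compute state is marked dirty afterwards
 * since it gets clobbered here.
 */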
4531 static void emit_ray_tracing_state(struct rendering_state *state)
4532 {
4533    bool pcbuf_dirty = state->pcbuf_dirty[MESA_SHADER_RAYGEN];
4534    if (pcbuf_dirty)
4535       update_pcbuf(state, MESA_SHADER_COMPUTE, MESA_SHADER_RAYGEN);
4536 
4537    if (state->constbuf_dirty[MESA_SHADER_RAYGEN]) {
4538       for (unsigned i = 0; i < state->num_const_bufs[MESA_SHADER_RAYGEN]; i++)
4539          state->pctx->set_constant_buffer(state->pctx, MESA_SHADER_COMPUTE,
4540                                           i + 1, false, &state->const_buffer[MESA_SHADER_RAYGEN][i]);
4541       state->constbuf_dirty[MESA_SHADER_RAYGEN] = false;
4542    }
4543 
4544    state->pctx->bind_compute_state(state->pctx, state->shaders[MESA_SHADER_RAYGEN]->shader_cso);
4545 
4546    state->pcbuf_dirty[MESA_SHADER_COMPUTE] = true;
4547    state->constbuf_dirty[MESA_SHADER_COMPUTE] = true;
4548    state->compute_shader_dirty = true;
4549 }
4550 
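/* vkCmdTraceRaysKHR: pass the shader binding tables and launch size to the
 * shader through an internal uniform buffer, then launch a compute grid
 * covering width x height x depth.
 */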
4551 static void
4552 handle_trace_rays(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4553 {
4554    struct vk_cmd_trace_rays_khr *trace = &cmd->u.trace_rays_khr;
4555 
4556    emit_ray_tracing_state(state);
4557 
4558    VkTraceRaysIndirectCommand2KHR *command = lvp_push_internal_buffer(
4559       state, MESA_SHADER_COMPUTE, sizeof(VkTraceRaysIndirectCommand2KHR));
4560 
4561    *command = (VkTraceRaysIndirectCommand2KHR) {
4562       .raygenShaderRecordAddress = trace->raygen_shader_binding_table->deviceAddress,
4563       .raygenShaderRecordSize = trace->raygen_shader_binding_table->size,
4564       .missShaderBindingTableAddress = trace->miss_shader_binding_table->deviceAddress,
4565       .missShaderBindingTableSize = trace->miss_shader_binding_table->size,
4566       .missShaderBindingTableStride = trace->miss_shader_binding_table->stride,
4567       .hitShaderBindingTableAddress = trace->hit_shader_binding_table->deviceAddress,
4568       .hitShaderBindingTableSize = trace->hit_shader_binding_table->size,
4569       .hitShaderBindingTableStride = trace->hit_shader_binding_table->stride,
4570       .callableShaderBindingTableAddress = trace->callable_shader_binding_table->deviceAddress,
4571       .callableShaderBindingTableSize = trace->callable_shader_binding_table->size,
4572       .callableShaderBindingTableStride = trace->callable_shader_binding_table->stride,
4573       .width = trace->width,
4574       .height = trace->height,
4575       .depth = trace->depth,
4576    };
4577 
4578    state->trace_rays_info.grid[0] = DIV_ROUND_UP(trace->width, state->trace_rays_info.block[0]);
4579    state->trace_rays_info.grid[1] = DIV_ROUND_UP(trace->height, state->trace_rays_info.block[1]);
4580    state->trace_rays_info.grid[2] = DIV_ROUND_UP(trace->depth, state->trace_rays_info.block[2]);
4581 
4582    state->pctx->launch_grid(state->pctx, &state->trace_rays_info);
4583 }
4584 
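/* vkCmdTraceRaysIndirectKHR: like handle_trace_rays, but the launch
 * dimensions are read from a mapped buffer at the indirect device address.
 */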
4585 static void
4586 handle_trace_rays_indirect(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4587 {
4588    struct vk_cmd_trace_rays_indirect_khr *trace = &cmd->u.trace_rays_indirect_khr;
4589 
4590    emit_ray_tracing_state(state);
4591 
4592    size_t indirect_offset;
4593    VkBuffer _indirect = get_buffer(state, (void *)(uintptr_t)trace->indirect_device_address, &indirect_offset);
4594    VK_FROM_HANDLE(lvp_buffer, indirect, _indirect);
4595 
4596    struct pipe_transfer *transfer;
4597    const uint8_t *map = pipe_buffer_map(state->pctx, indirect->bo, PIPE_MAP_READ, &transfer);
4598    map += indirect_offset;
4599    const VkTraceRaysIndirectCommandKHR *src = (const void *)map;
4600 
4601    VkTraceRaysIndirectCommand2KHR *command = lvp_push_internal_buffer(
4602       state, MESA_SHADER_COMPUTE, sizeof(VkTraceRaysIndirectCommand2KHR));
4603 
4604    *command = (VkTraceRaysIndirectCommand2KHR) {
4605       .raygenShaderRecordAddress = trace->raygen_shader_binding_table->deviceAddress,
4606       .raygenShaderRecordSize = trace->raygen_shader_binding_table->size,
4607       .missShaderBindingTableAddress = trace->miss_shader_binding_table->deviceAddress,
4608       .missShaderBindingTableSize = trace->miss_shader_binding_table->size,
4609       .missShaderBindingTableStride = trace->miss_shader_binding_table->stride,
4610       .hitShaderBindingTableAddress = trace->hit_shader_binding_table->deviceAddress,
4611       .hitShaderBindingTableSize = trace->hit_shader_binding_table->size,
4612       .hitShaderBindingTableStride = trace->hit_shader_binding_table->stride,
4613       .callableShaderBindingTableAddress = trace->callable_shader_binding_table->deviceAddress,
4614       .callableShaderBindingTableSize = trace->callable_shader_binding_table->size,
4615       .callableShaderBindingTableStride = trace->callable_shader_binding_table->stride,
4616       .width = src->width,
4617       .height = src->height,
4618       .depth = src->depth,
4619    };
4620 
4621    state->trace_rays_info.grid[0] = DIV_ROUND_UP(src->width, state->trace_rays_info.block[0]);
4622    state->trace_rays_info.grid[1] = DIV_ROUND_UP(src->height, state->trace_rays_info.block[1]);
4623    state->trace_rays_info.grid[2] = DIV_ROUND_UP(src->depth, state->trace_rays_info.block[2]);
4624 
4625    state->pctx->buffer_unmap(state->pctx, transfer);
4626 
4627    state->pctx->launch_grid(state->pctx, &state->trace_rays_info);
4628 }
4629 
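/* vkCmdTraceRaysIndirect2KHR: both the shader binding tables and the launch
 * dimensions come from the indirect buffer.
 */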
4630 static void
4631 handle_trace_rays_indirect2(struct vk_cmd_queue_entry *cmd, struct rendering_state *state)
4632 {
4633    struct vk_cmd_trace_rays_indirect2_khr *trace = &cmd->u.trace_rays_indirect2_khr;
4634 
4635    emit_ray_tracing_state(state);
4636 
4637    size_t indirect_offset;
4638    VkBuffer _indirect = get_buffer(state, (void *)(uintptr_t)trace->indirect_device_address, &indirect_offset);
4639    VK_FROM_HANDLE(lvp_buffer, indirect, _indirect);
4640 
4641    struct pipe_transfer *transfer;
4642    const uint8_t *map = pipe_buffer_map(state->pctx, indirect->bo, PIPE_MAP_READ, &transfer);
4643    map += indirect_offset;
4644    const VkTraceRaysIndirectCommand2KHR *src = (const void *)map;
4645 
4646    VkTraceRaysIndirectCommand2KHR *command = lvp_push_internal_buffer(
4647       state, MESA_SHADER_COMPUTE, sizeof(VkTraceRaysIndirectCommand2KHR));
4648    *command = *src;
4649 
4650    state->trace_rays_info.grid[0] = DIV_ROUND_UP(src->width, state->trace_rays_info.block[0]);
4651    state->trace_rays_info.grid[1] = DIV_ROUND_UP(src->height, state->trace_rays_info.block[1]);
4652    state->trace_rays_info.grid[2] = DIV_ROUND_UP(src->depth, state->trace_rays_info.block[2]);
4653 
4654    state->pctx->buffer_unmap(state->pctx, transfer);
4655 
4656    state->pctx->launch_grid(state->pctx, &state->trace_rays_info);
4657 }
4658 
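/* Point the device dispatch table at the vk_cmd_enqueue entrypoints so that
 * command-buffer recording simply queues vk_cmd_queue_entry structs for
 * lvp_execute_cmd_buffer to replay.
 */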
4659 void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp)
4660 {
4661    struct vk_device_dispatch_table cmd_enqueue_dispatch;
4662    vk_device_dispatch_table_from_entrypoints(&cmd_enqueue_dispatch,
4663       &vk_cmd_enqueue_device_entrypoints, true);
4664 
4665 #define ENQUEUE_CMD(CmdName) \
4666    assert(cmd_enqueue_dispatch.CmdName != NULL); \
4667    disp->CmdName = cmd_enqueue_dispatch.CmdName;
4668 
4669    /* This list needs to match what's in lvp_execute_cmd_buffer exactly */
4670    ENQUEUE_CMD(CmdBindPipeline)
4671    ENQUEUE_CMD(CmdSetViewport)
4672    ENQUEUE_CMD(CmdSetViewportWithCount)
4673    ENQUEUE_CMD(CmdSetScissor)
4674    ENQUEUE_CMD(CmdSetScissorWithCount)
4675    ENQUEUE_CMD(CmdSetLineWidth)
4676    ENQUEUE_CMD(CmdSetDepthBias)
4677    ENQUEUE_CMD(CmdSetBlendConstants)
4678    ENQUEUE_CMD(CmdSetDepthBounds)
4679    ENQUEUE_CMD(CmdSetStencilCompareMask)
4680    ENQUEUE_CMD(CmdSetStencilWriteMask)
4681    ENQUEUE_CMD(CmdSetStencilReference)
4682    ENQUEUE_CMD(CmdBindDescriptorSets2KHR)
4683    ENQUEUE_CMD(CmdBindIndexBuffer)
4684    ENQUEUE_CMD(CmdBindIndexBuffer2KHR)
4685    ENQUEUE_CMD(CmdBindVertexBuffers2)
4686    ENQUEUE_CMD(CmdDraw)
4687    ENQUEUE_CMD(CmdDrawMultiEXT)
4688    ENQUEUE_CMD(CmdDrawIndexed)
4689    ENQUEUE_CMD(CmdDrawIndirect)
4690    ENQUEUE_CMD(CmdDrawIndexedIndirect)
4691    ENQUEUE_CMD(CmdDrawMultiIndexedEXT)
4692    ENQUEUE_CMD(CmdDispatch)
4693    ENQUEUE_CMD(CmdDispatchBase)
4694    ENQUEUE_CMD(CmdDispatchIndirect)
4695    ENQUEUE_CMD(CmdCopyBuffer2)
4696    ENQUEUE_CMD(CmdCopyImage2)
4697    ENQUEUE_CMD(CmdBlitImage2)
4698    ENQUEUE_CMD(CmdCopyBufferToImage2)
4699    ENQUEUE_CMD(CmdCopyImageToBuffer2)
4700    ENQUEUE_CMD(CmdUpdateBuffer)
4701    ENQUEUE_CMD(CmdFillBuffer)
4702    ENQUEUE_CMD(CmdClearColorImage)
4703    ENQUEUE_CMD(CmdClearDepthStencilImage)
4704    ENQUEUE_CMD(CmdClearAttachments)
4705    ENQUEUE_CMD(CmdResolveImage2)
4706    ENQUEUE_CMD(CmdBeginQueryIndexedEXT)
4707    ENQUEUE_CMD(CmdEndQueryIndexedEXT)
4708    ENQUEUE_CMD(CmdBeginQuery)
4709    ENQUEUE_CMD(CmdEndQuery)
4710    ENQUEUE_CMD(CmdResetQueryPool)
4711    ENQUEUE_CMD(CmdCopyQueryPoolResults)
4712    ENQUEUE_CMD(CmdExecuteCommands)
4713    ENQUEUE_CMD(CmdDrawIndirectCount)
4714    ENQUEUE_CMD(CmdDrawIndexedIndirectCount)
4715    ENQUEUE_CMD(CmdBindTransformFeedbackBuffersEXT)
4716    ENQUEUE_CMD(CmdBeginTransformFeedbackEXT)
4717    ENQUEUE_CMD(CmdEndTransformFeedbackEXT)
4718    ENQUEUE_CMD(CmdDrawIndirectByteCountEXT)
4719    ENQUEUE_CMD(CmdBeginConditionalRenderingEXT)
4720    ENQUEUE_CMD(CmdEndConditionalRenderingEXT)
4721    ENQUEUE_CMD(CmdSetVertexInputEXT)
4722    ENQUEUE_CMD(CmdSetCullMode)
4723    ENQUEUE_CMD(CmdSetFrontFace)
4724    ENQUEUE_CMD(CmdSetPrimitiveTopology)
4725    ENQUEUE_CMD(CmdSetDepthTestEnable)
4726    ENQUEUE_CMD(CmdSetDepthWriteEnable)
4727    ENQUEUE_CMD(CmdSetDepthCompareOp)
4728    ENQUEUE_CMD(CmdSetDepthBoundsTestEnable)
4729    ENQUEUE_CMD(CmdSetStencilTestEnable)
4730    ENQUEUE_CMD(CmdSetStencilOp)
4731    ENQUEUE_CMD(CmdSetLineStippleEXT)
4732    ENQUEUE_CMD(CmdSetLineStippleKHR)
4733    ENQUEUE_CMD(CmdSetDepthBiasEnable)
4734    ENQUEUE_CMD(CmdSetLogicOpEXT)
4735    ENQUEUE_CMD(CmdSetPatchControlPointsEXT)
4736    ENQUEUE_CMD(CmdSetPrimitiveRestartEnable)
4737    ENQUEUE_CMD(CmdSetRasterizerDiscardEnable)
4738    ENQUEUE_CMD(CmdSetColorWriteEnableEXT)
4739    ENQUEUE_CMD(CmdBeginRendering)
4740    ENQUEUE_CMD(CmdEndRendering)
4741    ENQUEUE_CMD(CmdSetDeviceMask)
4742    ENQUEUE_CMD(CmdPipelineBarrier2)
4743    ENQUEUE_CMD(CmdResetEvent2)
4744    ENQUEUE_CMD(CmdSetEvent2)
4745    ENQUEUE_CMD(CmdWaitEvents2)
4746    ENQUEUE_CMD(CmdWriteTimestamp2)
4747    ENQUEUE_CMD(CmdPushConstants2KHR)
4748    ENQUEUE_CMD(CmdPushDescriptorSet2KHR)
4749    ENQUEUE_CMD(CmdPushDescriptorSetWithTemplate2KHR)
4750    ENQUEUE_CMD(CmdBindDescriptorBuffersEXT)
4751    ENQUEUE_CMD(CmdSetDescriptorBufferOffsets2EXT)
4752    ENQUEUE_CMD(CmdBindDescriptorBufferEmbeddedSamplers2EXT)
4753 
4754    ENQUEUE_CMD(CmdSetPolygonModeEXT)
4755    ENQUEUE_CMD(CmdSetTessellationDomainOriginEXT)
4756    ENQUEUE_CMD(CmdSetDepthClampEnableEXT)
4757    ENQUEUE_CMD(CmdSetDepthClipEnableEXT)
4758    ENQUEUE_CMD(CmdSetLogicOpEnableEXT)
4759    ENQUEUE_CMD(CmdSetSampleMaskEXT)
4760    ENQUEUE_CMD(CmdSetRasterizationSamplesEXT)
4761    ENQUEUE_CMD(CmdSetAlphaToCoverageEnableEXT)
4762    ENQUEUE_CMD(CmdSetAlphaToOneEnableEXT)
4763    ENQUEUE_CMD(CmdSetDepthClipNegativeOneToOneEXT)
4764    ENQUEUE_CMD(CmdSetLineRasterizationModeEXT)
4765    ENQUEUE_CMD(CmdSetLineStippleEnableEXT)
4766    ENQUEUE_CMD(CmdSetProvokingVertexModeEXT)
4767    ENQUEUE_CMD(CmdSetColorBlendEnableEXT)
4768    ENQUEUE_CMD(CmdSetColorBlendEquationEXT)
4769    ENQUEUE_CMD(CmdSetColorWriteMaskEXT)
4770 
4771    ENQUEUE_CMD(CmdBindShadersEXT)
4772    /* required for EXT_shader_object */
4773    ENQUEUE_CMD(CmdSetCoverageModulationModeNV)
4774    ENQUEUE_CMD(CmdSetCoverageModulationTableEnableNV)
4775    ENQUEUE_CMD(CmdSetCoverageModulationTableNV)
4776    ENQUEUE_CMD(CmdSetCoverageReductionModeNV)
4777    ENQUEUE_CMD(CmdSetCoverageToColorEnableNV)
4778    ENQUEUE_CMD(CmdSetCoverageToColorLocationNV)
4779    ENQUEUE_CMD(CmdSetRepresentativeFragmentTestEnableNV)
4780    ENQUEUE_CMD(CmdSetShadingRateImageEnableNV)
4781    ENQUEUE_CMD(CmdSetViewportSwizzleNV)
4782    ENQUEUE_CMD(CmdSetViewportWScalingEnableNV)
4783    ENQUEUE_CMD(CmdSetAttachmentFeedbackLoopEnableEXT)
4784    ENQUEUE_CMD(CmdDrawMeshTasksEXT)
4785    ENQUEUE_CMD(CmdDrawMeshTasksIndirectEXT)
4786    ENQUEUE_CMD(CmdDrawMeshTasksIndirectCountEXT)
4787 
4788    ENQUEUE_CMD(CmdBindPipelineShaderGroupNV)
4789    ENQUEUE_CMD(CmdPreprocessGeneratedCommandsNV)
4790    ENQUEUE_CMD(CmdExecuteGeneratedCommandsNV)
4791 
4792 #ifdef VK_ENABLE_BETA_EXTENSIONS
4793    ENQUEUE_CMD(CmdInitializeGraphScratchMemoryAMDX)
4794    ENQUEUE_CMD(CmdDispatchGraphIndirectCountAMDX)
4795    ENQUEUE_CMD(CmdDispatchGraphIndirectAMDX)
4796    ENQUEUE_CMD(CmdDispatchGraphAMDX)
4797 #endif
4798 
4799    ENQUEUE_CMD(CmdSetRenderingAttachmentLocationsKHR)
4800    ENQUEUE_CMD(CmdSetRenderingInputAttachmentIndicesKHR)
4801 
4802    ENQUEUE_CMD(CmdCopyAccelerationStructureKHR)
4803    ENQUEUE_CMD(CmdCopyMemoryToAccelerationStructureKHR)
4804    ENQUEUE_CMD(CmdCopyAccelerationStructureToMemoryKHR)
4805    ENQUEUE_CMD(CmdBuildAccelerationStructuresKHR)
4806    ENQUEUE_CMD(CmdBuildAccelerationStructuresIndirectKHR)
4807    ENQUEUE_CMD(CmdWriteAccelerationStructuresPropertiesKHR)
4808 
4809    ENQUEUE_CMD(CmdSetRayTracingPipelineStackSizeKHR)
4810    ENQUEUE_CMD(CmdTraceRaysIndirect2KHR)
4811    ENQUEUE_CMD(CmdTraceRaysIndirectKHR)
4812    ENQUEUE_CMD(CmdTraceRaysKHR)
4813 
4814 #undef ENQUEUE_CMD
4815 }
4816 
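/* Replay a recorded command list on the gallium context.  Draw and dispatch
 * commands flush the accumulated Vulkan state through emit_state() /
 * emit_compute_state() before being translated.
 */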
4817 static void lvp_execute_cmd_buffer(struct list_head *cmds,
4818                                    struct rendering_state *state, bool print_cmds)
4819 {
4820    struct vk_cmd_queue_entry *cmd;
4821    bool did_flush = false;
4822 
4823    LIST_FOR_EACH_ENTRY(cmd, cmds, cmd_link) {
4824       if (print_cmds)
4825          fprintf(stderr, "%s\n", vk_cmd_queue_type_names[cmd->type]);
4826       switch (cmd->type) {
4827       case VK_CMD_BIND_PIPELINE:
4828          handle_pipeline(cmd, state);
4829          break;
4830       case VK_CMD_SET_VIEWPORT:
4831          handle_set_viewport(cmd, state);
4832          break;
4833       case VK_CMD_SET_VIEWPORT_WITH_COUNT:
4834          handle_set_viewport_with_count(cmd, state);
4835          break;
4836       case VK_CMD_SET_SCISSOR:
4837          handle_set_scissor(cmd, state);
4838          break;
4839       case VK_CMD_SET_SCISSOR_WITH_COUNT:
4840          handle_set_scissor_with_count(cmd, state);
4841          break;
4842       case VK_CMD_SET_LINE_WIDTH:
4843          handle_set_line_width(cmd, state);
4844          break;
4845       case VK_CMD_SET_DEPTH_BIAS:
4846          handle_set_depth_bias(cmd, state);
4847          break;
4848       case VK_CMD_SET_BLEND_CONSTANTS:
4849          handle_set_blend_constants(cmd, state);
4850          break;
4851       case VK_CMD_SET_DEPTH_BOUNDS:
4852          handle_set_depth_bounds(cmd, state);
4853          break;
4854       case VK_CMD_SET_STENCIL_COMPARE_MASK:
4855          handle_set_stencil_compare_mask(cmd, state);
4856          break;
4857       case VK_CMD_SET_STENCIL_WRITE_MASK:
4858          handle_set_stencil_write_mask(cmd, state);
4859          break;
4860       case VK_CMD_SET_STENCIL_REFERENCE:
4861          handle_set_stencil_reference(cmd, state);
4862          break;
4863       case VK_CMD_BIND_DESCRIPTOR_SETS2_KHR:
4864          handle_descriptor_sets_cmd(cmd, state);
4865          break;
4866       case VK_CMD_BIND_INDEX_BUFFER:
4867          handle_index_buffer(cmd, state);
4868          break;
4869       case VK_CMD_BIND_INDEX_BUFFER2_KHR:
4870          handle_index_buffer2(cmd, state);
4871          break;
4872       case VK_CMD_BIND_VERTEX_BUFFERS2:
4873          handle_vertex_buffers2(cmd, state);
4874          break;
4875       case VK_CMD_DRAW:
4876          emit_state(state);
4877          handle_draw(cmd, state);
4878          break;
4879       case VK_CMD_DRAW_MULTI_EXT:
4880          emit_state(state);
4881          handle_draw_multi(cmd, state);
4882          break;
4883       case VK_CMD_DRAW_INDEXED:
4884          emit_state(state);
4885          handle_draw_indexed(cmd, state);
4886          break;
4887       case VK_CMD_DRAW_INDIRECT:
4888          emit_state(state);
4889          handle_draw_indirect(cmd, state, false);
4890          break;
4891       case VK_CMD_DRAW_INDEXED_INDIRECT:
4892          emit_state(state);
4893          handle_draw_indirect(cmd, state, true);
4894          break;
4895       case VK_CMD_DRAW_MULTI_INDEXED_EXT:
4896          emit_state(state);
4897          handle_draw_multi_indexed(cmd, state);
4898          break;
4899       case VK_CMD_DISPATCH:
4900          emit_compute_state(state);
4901          handle_dispatch(cmd, state);
4902          break;
4903       case VK_CMD_DISPATCH_BASE:
4904          emit_compute_state(state);
4905          handle_dispatch_base(cmd, state);
4906          break;
4907       case VK_CMD_DISPATCH_INDIRECT:
4908          emit_compute_state(state);
4909          handle_dispatch_indirect(cmd, state);
4910          break;
4911       case VK_CMD_COPY_BUFFER2:
4912          handle_copy_buffer(cmd, state);
4913          break;
4914       case VK_CMD_COPY_IMAGE2:
4915          handle_copy_image(cmd, state);
4916          break;
4917       case VK_CMD_BLIT_IMAGE2:
4918          handle_blit_image(cmd, state);
4919          break;
4920       case VK_CMD_COPY_BUFFER_TO_IMAGE2:
4921          handle_copy_buffer_to_image(cmd, state);
4922          break;
4923       case VK_CMD_COPY_IMAGE_TO_BUFFER2:
4924          handle_copy_image_to_buffer2(cmd, state);
4925          break;
4926       case VK_CMD_UPDATE_BUFFER:
4927          handle_update_buffer(cmd, state);
4928          break;
4929       case VK_CMD_FILL_BUFFER:
4930          handle_fill_buffer(cmd, state);
4931          break;
4932       case VK_CMD_CLEAR_COLOR_IMAGE:
4933          handle_clear_color_image(cmd, state);
4934          break;
4935       case VK_CMD_CLEAR_DEPTH_STENCIL_IMAGE:
4936          handle_clear_ds_image(cmd, state);
4937          break;
4938       case VK_CMD_CLEAR_ATTACHMENTS:
4939          handle_clear_attachments(cmd, state);
4940          break;
4941       case VK_CMD_RESOLVE_IMAGE2:
4942          handle_resolve_image(cmd, state);
4943          break;
4944       case VK_CMD_PIPELINE_BARRIER2:
4945          /* flushes are actually stalls, so multiple flushes are redundant */
4946          if (did_flush)
4947             continue;
4948          handle_pipeline_barrier(cmd, state);
4949          did_flush = true;
4950          continue;
4951       case VK_CMD_BEGIN_QUERY_INDEXED_EXT:
4952          handle_begin_query_indexed_ext(cmd, state);
4953          break;
4954       case VK_CMD_END_QUERY_INDEXED_EXT:
4955          handle_end_query_indexed_ext(cmd, state);
4956          break;
4957       case VK_CMD_BEGIN_QUERY:
4958          handle_begin_query(cmd, state);
4959          break;
4960       case VK_CMD_END_QUERY:
4961          handle_end_query(cmd, state);
4962          break;
4963       case VK_CMD_RESET_QUERY_POOL:
4964          handle_reset_query_pool(cmd, state);
4965          break;
4966       case VK_CMD_COPY_QUERY_POOL_RESULTS:
4967          handle_copy_query_pool_results(cmd, state);
4968          break;
4969       case VK_CMD_PUSH_CONSTANTS2_KHR:
4970          handle_push_constants(cmd, state);
4971          break;
4972       case VK_CMD_EXECUTE_COMMANDS:
4973          handle_execute_commands(cmd, state, print_cmds);
4974          break;
4975       case VK_CMD_DRAW_INDIRECT_COUNT:
4976          emit_state(state);
4977          handle_draw_indirect_count(cmd, state, false);
4978          break;
4979       case VK_CMD_DRAW_INDEXED_INDIRECT_COUNT:
4980          emit_state(state);
4981          handle_draw_indirect_count(cmd, state, true);
4982          break;
4983       case VK_CMD_PUSH_DESCRIPTOR_SET2_KHR:
4984          handle_push_descriptor_set(cmd, state);
4985          break;
4986       case VK_CMD_PUSH_DESCRIPTOR_SET_WITH_TEMPLATE2_KHR:
4987          handle_push_descriptor_set_with_template(cmd, state);
4988          break;
4989       case VK_CMD_BIND_TRANSFORM_FEEDBACK_BUFFERS_EXT:
4990          handle_bind_transform_feedback_buffers(cmd, state);
4991          break;
4992       case VK_CMD_BEGIN_TRANSFORM_FEEDBACK_EXT:
4993          handle_begin_transform_feedback(cmd, state);
4994          break;
4995       case VK_CMD_END_TRANSFORM_FEEDBACK_EXT:
4996          handle_end_transform_feedback(cmd, state);
4997          break;
4998       case VK_CMD_DRAW_INDIRECT_BYTE_COUNT_EXT:
4999          emit_state(state);
5000          handle_draw_indirect_byte_count(cmd, state);
5001          break;
5002       case VK_CMD_BEGIN_CONDITIONAL_RENDERING_EXT:
5003          handle_begin_conditional_rendering(cmd, state);
5004          break;
5005       case VK_CMD_END_CONDITIONAL_RENDERING_EXT:
5006          handle_end_conditional_rendering(state);
5007          break;
5008       case VK_CMD_SET_VERTEX_INPUT_EXT:
5009          handle_set_vertex_input(cmd, state);
5010          break;
5011       case VK_CMD_SET_CULL_MODE:
5012          handle_set_cull_mode(cmd, state);
5013          break;
5014       case VK_CMD_SET_FRONT_FACE:
5015          handle_set_front_face(cmd, state);
5016          break;
5017       case VK_CMD_SET_PRIMITIVE_TOPOLOGY:
5018          handle_set_primitive_topology(cmd, state);
5019          break;
5020       case VK_CMD_SET_DEPTH_TEST_ENABLE:
5021          handle_set_depth_test_enable(cmd, state);
5022          break;
5023       case VK_CMD_SET_DEPTH_WRITE_ENABLE:
5024          handle_set_depth_write_enable(cmd, state);
5025          break;
5026       case VK_CMD_SET_DEPTH_COMPARE_OP:
5027          handle_set_depth_compare_op(cmd, state);
5028          break;
5029       case VK_CMD_SET_DEPTH_BOUNDS_TEST_ENABLE:
5030          handle_set_depth_bounds_test_enable(cmd, state);
5031          break;
5032       case VK_CMD_SET_STENCIL_TEST_ENABLE:
5033          handle_set_stencil_test_enable(cmd, state);
5034          break;
5035       case VK_CMD_SET_STENCIL_OP:
5036          handle_set_stencil_op(cmd, state);
5037          break;
5038       case VK_CMD_SET_LINE_STIPPLE_KHR:
5039          handle_set_line_stipple(cmd, state);
5040          break;
5041       case VK_CMD_SET_DEPTH_BIAS_ENABLE:
5042          handle_set_depth_bias_enable(cmd, state);
5043          break;
5044       case VK_CMD_SET_LOGIC_OP_EXT:
5045          handle_set_logic_op(cmd, state);
5046          break;
5047       case VK_CMD_SET_PATCH_CONTROL_POINTS_EXT:
5048          handle_set_patch_control_points(cmd, state);
5049          break;
5050       case VK_CMD_SET_PRIMITIVE_RESTART_ENABLE:
5051          handle_set_primitive_restart_enable(cmd, state);
5052          break;
5053       case VK_CMD_SET_RASTERIZER_DISCARD_ENABLE:
5054          handle_set_rasterizer_discard_enable(cmd, state);
5055          break;
5056       case VK_CMD_SET_COLOR_WRITE_ENABLE_EXT:
5057          handle_set_color_write_enable(cmd, state);
5058          break;
5059       case VK_CMD_BEGIN_RENDERING:
5060          handle_begin_rendering(cmd, state);
5061          break;
5062       case VK_CMD_END_RENDERING:
5063          handle_end_rendering(cmd, state);
5064          break;
5065       case VK_CMD_SET_DEVICE_MASK:
5066          /* no-op */
5067          break;
5068       case VK_CMD_RESET_EVENT2:
5069          handle_event_reset2(cmd, state);
5070          break;
5071       case VK_CMD_SET_EVENT2:
5072          handle_event_set2(cmd, state);
5073          break;
5074       case VK_CMD_WAIT_EVENTS2:
5075          handle_wait_events2(cmd, state);
5076          break;
5077       case VK_CMD_WRITE_TIMESTAMP2:
5078          handle_write_timestamp2(cmd, state);
5079          break;
5080       case VK_CMD_SET_POLYGON_MODE_EXT:
5081          handle_set_polygon_mode(cmd, state);
5082          break;
5083       case VK_CMD_SET_TESSELLATION_DOMAIN_ORIGIN_EXT:
5084          handle_set_tessellation_domain_origin(cmd, state);
5085          break;
5086       case VK_CMD_SET_DEPTH_CLAMP_ENABLE_EXT:
5087          handle_set_depth_clamp_enable(cmd, state);
5088          break;
5089       case VK_CMD_SET_DEPTH_CLIP_ENABLE_EXT:
5090          handle_set_depth_clip_enable(cmd, state);
5091          break;
5092       case VK_CMD_SET_LOGIC_OP_ENABLE_EXT:
5093          handle_set_logic_op_enable(cmd, state);
5094          break;
5095       case VK_CMD_SET_SAMPLE_MASK_EXT:
5096          handle_set_sample_mask(cmd, state);
5097          break;
5098       case VK_CMD_SET_RASTERIZATION_SAMPLES_EXT:
5099          handle_set_samples(cmd, state);
5100          break;
5101       case VK_CMD_SET_ALPHA_TO_COVERAGE_ENABLE_EXT:
5102          handle_set_alpha_to_coverage(cmd, state);
5103          break;
5104       case VK_CMD_SET_ALPHA_TO_ONE_ENABLE_EXT:
5105          handle_set_alpha_to_one(cmd, state);
5106          break;
5107       case VK_CMD_SET_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT:
5108          handle_set_halfz(cmd, state);
5109          break;
5110       case VK_CMD_SET_LINE_RASTERIZATION_MODE_EXT:
5111          handle_set_line_rasterization_mode(cmd, state);
5112          break;
5113       case VK_CMD_SET_LINE_STIPPLE_ENABLE_EXT:
5114          handle_set_line_stipple_enable(cmd, state);
5115          break;
5116       case VK_CMD_SET_PROVOKING_VERTEX_MODE_EXT:
5117          handle_set_provoking_vertex_mode(cmd, state);
5118          break;
5119       case VK_CMD_SET_COLOR_BLEND_ENABLE_EXT:
5120          handle_set_color_blend_enable(cmd, state);
5121          break;
5122       case VK_CMD_SET_COLOR_WRITE_MASK_EXT:
5123          handle_set_color_write_mask(cmd, state);
5124          break;
5125       case VK_CMD_SET_COLOR_BLEND_EQUATION_EXT:
5126          handle_set_color_blend_equation(cmd, state);
5127          break;
5128       case VK_CMD_BIND_SHADERS_EXT:
5129          handle_shaders(cmd, state);
5130          break;
5131       case VK_CMD_SET_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT:
5132          break;
5133       case VK_CMD_DRAW_MESH_TASKS_EXT:
5134          emit_state(state);
5135          handle_draw_mesh_tasks(cmd, state);
5136          break;
5137       case VK_CMD_DRAW_MESH_TASKS_INDIRECT_EXT:
5138          emit_state(state);
5139          handle_draw_mesh_tasks_indirect(cmd, state);
5140          break;
5141       case VK_CMD_DRAW_MESH_TASKS_INDIRECT_COUNT_EXT:
5142          emit_state(state);
5143          handle_draw_mesh_tasks_indirect_count(cmd, state);
5144          break;
5145       case VK_CMD_BIND_PIPELINE_SHADER_GROUP_NV:
5146          handle_graphics_pipeline_group(cmd, state);
5147          break;
5148       case VK_CMD_PREPROCESS_GENERATED_COMMANDS_NV:
5149          handle_preprocess_generated_commands(cmd, state, print_cmds);
5150          break;
5151       case VK_CMD_EXECUTE_GENERATED_COMMANDS_NV:
5152          handle_execute_generated_commands(cmd, state, print_cmds);
5153          break;
5154       case VK_CMD_BIND_DESCRIPTOR_BUFFERS_EXT:
5155          handle_descriptor_buffers(cmd, state);
5156          break;
5157       case VK_CMD_SET_DESCRIPTOR_BUFFER_OFFSETS2_EXT:
5158          handle_descriptor_buffer_offsets(cmd, state);
5159          break;
5160       case VK_CMD_BIND_DESCRIPTOR_BUFFER_EMBEDDED_SAMPLERS2_EXT:
5161          handle_descriptor_buffer_embedded_samplers(cmd, state);
5162          break;
5163 #ifdef VK_ENABLE_BETA_EXTENSIONS
5164       case VK_CMD_INITIALIZE_GRAPH_SCRATCH_MEMORY_AMDX:
5165          break;
5166       case VK_CMD_DISPATCH_GRAPH_INDIRECT_COUNT_AMDX:
5167          break;
5168       case VK_CMD_DISPATCH_GRAPH_INDIRECT_AMDX:
5169          break;
5170       case VK_CMD_DISPATCH_GRAPH_AMDX:
5171          handle_dispatch_graph(cmd, state);
5172          break;
5173 #endif
5174       case VK_CMD_SET_RENDERING_ATTACHMENT_LOCATIONS_KHR:
5175          handle_rendering_attachment_locations(cmd, state);
5176          break;
5177       case VK_CMD_SET_RENDERING_INPUT_ATTACHMENT_INDICES_KHR:
5178          handle_rendering_input_attachment_indices(cmd, state);
5179          break;
5180       case VK_CMD_COPY_ACCELERATION_STRUCTURE_KHR:
5181          handle_copy_acceleration_structure(cmd, state);
5182          break;
5183       case VK_CMD_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_KHR:
5184          handle_copy_memory_to_acceleration_structure(cmd, state);
5185          break;
5186       case VK_CMD_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_KHR:
5187          handle_copy_acceleration_structure_to_memory(cmd, state);
5188          break;
5189       case VK_CMD_BUILD_ACCELERATION_STRUCTURES_KHR:
5190          handle_build_acceleration_structures(cmd, state);
5191          break;
5192       case VK_CMD_BUILD_ACCELERATION_STRUCTURES_INDIRECT_KHR:
5193          break;
5194       case VK_CMD_WRITE_ACCELERATION_STRUCTURES_PROPERTIES_KHR:
5195          handle_write_acceleration_structures_properties(cmd, state);
5196          break;
5197       case VK_CMD_SET_RAY_TRACING_PIPELINE_STACK_SIZE_KHR:
5198          break;
5199       case VK_CMD_TRACE_RAYS_INDIRECT2_KHR:
5200          handle_trace_rays_indirect2(cmd, state);
5201          break;
5202       case VK_CMD_TRACE_RAYS_INDIRECT_KHR:
5203          handle_trace_rays_indirect(cmd, state);
5204          break;
5205       case VK_CMD_TRACE_RAYS_KHR:
5206          handle_trace_rays(cmd, state);
5207          break;
5208       default:
5209          fprintf(stderr, "Unsupported command %s\n", vk_cmd_queue_type_names[cmd->type]);
5210          unreachable("Unsupported command");
5211          break;
5212       }
5213       did_flush = false;
5214       if (!cmd->cmd_link.next)
5215          break;
5216    }
5217 }
5218 
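/* Queue-level entry point: reset the per-queue rendering_state to its
 * defaults, replay the command buffer, then fence and release temporary
 * descriptor sets, internal buffers and descriptor-buffer references.
 */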
5219 VkResult lvp_execute_cmds(struct lvp_device *device,
5220                           struct lvp_queue *queue,
5221                           struct lvp_cmd_buffer *cmd_buffer)
5222 {
5223    struct rendering_state *state = queue->state;
5224    memset(state, 0, sizeof(*state));
5225    state->pctx = queue->ctx;
5226    state->device = device;
5227    state->uploader = queue->uploader;
5228    state->cso = queue->cso;
5229    state->blend_dirty = true;
5230    state->dsa_dirty = true;
5231    state->rs_dirty = true;
5232    state->vp_dirty = true;
5233    state->rs_state.point_line_tri_clip = true;
5234    state->rs_state.unclamped_fragment_depth_values = device->vk.enabled_extensions.EXT_depth_range_unrestricted;
5235    state->sample_mask_dirty = true;
5236    state->min_samples_dirty = true;
5237    state->sample_mask = UINT32_MAX;
5238    state->poison_mem = device->poison_mem;
5239    util_dynarray_init(&state->push_desc_sets, NULL);
5240    util_dynarray_init(&state->internal_buffers, NULL);
5241 
5242    /* default values */
5243    state->min_sample_shading = 1;
5244    state->num_viewports = 1;
5245    state->num_scissors = 1;
5246    state->rs_state.line_width = 1.0;
5247    state->rs_state.flatshade_first = true;
5248    state->rs_state.clip_halfz = true;
5249    state->rs_state.front_ccw = true;
5250    state->rs_state.point_size_per_vertex = true;
5251    state->rs_state.point_quad_rasterization = true;
5252    state->rs_state.half_pixel_center = true;
5253    state->rs_state.scissor = true;
5254    state->rs_state.no_ms_sample_mask_out = true;
5255    state->blend_state.independent_blend_enable = true;
5256 
5257    state->index_size = 4;
5258    state->index_buffer_size = sizeof(uint32_t);
5259    state->index_buffer = state->device->zero_buffer;
5260 
5261    /* execute the recorded commands on the queue's gallium context */
5262    lvp_execute_cmd_buffer(&cmd_buffer->vk.cmd_queue.cmds, state, device->print_cmds);
5263 
5264    state->start_vb = -1;
5265    state->num_vb = 0;
5266    cso_unbind_context(queue->cso);
5267    for (unsigned i = 0; i < ARRAY_SIZE(state->so_targets); i++) {
5268       if (state->so_targets[i]) {
5269          state->pctx->stream_output_target_destroy(state->pctx, state->so_targets[i]);
5270       }
5271    }
5272 
5273    finish_fence(state);
5274 
5275    util_dynarray_foreach (&state->push_desc_sets, struct lvp_descriptor_set *, set)
5276       lvp_descriptor_set_destroy(device, *set);
5277 
5278    util_dynarray_fini(&state->push_desc_sets);
5279 
5280    util_dynarray_foreach (&state->internal_buffers, struct pipe_resource *, buffer)
5281       pipe_resource_reference(buffer, NULL);
5282 
5283    util_dynarray_fini(&state->internal_buffers);
5284 
5285    for (unsigned i = 0; i < ARRAY_SIZE(state->desc_buffers); i++)
5286       pipe_resource_reference(&state->desc_buffers[i], NULL);
5287 
5288    return VK_SUCCESS;
5289 }
5290 
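/* rendering_state is opaque outside this file; expose its size so callers can
 * allocate one without seeing the definition.
 */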
5291 size_t
5292 lvp_get_rendering_state_size(void)
5293 {
5294    return sizeof(struct rendering_state);
5295 }
5296