xref: /aosp_15_r20/external/mesa3d/src/broadcom/vulkan/v3dv_meta_clear.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2020 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26 
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 #include "vk_common_entrypoints.h"
30 
31 static void
get_hw_clear_color(struct v3dv_device * device,const VkClearColorValue * color,VkFormat fb_format,VkFormat image_format,uint32_t internal_type,uint32_t internal_bpp,uint32_t * hw_color)32 get_hw_clear_color(struct v3dv_device *device,
33                    const VkClearColorValue *color,
34                    VkFormat fb_format,
35                    VkFormat image_format,
36                    uint32_t internal_type,
37                    uint32_t internal_bpp,
38                    uint32_t *hw_color)
39 {
40    const uint32_t internal_size = 4 << internal_bpp;
41 
42    /* If the image format doesn't match the framebuffer format, then we are
43     * trying to clear an unsupported tlb format using a compatible
44     * format for the framebuffer. In this case, we want to make sure that
45     * we pack the clear value according to the original format semantics,
46     * not the compatible format.
47     */
48    if (fb_format == image_format) {
49       v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
50                                          hw_color);
51    } else {
52       union util_color uc;
53       enum pipe_format pipe_image_format =
54          vk_format_to_pipe_format(image_format);
55       util_pack_color(color->float32, pipe_image_format, &uc);
56       memcpy(hw_color, uc.ui, internal_size);
57    }
58 }
59 
60 /* Returns true if the implementation is able to handle the case, false
61  * otherwise.
62 */
63 static bool
clear_image_tlb(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_image * image,const VkClearValue * clear_value,const VkImageSubresourceRange * range)64 clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
65                 struct v3dv_image *image,
66                 const VkClearValue *clear_value,
67                 const VkImageSubresourceRange *range)
68 {
69    const VkOffset3D origin = { 0, 0, 0 };
70    VkFormat fb_format;
71 
72    /* From vkCmdClearColorImage spec:
73     *  "image must not use any of the formats that require a sampler YCBCR
74     *   conversion"
75     */
76    assert(image->plane_count == 1);
77    if (!v3dv_meta_can_use_tlb(image, 0, 0, &origin, NULL, &fb_format))
78       return false;
79 
80    uint32_t internal_type, internal_bpp;
81    v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
82       (fb_format, range->aspectMask,
83        &internal_type, &internal_bpp);
84 
85    union v3dv_clear_value hw_clear_value = { 0 };
86    if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
87       get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
88                          image->vk.format, internal_type, internal_bpp,
89                          &hw_clear_value.color[0]);
90    } else {
91       assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
92              (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
93       hw_clear_value.z = clear_value->depthStencil.depth;
94       hw_clear_value.s = clear_value->depthStencil.stencil;
95    }
96 
97    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
98    uint32_t min_level = range->baseMipLevel;
99    uint32_t max_level = range->baseMipLevel + level_count;
100 
101    /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
102     * Instead, we need to consider the full depth dimension of the image, which
103     * goes from 0 up to the level's depth extent.
104     */
105    uint32_t min_layer;
106    uint32_t max_layer;
107    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
108       min_layer = range->baseArrayLayer;
109       max_layer = range->baseArrayLayer +
110                   vk_image_subresource_layer_count(&image->vk, range);
111    } else {
112       min_layer = 0;
113       max_layer = 0;
114    }
115 
116    for (uint32_t level = min_level; level < max_level; level++) {
117       if (image->vk.image_type == VK_IMAGE_TYPE_3D)
118          max_layer = u_minify(image->vk.extent.depth, level);
119 
120       uint32_t width = u_minify(image->vk.extent.width, level);
121       uint32_t height = u_minify(image->vk.extent.height, level);
122 
123       struct v3dv_job *job =
124          v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
125 
126       if (!job)
127          return true;
128 
129       v3dv_job_start_frame(job, width, height, max_layer,
130                            false, true, 1, internal_bpp,
131                            4 * v3d_internal_bpp_words(internal_bpp),
132                            image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
133 
134       struct v3dv_meta_framebuffer framebuffer;
135       v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
136                                                  internal_type,
137                                                  &job->frame_tiling);
138 
139       v3dv_X(job->device, job_emit_binning_flush)(job);
140 
141       /* If this triggers it is an application bug: the spec requires
142        * that any aspects to clear are present in the image.
143        */
144       assert(range->aspectMask & image->vk.aspects);
145 
146       v3dv_X(job->device, meta_emit_clear_image_rcl)
147          (job, image, &framebuffer, &hw_clear_value,
148           range->aspectMask, min_layer, max_layer, level);
149 
150       v3dv_cmd_buffer_finish_job(cmd_buffer);
151    }
152 
153    return true;
154 }
155 
156 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)157 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
158                         VkImage _image,
159                         VkImageLayout imageLayout,
160                         const VkClearColorValue *pColor,
161                         uint32_t rangeCount,
162                         const VkImageSubresourceRange *pRanges)
163 {
164    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
165    V3DV_FROM_HANDLE(v3dv_image, image, _image);
166 
167    const VkClearValue clear_value = {
168       .color = *pColor,
169    };
170 
171    cmd_buffer->state.is_transfer = true;
172 
173    for (uint32_t i = 0; i < rangeCount; i++) {
174       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
175          continue;
176       unreachable("Unsupported color clear.");
177    }
178 
179    cmd_buffer->state.is_transfer = false;
180 }
181 
182 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)183 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
184                                VkImage _image,
185                                VkImageLayout imageLayout,
186                                const VkClearDepthStencilValue *pDepthStencil,
187                                uint32_t rangeCount,
188                                const VkImageSubresourceRange *pRanges)
189 {
190    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
191    V3DV_FROM_HANDLE(v3dv_image, image, _image);
192 
193    const VkClearValue clear_value = {
194       .depthStencil = *pDepthStencil,
195    };
196 
197    cmd_buffer->state.is_transfer = true;
198 
199    for (uint32_t i = 0; i < rangeCount; i++) {
200       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
201          continue;
202       unreachable("Unsupported depth/stencil clear.");
203    }
204 
205    cmd_buffer->state.is_transfer = false;
206 }
207 
208 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)209 destroy_color_clear_pipeline(VkDevice _device,
210                              uint64_t pipeline,
211                              VkAllocationCallbacks *alloc)
212 {
213    struct v3dv_meta_color_clear_pipeline *p =
214       (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
215    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
216    if (p->cached)
217       v3dv_DestroyRenderPass(_device, p->pass, alloc);
218    vk_free(alloc, p);
219 }
220 
221 static void
destroy_depth_clear_pipeline(VkDevice _device,struct v3dv_meta_depth_clear_pipeline * p,VkAllocationCallbacks * alloc)222 destroy_depth_clear_pipeline(VkDevice _device,
223                              struct v3dv_meta_depth_clear_pipeline *p,
224                              VkAllocationCallbacks *alloc)
225 {
226    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
227    vk_free(alloc, p);
228 }
229 
230 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)231 create_color_clear_pipeline_layout(struct v3dv_device *device,
232                                    VkPipelineLayout *pipeline_layout)
233 {
234    /* FIXME: this is abusing a bit the API, since not all of our clear
235     * pipelines have a geometry shader. We could create 2 different pipeline
236     * layouts, but this works for us for now.
237     */
238    VkPushConstantRange ranges[2] = {
239       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
240       { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
241    };
242 
243    VkPipelineLayoutCreateInfo info = {
244       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
245       .setLayoutCount = 0,
246       .pushConstantRangeCount = 2,
247       .pPushConstantRanges = ranges,
248    };
249 
250    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
251                                     &info, &device->vk.alloc, pipeline_layout);
252 }
253 
254 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)255 create_depth_clear_pipeline_layout(struct v3dv_device *device,
256                                    VkPipelineLayout *pipeline_layout)
257 {
258    /* FIXME: this is abusing a bit the API, since not all of our clear
259     * pipelines have a geometry shader. We could create 2 different pipeline
260     * layouts, but this works for us for now.
261     */
262    VkPushConstantRange ranges[2] = {
263       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
264       { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
265    };
266 
267    VkPipelineLayoutCreateInfo info = {
268       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
269       .setLayoutCount = 0,
270       .pushConstantRangeCount = 2,
271       .pPushConstantRanges = ranges
272    };
273 
274    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
275                                     &info, &device->vk.alloc, pipeline_layout);
276 }
277 
278 void
v3dv_meta_clear_init(struct v3dv_device * device)279 v3dv_meta_clear_init(struct v3dv_device *device)
280 {
281    if (device->instance->meta_cache_enabled) {
282       device->meta.color_clear.cache =
283          _mesa_hash_table_create(NULL, u64_hash, u64_compare);
284 
285       device->meta.depth_clear.cache =
286          _mesa_hash_table_create(NULL, u64_hash, u64_compare);
287    }
288 
289    create_color_clear_pipeline_layout(device,
290                                       &device->meta.color_clear.p_layout);
291    create_depth_clear_pipeline_layout(device,
292                                       &device->meta.depth_clear.p_layout);
293 }
294 
295 void
v3dv_meta_clear_finish(struct v3dv_device * device)296 v3dv_meta_clear_finish(struct v3dv_device *device)
297 {
298    VkDevice _device = v3dv_device_to_handle(device);
299 
300    if (device->instance->meta_cache_enabled) {
301       hash_table_foreach(device->meta.color_clear.cache, entry) {
302          struct v3dv_meta_color_clear_pipeline *item = entry->data;
303          destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
304       }
305       _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
306 
307       hash_table_foreach(device->meta.depth_clear.cache, entry) {
308          struct v3dv_meta_depth_clear_pipeline *item = entry->data;
309          destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
310       }
311       _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
312    }
313 
314    if (device->meta.color_clear.p_layout) {
315       v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
316                                  &device->vk.alloc);
317    }
318 
319    if (device->meta.depth_clear.p_layout) {
320       v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
321                                  &device->vk.alloc);
322    }
323 }
324 
325 static nir_shader *
get_clear_rect_vs(const nir_shader_compiler_options * options)326 get_clear_rect_vs(const nir_shader_compiler_options *options)
327 {
328    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
329                                                   "meta clear vs");
330 
331    const struct glsl_type *vec4 = glsl_vec4_type();
332    nir_variable *vs_out_pos =
333       nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
334    vs_out_pos->data.location = VARYING_SLOT_POS;
335 
336    nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
337    nir_store_var(&b, vs_out_pos, pos, 0xf);
338 
339    return b.shader;
340 }
341 
342 static nir_shader *
get_clear_rect_gs(const nir_shader_compiler_options * options,uint32_t push_constant_layer_base)343 get_clear_rect_gs(const nir_shader_compiler_options *options,
344                   uint32_t push_constant_layer_base)
345 {
346    /* FIXME: this creates a geometry shader that takes the index of a single
347     * layer to clear from push constants, so we need to emit a draw call for
348     * each layer that we want to clear. We could actually do better and have it
349     * take a range of layers and then emit one triangle per layer to clear,
350     * however, if we were to do this we would need to be careful not to exceed
351     * the maximum number of output vertices allowed in a geometry shader.
352     */
353    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
354                                                   "meta clear gs");
355    nir_shader *nir = b.shader;
356    nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
357    nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
358                                (1ull << VARYING_SLOT_LAYER);
359    nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
360    nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
361    nir->info.gs.vertices_in = 3;
362    nir->info.gs.vertices_out = 3;
363    nir->info.gs.invocations = 1;
364    nir->info.gs.active_stream_mask = 0x1;
365 
366    /* in vec4 gl_Position[3] */
367    nir_variable *gs_in_pos =
368       nir_variable_create(b.shader, nir_var_shader_in,
369                           glsl_array_type(glsl_vec4_type(), 3, 0),
370                           "in_gl_Position");
371    gs_in_pos->data.location = VARYING_SLOT_POS;
372 
373    /* out vec4 gl_Position */
374    nir_variable *gs_out_pos =
375       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
376                           "out_gl_Position");
377    gs_out_pos->data.location = VARYING_SLOT_POS;
378 
379    /* out float gl_Layer */
380    nir_variable *gs_out_layer =
381       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
382                           "out_gl_Layer");
383    gs_out_layer->data.location = VARYING_SLOT_LAYER;
384 
385    /* Emit output triangle */
386    for (uint32_t i = 0; i < 3; i++) {
387       /* gl_Position from shader input */
388       nir_deref_instr *in_pos_i =
389          nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
390       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
391 
392       /* gl_Layer from push constants */
393       nir_def *layer =
394          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
395                                 .base = push_constant_layer_base, .range = 4);
396       nir_store_var(&b, gs_out_layer, layer, 0x1);
397 
398       nir_emit_vertex(&b, 0);
399    }
400 
401    nir_end_primitive(&b, 0);
402 
403    return nir;
404 }
405 
406 static nir_shader *
get_color_clear_rect_fs(const nir_shader_compiler_options * options,uint32_t rt_idx,VkFormat format)407 get_color_clear_rect_fs(const nir_shader_compiler_options *options,
408                         uint32_t rt_idx, VkFormat format)
409 {
410    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
411                                                   "meta clear fs");
412 
413    enum pipe_format pformat = vk_format_to_pipe_format(format);
414    const struct glsl_type *fs_out_type =
415       util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
416 
417    nir_variable *fs_out_color =
418       nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
419    fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
420 
421    nir_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
422    nir_store_var(&b, fs_out_color, color_load, 0xf);
423 
424    return b.shader;
425 }
426 
427 static nir_shader *
get_depth_clear_rect_fs(const nir_shader_compiler_options * options)428 get_depth_clear_rect_fs(const nir_shader_compiler_options *options)
429 {
430    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
431                                                   "meta depth clear fs");
432 
433    nir_variable *fs_out_depth =
434       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
435                           "out_depth");
436    fs_out_depth->data.location = FRAG_RESULT_DEPTH;
437 
438    nir_def *depth_load =
439       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
440 
441    nir_store_var(&b, fs_out_depth, depth_load, 0x1);
442 
443    return b.shader;
444 }
445 
446 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * gs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)447 create_pipeline(struct v3dv_device *device,
448                 struct v3dv_render_pass *pass,
449                 uint32_t subpass_idx,
450                 uint32_t samples,
451                 struct nir_shader *vs_nir,
452                 struct nir_shader *gs_nir,
453                 struct nir_shader *fs_nir,
454                 const VkPipelineVertexInputStateCreateInfo *vi_state,
455                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
456                 const VkPipelineColorBlendStateCreateInfo *cb_state,
457                 const VkPipelineLayout layout,
458                 VkPipeline *pipeline)
459 {
460    VkPipelineShaderStageCreateInfo stages[3] = { 0 };
461    struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
462    struct vk_shader_module gs_m;
463    struct vk_shader_module fs_m;
464 
465    uint32_t stage_count = 0;
466    stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
467    stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
468    stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
469    stages[stage_count].pName = "main";
470    stage_count++;
471 
472    if (gs_nir) {
473       gs_m = vk_shader_module_from_nir(gs_nir);
474       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
475       stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
476       stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
477       stages[stage_count].pName = "main";
478       stage_count++;
479    }
480 
481    if (fs_nir) {
482       fs_m = vk_shader_module_from_nir(fs_nir);
483       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
484       stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
485       stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
486       stages[stage_count].pName = "main";
487       stage_count++;
488    }
489 
490    VkGraphicsPipelineCreateInfo info = {
491       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
492 
493       .stageCount = stage_count,
494       .pStages = stages,
495 
496       .pVertexInputState = vi_state,
497 
498       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
499          .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
500          .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
501          .primitiveRestartEnable = false,
502       },
503 
504       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
505          .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
506          .viewportCount = 1,
507          .scissorCount = 1,
508       },
509 
510       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
511          .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
512          .rasterizerDiscardEnable = false,
513          .polygonMode = VK_POLYGON_MODE_FILL,
514          .cullMode = VK_CULL_MODE_NONE,
515          .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
516          .depthBiasEnable = false,
517       },
518 
519       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
520          .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
521          .rasterizationSamples = samples,
522          .sampleShadingEnable = false,
523          .pSampleMask = NULL,
524          .alphaToCoverageEnable = false,
525          .alphaToOneEnable = false,
526       },
527 
528       .pDepthStencilState = ds_state,
529 
530       .pColorBlendState = cb_state,
531 
532       /* The meta clear pipeline declares all state as dynamic.
533        * As a consequence, vkCmdBindPipeline writes no dynamic state
534        * to the cmd buffer. Therefore, at the end of the meta clear,
535        * we need only restore dynamic state that was vkCmdSet.
536        */
537       .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
538          .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
539          .dynamicStateCount = 6,
540          .pDynamicStates = (VkDynamicState[]) {
541             VK_DYNAMIC_STATE_VIEWPORT,
542             VK_DYNAMIC_STATE_SCISSOR,
543             VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
544             VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
545             VK_DYNAMIC_STATE_STENCIL_REFERENCE,
546             VK_DYNAMIC_STATE_BLEND_CONSTANTS,
547             VK_DYNAMIC_STATE_DEPTH_BIAS,
548             VK_DYNAMIC_STATE_LINE_WIDTH,
549          },
550       },
551 
552       .flags = 0,
553       .layout = layout,
554       .renderPass = v3dv_render_pass_to_handle(pass),
555       .subpass = subpass_idx,
556    };
557 
558    VkResult result =
559       v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
560                                    VK_NULL_HANDLE,
561                                    1, &info,
562                                    &device->vk.alloc,
563                                    pipeline);
564 
565    ralloc_free(vs_nir);
566    ralloc_free(gs_nir);
567    ralloc_free(fs_nir);
568 
569    return result;
570 }
571 
572 static VkResult
create_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)573 create_color_clear_pipeline(struct v3dv_device *device,
574                             struct v3dv_render_pass *pass,
575                             uint32_t subpass_idx,
576                             uint32_t rt_idx,
577                             VkFormat format,
578                             VkSampleCountFlagBits samples,
579                             uint32_t components,
580                             bool is_layered,
581                             VkPipelineLayout pipeline_layout,
582                             VkPipeline *pipeline)
583 {
584    const nir_shader_compiler_options *options =
585       v3dv_pipeline_get_nir_options(&device->devinfo);
586 
587    nir_shader *vs_nir = get_clear_rect_vs(options);
588    nir_shader *fs_nir = get_color_clear_rect_fs(options, rt_idx, format);
589    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(options, 16) : NULL;
590 
591    const VkPipelineVertexInputStateCreateInfo vi_state = {
592       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
593       .vertexBindingDescriptionCount = 0,
594       .vertexAttributeDescriptionCount = 0,
595    };
596 
597    const VkPipelineDepthStencilStateCreateInfo ds_state = {
598       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
599       .depthTestEnable = false,
600       .depthWriteEnable = false,
601       .depthBoundsTestEnable = false,
602       .stencilTestEnable = false,
603    };
604 
605    assert(subpass_idx < pass->subpass_count);
606    const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
607    assert(rt_idx < color_count);
608 
609    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
610    for (uint32_t i = 0; i < color_count; i++) {
611       blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
612          .blendEnable = false,
613          .colorWriteMask = i == rt_idx ? components : 0,
614       };
615    }
616 
617    const VkPipelineColorBlendStateCreateInfo cb_state = {
618       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
619       .logicOpEnable = false,
620       .attachmentCount = color_count,
621       .pAttachments = blend_att_state
622    };
623 
624    return create_pipeline(device,
625                           pass, subpass_idx,
626                           samples,
627                           vs_nir, gs_nir, fs_nir,
628                           &vi_state,
629                           &ds_state,
630                           &cb_state,
631                           pipeline_layout,
632                           pipeline);
633 }
634 
635 static VkResult
create_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)636 create_depth_clear_pipeline(struct v3dv_device *device,
637                             VkImageAspectFlags aspects,
638                             struct v3dv_render_pass *pass,
639                             uint32_t subpass_idx,
640                             uint32_t samples,
641                             bool is_layered,
642                             VkPipelineLayout pipeline_layout,
643                             VkPipeline *pipeline)
644 {
645    const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
646    const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
647    assert(has_depth || has_stencil);
648 
649    const nir_shader_compiler_options *options =
650       v3dv_pipeline_get_nir_options(&device->devinfo);
651 
652    nir_shader *vs_nir = get_clear_rect_vs(options);
653    nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs(options) : NULL;
654    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(options, 4) : NULL;
655 
656    const VkPipelineVertexInputStateCreateInfo vi_state = {
657       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
658       .vertexBindingDescriptionCount = 0,
659       .vertexAttributeDescriptionCount = 0,
660    };
661 
662    const VkPipelineDepthStencilStateCreateInfo ds_state = {
663       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
664       .depthTestEnable = has_depth,
665       .depthWriteEnable = has_depth,
666       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
667       .depthBoundsTestEnable = false,
668       .stencilTestEnable = has_stencil,
669       .front = {
670          .passOp = VK_STENCIL_OP_REPLACE,
671          .compareOp = VK_COMPARE_OP_ALWAYS,
672          /* compareMask, writeMask and reference are dynamic state */
673       },
674       .back = { 0 },
675    };
676 
677    assert(subpass_idx < pass->subpass_count);
678    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
679    const VkPipelineColorBlendStateCreateInfo cb_state = {
680       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
681       .logicOpEnable = false,
682       .attachmentCount = pass->subpasses[subpass_idx].color_count,
683       .pAttachments = blend_att_state,
684    };
685 
686    return create_pipeline(device,
687                           pass, subpass_idx,
688                           samples,
689                           vs_nir, gs_nir, fs_nir,
690                           &vi_state,
691                           &ds_state,
692                           &cb_state,
693                           pipeline_layout,
694                           pipeline);
695 }
696 
697 static VkResult
create_color_clear_render_pass(struct v3dv_device * device,uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,VkRenderPass * pass)698 create_color_clear_render_pass(struct v3dv_device *device,
699                                uint32_t rt_idx,
700                                VkFormat format,
701                                VkSampleCountFlagBits samples,
702                                VkRenderPass *pass)
703 {
704    VkAttachmentDescription2 att = {
705       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
706       .format = format,
707       .samples = samples,
708       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
709       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
710       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
711       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
712    };
713 
714    VkAttachmentReference2 att_ref = {
715       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
716       .attachment = rt_idx,
717       .layout = VK_IMAGE_LAYOUT_GENERAL,
718    };
719 
720    VkSubpassDescription2 subpass = {
721       .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
722       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
723       .inputAttachmentCount = 0,
724       .colorAttachmentCount = 1,
725       .pColorAttachments = &att_ref,
726       .pResolveAttachments = NULL,
727       .pDepthStencilAttachment = NULL,
728       .preserveAttachmentCount = 0,
729       .pPreserveAttachments = NULL,
730    };
731 
732    VkRenderPassCreateInfo2 info = {
733       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
734       .attachmentCount = 1,
735       .pAttachments = &att,
736       .subpassCount = 1,
737       .pSubpasses = &subpass,
738       .dependencyCount = 0,
739       .pDependencies = NULL,
740    };
741 
742    return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
743                                  &info, &device->vk.alloc, pass);
744 }
745 
746 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,bool has_multiview)747 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
748                                    VkFormat format,
749                                    VkSampleCountFlagBits samples,
750                                    uint32_t components,
751                                    bool is_layered,
752                                    bool has_multiview)
753 {
754    assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
755 
756    uint64_t key = 0;
757    uint32_t bit_offset = 0;
758 
759    key |= rt_idx;
760    bit_offset += 3;
761 
762    key |= ((uint64_t) format) << bit_offset;
763    bit_offset += 32;
764 
765    key |= ((uint64_t) samples) << bit_offset;
766    bit_offset += 4;
767 
768    key |= ((uint64_t) components) << bit_offset;
769    bit_offset += 4;
770 
771    key |= (is_layered ? 1ull : 0ull) << bit_offset;
772    bit_offset += 1;
773 
774    key |= (has_multiview ? 1ull : 0ull) << bit_offset;
775    bit_offset += 1;
776 
777    assert(bit_offset <= 64);
778    return key;
779 }
780 
781 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples,bool is_layered,bool has_multiview)782 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
783                                    VkFormat format,
784                                    uint32_t samples,
785                                    bool is_layered,
786                                    bool has_multiview)
787 {
788    uint64_t key = 0;
789    uint32_t bit_offset = 0;
790 
791    key |= format;
792    bit_offset += 32;
793 
794    key |= ((uint64_t) samples) << bit_offset;
795    bit_offset += 4;
796 
797    const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
798    key |= ((uint64_t) has_depth) << bit_offset;
799    bit_offset++;
800 
801    const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
802    key |= ((uint64_t) has_stencil) << bit_offset;
803    bit_offset++;;
804 
805    key |= (is_layered ? 1ull : 0ull) << bit_offset;
806    bit_offset += 1;
807 
808    key |= (has_multiview ? 1ull : 0ull) << bit_offset;
809    bit_offset += 1;
810 
811    assert(bit_offset <= 64);
812    return key;
813 }
814 
815 static VkResult
get_color_clear_pipeline(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,uint32_t attachment_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,bool has_multiview,struct v3dv_meta_color_clear_pipeline ** pipeline)816 get_color_clear_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
817                          struct v3dv_render_pass *pass,
818                          uint32_t subpass_idx,
819                          uint32_t rt_idx,
820                          uint32_t attachment_idx,
821                          VkFormat format,
822                          VkSampleCountFlagBits samples,
823                          uint32_t components,
824                          bool is_layered,
825                          bool has_multiview,
826                          struct v3dv_meta_color_clear_pipeline **pipeline)
827 {
828    assert(vk_format_is_color(format));
829    struct v3dv_device *device = cmd_buffer->device;
830 
831    VkResult result = VK_SUCCESS;
832 
833    /* If pass != NULL it means that we are emitting the clear as a draw call
834     * in the current pass bound by the application. In that case, we can't
835     * cache the pipeline, since it will be referencing that pass and the
836     * application could be destroying it at any point. Hopefully, the perf
837     * impact is not too big since we still have the device pipeline cache
838     * around and we won't end up re-compiling the clear shader.
839     *
840     * FIXME: alternatively, we could refcount (or maybe clone) the render pass
841     * provided by the application and include it in the pipeline key setup
842     * to make caching safe in this scenario, however, based on tests with
843     * vkQuake3, the fact that we are not caching here doesn't seem to have
844     * any significant impact in performance, so it might not be worth it.
845     */
846    const bool can_cache_pipeline =
847       (pass == NULL) && (device->instance->meta_cache_enabled);
848 
849    uint64_t key;
850    if (can_cache_pipeline) {
851       key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
852                                                components, is_layered,
853                                                has_multiview);
854       mtx_lock(&device->meta.mtx);
855       struct hash_entry *entry =
856          _mesa_hash_table_search(device->meta.color_clear.cache, &key);
857       if (entry) {
858          mtx_unlock(&device->meta.mtx);
859          *pipeline = entry->data;
860          return VK_SUCCESS;
861       }
862    }
863 
864    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
865                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
866 
867    if (*pipeline == NULL) {
868       result = VK_ERROR_OUT_OF_HOST_MEMORY;
869       goto fail;
870    }
871 
872    if (!pass) {
873       result = create_color_clear_render_pass(device,
874                                               rt_idx,
875                                               format,
876                                               samples,
877                                               &(*pipeline)->pass);
878       if (result != VK_SUCCESS)
879          goto fail;
880 
881       pass = v3dv_render_pass_from_handle((*pipeline)->pass);
882    } else {
883       (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
884    }
885 
886    result = create_color_clear_pipeline(device,
887                                         pass,
888                                         subpass_idx,
889                                         rt_idx,
890                                         format,
891                                         samples,
892                                         components,
893                                         is_layered,
894                                         device->meta.color_clear.p_layout,
895                                         &(*pipeline)->pipeline);
896    if (result != VK_SUCCESS)
897       goto fail;
898 
899    if (can_cache_pipeline) {
900       (*pipeline)->key = key;
901       (*pipeline)->cached = true;
902       _mesa_hash_table_insert(device->meta.color_clear.cache,
903                               &(*pipeline)->key, *pipeline);
904 
905       mtx_unlock(&device->meta.mtx);
906    } else {
907       v3dv_cmd_buffer_add_private_obj(
908          cmd_buffer, (uintptr_t)*pipeline,
909          (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_color_clear_pipeline);
910    }
911 
912    return VK_SUCCESS;
913 
914 fail:
915    if (can_cache_pipeline)
916       mtx_unlock(&device->meta.mtx);
917 
918    VkDevice _device = v3dv_device_to_handle(device);
919    if (*pipeline) {
920       if ((*pipeline)->cached)
921          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
922       if ((*pipeline)->pipeline)
923          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
924       vk_free(&device->vk.alloc, *pipeline);
925       *pipeline = NULL;
926    }
927 
928    return result;
929 }
930 
931 static VkResult
get_depth_clear_pipeline(struct v3dv_cmd_buffer * cmd_buffer,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t attachment_idx,bool is_layered,bool has_multiview,struct v3dv_meta_depth_clear_pipeline ** pipeline)932 get_depth_clear_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
933                          VkImageAspectFlags aspects,
934                          struct v3dv_render_pass *pass,
935                          uint32_t subpass_idx,
936                          uint32_t attachment_idx,
937                          bool is_layered,
938                          bool has_multiview,
939                          struct v3dv_meta_depth_clear_pipeline **pipeline)
940 {
941    assert(subpass_idx < pass->subpass_count);
942    assert(attachment_idx != VK_ATTACHMENT_UNUSED);
943    assert(attachment_idx < pass->attachment_count);
944 
945    VkResult result = VK_SUCCESS;
946    struct v3dv_device *device = cmd_buffer->device;
947 
948    const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
949    const VkFormat format = pass->attachments[attachment_idx].desc.format;
950    assert(vk_format_is_depth_or_stencil(format));
951 
952    uint64_t key;
953    if (device->instance->meta_cache_enabled) {
954       key = get_depth_clear_pipeline_cache_key(aspects, format, samples,
955                                                is_layered, has_multiview);
956       mtx_lock(&device->meta.mtx);
957       struct hash_entry *entry =
958          _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
959       if (entry) {
960          mtx_unlock(&device->meta.mtx);
961          *pipeline = entry->data;
962          return VK_SUCCESS;
963       }
964    }
965 
966    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
967                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
968 
969    if (*pipeline == NULL) {
970       result = VK_ERROR_OUT_OF_HOST_MEMORY;
971       goto fail;
972    }
973 
974    result = create_depth_clear_pipeline(device,
975                                         aspects,
976                                         pass,
977                                         subpass_idx,
978                                         samples,
979                                         is_layered,
980                                         device->meta.depth_clear.p_layout,
981                                         &(*pipeline)->pipeline);
982    if (result != VK_SUCCESS)
983       goto fail;
984 
985    if (device->instance->meta_cache_enabled) {
986       (*pipeline)->key = key;
987       _mesa_hash_table_insert(device->meta.depth_clear.cache,
988                               &(*pipeline)->key, *pipeline);
989       mtx_unlock(&device->meta.mtx);
990    } else {
991       v3dv_cmd_buffer_add_private_obj(
992          cmd_buffer, (uintptr_t)*pipeline,
993          (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_depth_clear_pipeline);
994    }
995 
996    return VK_SUCCESS;
997 
998 fail:
999    if (device->instance->meta_cache_enabled)
1000       mtx_unlock(&device->meta.mtx);
1001 
1002    VkDevice _device = v3dv_device_to_handle(device);
1003    if (*pipeline) {
1004       if ((*pipeline)->pipeline)
1005          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
1006       vk_free(&device->vk.alloc, *pipeline);
1007       *pipeline = NULL;
1008    }
1009 
1010    return result;
1011 }
1012 
1013 /* Emits a scissored quad in the clear color */
1014 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,uint32_t rt_idx,const VkClearColorValue * clear_color,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1015 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1016                                struct v3dv_render_pass *pass,
1017                                struct v3dv_subpass *subpass,
1018                                uint32_t rt_idx,
1019                                const VkClearColorValue *clear_color,
1020                                bool is_layered,
1021                                bool all_rects_same_layers,
1022                                uint32_t rect_count,
1023                                const VkClearRect *rects)
1024 {
1025    /* Skip if attachment is unused in the current subpass */
1026    assert(rt_idx < subpass->color_count);
1027    const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1028    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1029       return;
1030 
1031    /* Obtain a pipeline for this clear */
1032    assert(attachment_idx < pass->attachment_count);
1033    const VkFormat format = pass->attachments[attachment_idx].desc.format;
1034    const VkSampleCountFlagBits samples =
1035       pass->attachments[attachment_idx].desc.samples;
1036    const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1037                                VK_COLOR_COMPONENT_G_BIT |
1038                                VK_COLOR_COMPONENT_B_BIT |
1039                                VK_COLOR_COMPONENT_A_BIT;
1040 
1041    struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1042    VkResult result = get_color_clear_pipeline(cmd_buffer,
1043                                               pass,
1044                                               cmd_buffer->state.subpass_idx,
1045                                               rt_idx,
1046                                               attachment_idx,
1047                                               format,
1048                                               samples,
1049                                               components,
1050                                               is_layered,
1051                                               pass->multiview_enabled,
1052                                               &pipeline);
1053    if (result != VK_SUCCESS) {
1054       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1055          v3dv_flag_oom(cmd_buffer, NULL);
1056       return;
1057    }
1058    assert(pipeline && pipeline->pipeline);
1059 
1060    /* Emit clear rects */
1061    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1062 
1063    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1064    v3dv_CmdPushConstants(cmd_buffer_handle,
1065                          cmd_buffer->device->meta.depth_clear.p_layout,
1066                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1067                          clear_color->float32);
1068 
1069    v3dv_CmdBindPipeline(cmd_buffer_handle,
1070                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1071                         pipeline->pipeline);
1072 
1073    for (uint32_t i = 0; i < rect_count; i++) {
1074       const VkViewport viewport = {
1075          .x = rects[i].rect.offset.x,
1076          .y = rects[i].rect.offset.y,
1077          .width = rects[i].rect.extent.width,
1078          .height = rects[i].rect.extent.height,
1079          .minDepth = 0.0f,
1080          .maxDepth = 1.0f
1081       };
1082       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1083       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1084 
1085       if (is_layered) {
1086          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1087               layer_offset++) {
1088             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1089             v3dv_CmdPushConstants(cmd_buffer_handle,
1090                                   cmd_buffer->device->meta.depth_clear.p_layout,
1091                                   VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1092             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1093          }
1094       } else {
1095          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1096          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1097       }
1098    }
1099 
1100    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
1101 }
1102 
1103 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1104  * and the stencil aspect by using stencil testing.
1105  */
1106 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,VkImageAspectFlags aspects,const VkClearDepthStencilValue * clear_ds,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1107 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1108                             struct v3dv_render_pass *pass,
1109                             struct v3dv_subpass *subpass,
1110                             VkImageAspectFlags aspects,
1111                             const VkClearDepthStencilValue *clear_ds,
1112                             bool is_layered,
1113                             bool all_rects_same_layers,
1114                             uint32_t rect_count,
1115                             const VkClearRect *rects)
1116 {
1117    /* Skip if attachment is unused in the current subpass */
1118    const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1119    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1120       return;
1121 
1122    /* Obtain a pipeline for this clear */
1123    assert(attachment_idx < pass->attachment_count);
1124    struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1125 
1126    VkResult result = get_depth_clear_pipeline(cmd_buffer,
1127                                               aspects,
1128                                               pass,
1129                                               cmd_buffer->state.subpass_idx,
1130                                               attachment_idx,
1131                                               is_layered,
1132                                               pass->multiview_enabled,
1133                                               &pipeline);
1134    if (result != VK_SUCCESS) {
1135       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1136          v3dv_flag_oom(cmd_buffer, NULL);
1137       return;
1138    }
1139    assert(pipeline && pipeline->pipeline);
1140 
1141    /* Emit clear rects */
1142    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1143 
1144    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1145    v3dv_CmdPushConstants(cmd_buffer_handle,
1146                          cmd_buffer->device->meta.depth_clear.p_layout,
1147                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1148                          &clear_ds->depth);
1149 
1150    v3dv_CmdBindPipeline(cmd_buffer_handle,
1151                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1152                         pipeline->pipeline);
1153 
1154    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1155       vk_common_CmdSetStencilReference(cmd_buffer_handle,
1156                                        VK_STENCIL_FACE_FRONT_AND_BACK,
1157                                        clear_ds->stencil);
1158       vk_common_CmdSetStencilWriteMask(cmd_buffer_handle,
1159                                        VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1160       vk_common_CmdSetStencilCompareMask(cmd_buffer_handle,
1161                                          VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1162    }
1163 
1164    for (uint32_t i = 0; i < rect_count; i++) {
1165       const VkViewport viewport = {
1166          .x = rects[i].rect.offset.x,
1167          .y = rects[i].rect.offset.y,
1168          .width = rects[i].rect.extent.width,
1169          .height = rects[i].rect.extent.height,
1170          .minDepth = 0.0f,
1171          .maxDepth = 1.0f
1172       };
1173       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1174       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1175       if (is_layered) {
1176          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1177               layer_offset++) {
1178             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1179             v3dv_CmdPushConstants(cmd_buffer_handle,
1180                                   cmd_buffer->device->meta.depth_clear.p_layout,
1181                                   VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1182             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1183          }
1184       } else {
1185          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1186          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1187       }
1188    }
1189 
1190    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
1191 }
1192 
1193 static void
gather_layering_info(uint32_t rect_count,const VkClearRect * rects,bool * is_layered,bool * all_rects_same_layers)1194 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1195                      bool *is_layered, bool *all_rects_same_layers)
1196 {
1197    *all_rects_same_layers = true;
1198 
1199    uint32_t min_layer = rects[0].baseArrayLayer;
1200    uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1201    for (uint32_t i = 1; i < rect_count; i++) {
1202       if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1203           rects[i].layerCount != rects[i - 1].layerCount) {
1204          *all_rects_same_layers = false;
1205          min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1206          max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1207                                      rects[i].layerCount - 1);
1208       }
1209    }
1210 
1211    *is_layered = !(min_layer == 0 && max_layer == 0);
1212 }
1213 
1214 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1215 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1216                          uint32_t attachmentCount,
1217                          const VkClearAttachment *pAttachments,
1218                          uint32_t rectCount,
1219                          const VkClearRect *pRects)
1220 {
1221    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1222 
1223    /* We can have at most max_color_RTs + 1 D/S attachments */
1224    assert(attachmentCount <=
1225           V3D_MAX_RENDER_TARGETS(cmd_buffer->device->devinfo.ver) + 1);
1226 
1227    /* We can only clear attachments in the current subpass */
1228    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1229 
1230    assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1231    struct v3dv_subpass *subpass =
1232       &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1233 
1234    /* Emit a clear rect inside the current job for this subpass. For layered
1235     * framebuffers, we use a geometry shader to redirect clears to the
1236     * appropriate layers.
1237     */
1238 
1239    v3dv_cmd_buffer_pause_occlusion_query(cmd_buffer);
1240 
1241    bool is_layered, all_rects_same_layers;
1242    gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1243    for (uint32_t i = 0; i < attachmentCount; i++) {
1244       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1245          emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1246                                         pAttachments[i].colorAttachment,
1247                                         &pAttachments[i].clearValue.color,
1248                                         is_layered, all_rects_same_layers,
1249                                         rectCount, pRects);
1250       } else {
1251          emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1252                                      pAttachments[i].aspectMask,
1253                                      &pAttachments[i].clearValue.depthStencil,
1254                                      is_layered, all_rects_same_layers,
1255                                      rectCount, pRects);
1256       }
1257    }
1258 
1259    v3dv_cmd_buffer_resume_occlusion_query(cmd_buffer);
1260 }
1261