1 /*
2 * Copyright © 2020 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 #include "vk_common_entrypoints.h"
30
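/* Packs a VkClearColorValue into the hardware clear color words used by the
 * TLB. When the image format doesn't match the framebuffer format chosen for
 * the clear, the value is packed following the original image format's
 * semantics rather than through the hardware-specific helper.
 */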
31 static void
32 get_hw_clear_color(struct v3dv_device *device,
33 const VkClearColorValue *color,
34 VkFormat fb_format,
35 VkFormat image_format,
36 uint32_t internal_type,
37 uint32_t internal_bpp,
38 uint32_t *hw_color)
39 {
40 const uint32_t internal_size = 4 << internal_bpp;
41
42    /* If the image format doesn't match the framebuffer format, then we are
43     * clearing an image whose format is not supported by the TLB through a
44     * compatible framebuffer format. In that case, we must make sure that we
45     * pack the clear value according to the semantics of the original image
46     * format, not the compatible format.
47     */
48 if (fb_format == image_format) {
49 v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
50 hw_color);
51 } else {
52 union util_color uc;
53 enum pipe_format pipe_image_format =
54 vk_format_to_pipe_format(image_format);
55 util_pack_color(color->float32, pipe_image_format, &uc);
56 memcpy(hw_color, uc.ui, internal_size);
57 }
58 }
59
60 /* Returns true if the TLB clear path is able to handle the clear, false
61  * otherwise.
62  */
63 static bool
64 clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
65 struct v3dv_image *image,
66 const VkClearValue *clear_value,
67 const VkImageSubresourceRange *range)
68 {
69 const VkOffset3D origin = { 0, 0, 0 };
70 VkFormat fb_format;
71
72 /* From vkCmdClearColorImage spec:
73 * "image must not use any of the formats that require a sampler YCBCR
74 * conversion"
75 */
76 assert(image->plane_count == 1);
77 if (!v3dv_meta_can_use_tlb(image, 0, 0, &origin, NULL, &fb_format))
78 return false;
79
80 uint32_t internal_type, internal_bpp;
81 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
82 (fb_format, range->aspectMask,
83 &internal_type, &internal_bpp);
84
85 union v3dv_clear_value hw_clear_value = { 0 };
86 if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
87 get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
88 image->vk.format, internal_type, internal_bpp,
89 &hw_clear_value.color[0]);
90 } else {
91 assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
92 (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
93 hw_clear_value.z = clear_value->depthStencil.depth;
94 hw_clear_value.s = clear_value->depthStencil.stencil;
95 }
96
97 uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
98 uint32_t min_level = range->baseMipLevel;
99 uint32_t max_level = range->baseMipLevel + level_count;
100
101    /* For 3D images, baseArrayLayer and layerCount must be 0 and 1,
102     * respectively. Instead, we need to consider the full depth dimension of
103     * the image, which goes from 0 up to the level's depth extent.
104     */
105 uint32_t min_layer;
106 uint32_t max_layer;
107 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
108 min_layer = range->baseArrayLayer;
109 max_layer = range->baseArrayLayer +
110 vk_image_subresource_layer_count(&image->vk, range);
111 } else {
112 min_layer = 0;
113 max_layer = 0;
114 }
115
116 for (uint32_t level = min_level; level < max_level; level++) {
117 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
118 max_layer = u_minify(image->vk.extent.depth, level);
119
120 uint32_t width = u_minify(image->vk.extent.width, level);
121 uint32_t height = u_minify(image->vk.extent.height, level);
122
123 struct v3dv_job *job =
124 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
125
126 if (!job)
127 return true;
128
129 v3dv_job_start_frame(job, width, height, max_layer,
130 false, true, 1, internal_bpp,
131 4 * v3d_internal_bpp_words(internal_bpp),
132 image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
133
134 struct v3dv_meta_framebuffer framebuffer;
135 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
136 internal_type,
137 &job->frame_tiling);
138
139 v3dv_X(job->device, job_emit_binning_flush)(job);
140
141       /* If this triggers, it is an application bug: the spec requires
142        * that any aspects to clear be present in the image.
143        */
144 assert(range->aspectMask & image->vk.aspects);
145
146 v3dv_X(job->device, meta_emit_clear_image_rcl)
147 (job, image, &framebuffer, &hw_clear_value,
148 range->aspectMask, min_layer, max_layer, level);
149
150 v3dv_cmd_buffer_finish_job(cmd_buffer);
151 }
152
153 return true;
154 }
155
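/* vkCmdClearColorImage is implemented with TLB clear jobs: one frame per mip
 * level in each subresource range, covering all the selected layers. The
 * command buffer is temporarily flagged as recording a transfer operation
 * while the clear jobs are emitted.
 */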
156 VKAPI_ATTR void VKAPI_CALL
157 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
158 VkImage _image,
159 VkImageLayout imageLayout,
160 const VkClearColorValue *pColor,
161 uint32_t rangeCount,
162 const VkImageSubresourceRange *pRanges)
163 {
164 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
165 V3DV_FROM_HANDLE(v3dv_image, image, _image);
166
167 const VkClearValue clear_value = {
168 .color = *pColor,
169 };
170
171 cmd_buffer->state.is_transfer = true;
172
173 for (uint32_t i = 0; i < rangeCount; i++) {
174 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
175 continue;
176 unreachable("Unsupported color clear.");
177 }
178
179 cmd_buffer->state.is_transfer = false;
180 }
181
182 VKAPI_ATTR void VKAPI_CALL
183 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
184 VkImage _image,
185 VkImageLayout imageLayout,
186 const VkClearDepthStencilValue *pDepthStencil,
187 uint32_t rangeCount,
188 const VkImageSubresourceRange *pRanges)
189 {
190 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
191 V3DV_FROM_HANDLE(v3dv_image, image, _image);
192
193 const VkClearValue clear_value = {
194 .depthStencil = *pDepthStencil,
195 };
196
197 cmd_buffer->state.is_transfer = true;
198
199 for (uint32_t i = 0; i < rangeCount; i++) {
200 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
201 continue;
202 unreachable("Unsupported depth/stencil clear.");
203 }
204
205 cmd_buffer->state.is_transfer = false;
206 }
207
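/* The pipeline is passed as a uint64_t handle so this destructor can also be
 * used directly as a v3dv_cmd_buffer_private_obj_destroy_cb for pipelines
 * that could not be cached.
 */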
208 static void
209 destroy_color_clear_pipeline(VkDevice _device,
210 uint64_t pipeline,
211 VkAllocationCallbacks *alloc)
212 {
213 struct v3dv_meta_color_clear_pipeline *p =
214 (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
215 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
216 if (p->cached)
217 v3dv_DestroyRenderPass(_device, p->pass, alloc);
218 vk_free(alloc, p);
219 }
220
221 static void
222 destroy_depth_clear_pipeline(VkDevice _device,
223 struct v3dv_meta_depth_clear_pipeline *p,
224 VkAllocationCallbacks *alloc)
225 {
226 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
227 vk_free(alloc, p);
228 }
229
230 static VkResult
231 create_color_clear_pipeline_layout(struct v3dv_device *device,
232 VkPipelineLayout *pipeline_layout)
233 {
234    /* FIXME: this abuses the API a bit, since not all of our clear
235     * pipelines have a geometry shader. We could create two different
236     * pipeline layouts, but this works for us for now.
237     */
238 VkPushConstantRange ranges[2] = {
239 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
240 { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
241 };
242
243 VkPipelineLayoutCreateInfo info = {
244 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
245 .setLayoutCount = 0,
246 .pushConstantRangeCount = 2,
247 .pPushConstantRanges = ranges,
248 };
249
250 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
251 &info, &device->vk.alloc, pipeline_layout);
252 }
253
254 static VkResult
255 create_depth_clear_pipeline_layout(struct v3dv_device *device,
256 VkPipelineLayout *pipeline_layout)
257 {
258    /* FIXME: this abuses the API a bit, since not all of our clear
259     * pipelines have a geometry shader. We could create two different
260     * pipeline layouts, but this works for us for now.
261     */
262 VkPushConstantRange ranges[2] = {
263 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
264 { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
265 };
266
267 VkPipelineLayoutCreateInfo info = {
268 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
269 .setLayoutCount = 0,
270 .pushConstantRangeCount = 2,
271 .pPushConstantRanges = ranges
272 };
273
274 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
275 &info, &device->vk.alloc, pipeline_layout);
276 }
277
278 void
279 v3dv_meta_clear_init(struct v3dv_device *device)
280 {
281 if (device->instance->meta_cache_enabled) {
282 device->meta.color_clear.cache =
283 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
284
285 device->meta.depth_clear.cache =
286 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
287 }
288
289 create_color_clear_pipeline_layout(device,
290 &device->meta.color_clear.p_layout);
291 create_depth_clear_pipeline_layout(device,
292 &device->meta.depth_clear.p_layout);
293 }
294
295 void
296 v3dv_meta_clear_finish(struct v3dv_device *device)
297 {
298 VkDevice _device = v3dv_device_to_handle(device);
299
300 if (device->instance->meta_cache_enabled) {
301 hash_table_foreach(device->meta.color_clear.cache, entry) {
302 struct v3dv_meta_color_clear_pipeline *item = entry->data;
303 destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
304 }
305 _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
306
307 hash_table_foreach(device->meta.depth_clear.cache, entry) {
308 struct v3dv_meta_depth_clear_pipeline *item = entry->data;
309 destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
310 }
311 _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
312 }
313
314 if (device->meta.color_clear.p_layout) {
315 v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
316 &device->vk.alloc);
317 }
318
319 if (device->meta.depth_clear.p_layout) {
320 v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
321 &device->vk.alloc);
322 }
323 }
324
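/* Vertex shader for clear rects: emits the vertices of a full-viewport quad.
 * The area actually cleared is controlled with the dynamic viewport and
 * scissor state set when the clear rects are emitted.
 */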
325 static nir_shader *
326 get_clear_rect_vs(const nir_shader_compiler_options *options)
327 {
328 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
329 "meta clear vs");
330
331 const struct glsl_type *vec4 = glsl_vec4_type();
332 nir_variable *vs_out_pos =
333 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
334 vs_out_pos->data.location = VARYING_SLOT_POS;
335
336 nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
337 nir_store_var(&b, vs_out_pos, pos, 0xf);
338
339 return b.shader;
340 }
341
342 static nir_shader *
343 get_clear_rect_gs(const nir_shader_compiler_options *options,
344 uint32_t push_constant_layer_base)
345 {
346    /* FIXME: this creates a geometry shader that reads the index of a single
347     * layer to clear from push constants, so we need to emit one draw call per
348     * layer we want to clear. We could do better by having it take a range of
349     * layers and emit one triangle per layer to clear; however, we would then
350     * need to be careful not to exceed the maximum number of output vertices
351     * allowed in a geometry shader.
352     */
353 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
354 "meta clear gs");
355 nir_shader *nir = b.shader;
356 nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
357 nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
358 (1ull << VARYING_SLOT_LAYER);
359 nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
360 nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
361 nir->info.gs.vertices_in = 3;
362 nir->info.gs.vertices_out = 3;
363 nir->info.gs.invocations = 1;
364 nir->info.gs.active_stream_mask = 0x1;
365
366 /* in vec4 gl_Position[3] */
367 nir_variable *gs_in_pos =
368 nir_variable_create(b.shader, nir_var_shader_in,
369 glsl_array_type(glsl_vec4_type(), 3, 0),
370 "in_gl_Position");
371 gs_in_pos->data.location = VARYING_SLOT_POS;
372
373 /* out vec4 gl_Position */
374 nir_variable *gs_out_pos =
375 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
376 "out_gl_Position");
377 gs_out_pos->data.location = VARYING_SLOT_POS;
378
379 /* out float gl_Layer */
380 nir_variable *gs_out_layer =
381 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
382 "out_gl_Layer");
383 gs_out_layer->data.location = VARYING_SLOT_LAYER;
384
385 /* Emit output triangle */
386 for (uint32_t i = 0; i < 3; i++) {
387 /* gl_Position from shader input */
388 nir_deref_instr *in_pos_i =
389 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
390 nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
391
392 /* gl_Layer from push constants */
393 nir_def *layer =
394 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
395 .base = push_constant_layer_base, .range = 4);
396 nir_store_var(&b, gs_out_layer, layer, 0x1);
397
398 nir_emit_vertex(&b, 0);
399 }
400
401 nir_end_primitive(&b, 0);
402
403 return nir;
404 }
405
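/* Fragment shader for color clears: writes the clear color, loaded from push
 * constants, to the render target being cleared. Non-float formats use a
 * uvec4 output so integer clear values are passed through unmodified.
 */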
406 static nir_shader *
407 get_color_clear_rect_fs(const nir_shader_compiler_options *options,
408 uint32_t rt_idx, VkFormat format)
409 {
410 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
411 "meta clear fs");
412
413 enum pipe_format pformat = vk_format_to_pipe_format(format);
414 const struct glsl_type *fs_out_type =
415 util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
416
417 nir_variable *fs_out_color =
418 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
419 fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
420
421 nir_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
422 nir_store_var(&b, fs_out_color, color_load, 0xf);
423
424 return b.shader;
425 }
426
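/* Fragment shader for depth clears: writes the clear depth value, loaded from
 * push constants, to gl_FragDepth. Stencil-only clears don't need a fragment
 * shader at all: the stencil value is written through the stencil test state.
 */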
427 static nir_shader *
428 get_depth_clear_rect_fs(const nir_shader_compiler_options *options)
429 {
430 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
431 "meta depth clear fs");
432
433 nir_variable *fs_out_depth =
434 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
435 "out_depth");
436 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
437
438 nir_def *depth_load =
439 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
440
441 nir_store_var(&b, fs_out_depth, depth_load, 0x1);
442
443 return b.shader;
444 }
445
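/* Common helper to build a clear pipeline from the given shaders and state.
 * All state that may vary between clears (viewport, scissor, stencil
 * parameters, etc.) is declared dynamic so pipelines can be reused. The NIR
 * shaders are consumed by this function.
 */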
446 static VkResult
447 create_pipeline(struct v3dv_device *device,
448 struct v3dv_render_pass *pass,
449 uint32_t subpass_idx,
450 uint32_t samples,
451 struct nir_shader *vs_nir,
452 struct nir_shader *gs_nir,
453 struct nir_shader *fs_nir,
454 const VkPipelineVertexInputStateCreateInfo *vi_state,
455 const VkPipelineDepthStencilStateCreateInfo *ds_state,
456 const VkPipelineColorBlendStateCreateInfo *cb_state,
457 const VkPipelineLayout layout,
458 VkPipeline *pipeline)
459 {
460 VkPipelineShaderStageCreateInfo stages[3] = { 0 };
461 struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
462 struct vk_shader_module gs_m;
463 struct vk_shader_module fs_m;
464
465 uint32_t stage_count = 0;
466 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
467 stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
468 stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
469 stages[stage_count].pName = "main";
470 stage_count++;
471
472 if (gs_nir) {
473 gs_m = vk_shader_module_from_nir(gs_nir);
474 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
475 stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
476 stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
477 stages[stage_count].pName = "main";
478 stage_count++;
479 }
480
481 if (fs_nir) {
482 fs_m = vk_shader_module_from_nir(fs_nir);
483 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
484 stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
485 stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
486 stages[stage_count].pName = "main";
487 stage_count++;
488 }
489
490 VkGraphicsPipelineCreateInfo info = {
491 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
492
493 .stageCount = stage_count,
494 .pStages = stages,
495
496 .pVertexInputState = vi_state,
497
498 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
499 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
500 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
501 .primitiveRestartEnable = false,
502 },
503
504 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
505 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
506 .viewportCount = 1,
507 .scissorCount = 1,
508 },
509
510 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
511 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
512 .rasterizerDiscardEnable = false,
513 .polygonMode = VK_POLYGON_MODE_FILL,
514 .cullMode = VK_CULL_MODE_NONE,
515 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
516 .depthBiasEnable = false,
517 },
518
519 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
520 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
521 .rasterizationSamples = samples,
522 .sampleShadingEnable = false,
523 .pSampleMask = NULL,
524 .alphaToCoverageEnable = false,
525 .alphaToOneEnable = false,
526 },
527
528 .pDepthStencilState = ds_state,
529
530 .pColorBlendState = cb_state,
531
532 /* The meta clear pipeline declares all state as dynamic.
533 * As a consequence, vkCmdBindPipeline writes no dynamic state
534 * to the cmd buffer. Therefore, at the end of the meta clear,
535 * we need only restore dynamic state that was vkCmdSet.
536 */
537 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
538 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
539          .dynamicStateCount = 8,
540 .pDynamicStates = (VkDynamicState[]) {
541 VK_DYNAMIC_STATE_VIEWPORT,
542 VK_DYNAMIC_STATE_SCISSOR,
543 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
544 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
545 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
546 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
547 VK_DYNAMIC_STATE_DEPTH_BIAS,
548 VK_DYNAMIC_STATE_LINE_WIDTH,
549 },
550 },
551
552 .flags = 0,
553 .layout = layout,
554 .renderPass = v3dv_render_pass_to_handle(pass),
555 .subpass = subpass_idx,
556 };
557
558 VkResult result =
559 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
560 VK_NULL_HANDLE,
561 1, &info,
562 &device->vk.alloc,
563 pipeline);
564
565 ralloc_free(vs_nir);
566 ralloc_free(gs_nir);
567 ralloc_free(fs_nir);
568
569 return result;
570 }
571
572 static VkResult
573 create_color_clear_pipeline(struct v3dv_device *device,
574 struct v3dv_render_pass *pass,
575 uint32_t subpass_idx,
576 uint32_t rt_idx,
577 VkFormat format,
578 VkSampleCountFlagBits samples,
579 uint32_t components,
580 bool is_layered,
581 VkPipelineLayout pipeline_layout,
582 VkPipeline *pipeline)
583 {
584 const nir_shader_compiler_options *options =
585 v3dv_pipeline_get_nir_options(&device->devinfo);
586
587 nir_shader *vs_nir = get_clear_rect_vs(options);
588 nir_shader *fs_nir = get_color_clear_rect_fs(options, rt_idx, format);
589 nir_shader *gs_nir = is_layered ? get_clear_rect_gs(options, 16) : NULL;
590
591 const VkPipelineVertexInputStateCreateInfo vi_state = {
592 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
593 .vertexBindingDescriptionCount = 0,
594 .vertexAttributeDescriptionCount = 0,
595 };
596
597 const VkPipelineDepthStencilStateCreateInfo ds_state = {
598 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
599 .depthTestEnable = false,
600 .depthWriteEnable = false,
601 .depthBoundsTestEnable = false,
602 .stencilTestEnable = false,
603 };
604
605 assert(subpass_idx < pass->subpass_count);
606 const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
607 assert(rt_idx < color_count);
608
609 VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
610 for (uint32_t i = 0; i < color_count; i++) {
611 blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
612 .blendEnable = false,
613 .colorWriteMask = i == rt_idx ? components : 0,
614 };
615 }
616
617 const VkPipelineColorBlendStateCreateInfo cb_state = {
618 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
619 .logicOpEnable = false,
620 .attachmentCount = color_count,
621 .pAttachments = blend_att_state
622 };
623
624 return create_pipeline(device,
625 pass, subpass_idx,
626 samples,
627 vs_nir, gs_nir, fs_nir,
628 &vi_state,
629 &ds_state,
630 &cb_state,
631 pipeline_layout,
632 pipeline);
633 }
634
635 static VkResult
636 create_depth_clear_pipeline(struct v3dv_device *device,
637 VkImageAspectFlags aspects,
638 struct v3dv_render_pass *pass,
639 uint32_t subpass_idx,
640 uint32_t samples,
641 bool is_layered,
642 VkPipelineLayout pipeline_layout,
643 VkPipeline *pipeline)
644 {
645 const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
646 const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
647 assert(has_depth || has_stencil);
648
649 const nir_shader_compiler_options *options =
650 v3dv_pipeline_get_nir_options(&device->devinfo);
651
652 nir_shader *vs_nir = get_clear_rect_vs(options);
653 nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs(options) : NULL;
654 nir_shader *gs_nir = is_layered ? get_clear_rect_gs(options, 4) : NULL;
655
656 const VkPipelineVertexInputStateCreateInfo vi_state = {
657 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
658 .vertexBindingDescriptionCount = 0,
659 .vertexAttributeDescriptionCount = 0,
660 };
661
662 const VkPipelineDepthStencilStateCreateInfo ds_state = {
663 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
664 .depthTestEnable = has_depth,
665 .depthWriteEnable = has_depth,
666 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
667 .depthBoundsTestEnable = false,
668 .stencilTestEnable = has_stencil,
669 .front = {
670 .passOp = VK_STENCIL_OP_REPLACE,
671 .compareOp = VK_COMPARE_OP_ALWAYS,
672 /* compareMask, writeMask and reference are dynamic state */
673 },
674 .back = { 0 },
675 };
676
677 assert(subpass_idx < pass->subpass_count);
678 VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
679 const VkPipelineColorBlendStateCreateInfo cb_state = {
680 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
681 .logicOpEnable = false,
682 .attachmentCount = pass->subpasses[subpass_idx].color_count,
683 .pAttachments = blend_att_state,
684 };
685
686 return create_pipeline(device,
687 pass, subpass_idx,
688 samples,
689 vs_nir, gs_nir, fs_nir,
690 &vi_state,
691 &ds_state,
692 &cb_state,
693 pipeline_layout,
694 pipeline);
695 }
696
697 static VkResult
698 create_color_clear_render_pass(struct v3dv_device *device,
699 uint32_t rt_idx,
700 VkFormat format,
701 VkSampleCountFlagBits samples,
702 VkRenderPass *pass)
703 {
704 VkAttachmentDescription2 att = {
705 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
706 .format = format,
707 .samples = samples,
708 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
709 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
710 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
711 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
712 };
713
714 VkAttachmentReference2 att_ref = {
715 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
716 .attachment = rt_idx,
717 .layout = VK_IMAGE_LAYOUT_GENERAL,
718 };
719
720 VkSubpassDescription2 subpass = {
721 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
722 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
723 .inputAttachmentCount = 0,
724 .colorAttachmentCount = 1,
725 .pColorAttachments = &att_ref,
726 .pResolveAttachments = NULL,
727 .pDepthStencilAttachment = NULL,
728 .preserveAttachmentCount = 0,
729 .pPreserveAttachments = NULL,
730 };
731
732 VkRenderPassCreateInfo2 info = {
733 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
734 .attachmentCount = 1,
735 .pAttachments = &att,
736 .subpassCount = 1,
737 .pSubpasses = &subpass,
738 .dependencyCount = 0,
739 .pDependencies = NULL,
740 };
741
742 return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
743 &info, &device->vk.alloc, pass);
744 }
745
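/* Packs the parameters that identify a color clear pipeline into a 64-bit
 * cache key: render target index, format, sample count, color write mask,
 * and the layered/multiview flags.
 */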
746 static inline uint64_t
747 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
748 VkFormat format,
749 VkSampleCountFlagBits samples,
750 uint32_t components,
751 bool is_layered,
752 bool has_multiview)
753 {
754 assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
755
756 uint64_t key = 0;
757 uint32_t bit_offset = 0;
758
759 key |= rt_idx;
760 bit_offset += 3;
761
762 key |= ((uint64_t) format) << bit_offset;
763 bit_offset += 32;
764
765 key |= ((uint64_t) samples) << bit_offset;
766 bit_offset += 4;
767
768 key |= ((uint64_t) components) << bit_offset;
769 bit_offset += 4;
770
771 key |= (is_layered ? 1ull : 0ull) << bit_offset;
772 bit_offset += 1;
773
774 key |= (has_multiview ? 1ull : 0ull) << bit_offset;
775 bit_offset += 1;
776
777 assert(bit_offset <= 64);
778 return key;
779 }
780
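/* Packs the parameters that identify a depth/stencil clear pipeline into a
 * 64-bit cache key: format, sample count, cleared aspects, and the
 * layered/multiview flags.
 */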
781 static inline uint64_t
782 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
783 VkFormat format,
784 uint32_t samples,
785 bool is_layered,
786 bool has_multiview)
787 {
788 uint64_t key = 0;
789 uint32_t bit_offset = 0;
790
791 key |= format;
792 bit_offset += 32;
793
794 key |= ((uint64_t) samples) << bit_offset;
795 bit_offset += 4;
796
797 const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
798 key |= ((uint64_t) has_depth) << bit_offset;
799 bit_offset++;
800
801 const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
802 key |= ((uint64_t) has_stencil) << bit_offset;
803       bit_offset++;
804
805 key |= (is_layered ? 1ull : 0ull) << bit_offset;
806 bit_offset += 1;
807
808 key |= (has_multiview ? 1ull : 0ull) << bit_offset;
809 bit_offset += 1;
810
811 assert(bit_offset <= 64);
812 return key;
813 }
814
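/* Returns a pipeline to clear a color attachment with a scissored draw. If
 * pass is NULL we create a private render pass for the clear and the pipeline
 * may be cached; otherwise the pipeline targets the application's render pass
 * and is destroyed together with the command buffer.
 */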
815 static VkResult
816 get_color_clear_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
817 struct v3dv_render_pass *pass,
818 uint32_t subpass_idx,
819 uint32_t rt_idx,
820 uint32_t attachment_idx,
821 VkFormat format,
822 VkSampleCountFlagBits samples,
823 uint32_t components,
824 bool is_layered,
825 bool has_multiview,
826 struct v3dv_meta_color_clear_pipeline **pipeline)
827 {
828 assert(vk_format_is_color(format));
829 struct v3dv_device *device = cmd_buffer->device;
830
831 VkResult result = VK_SUCCESS;
832
833 /* If pass != NULL it means that we are emitting the clear as a draw call
834 * in the current pass bound by the application. In that case, we can't
835 * cache the pipeline, since it will be referencing that pass and the
836 * application could be destroying it at any point. Hopefully, the perf
837 * impact is not too big since we still have the device pipeline cache
838 * around and we won't end up re-compiling the clear shader.
839 *
840 * FIXME: alternatively, we could refcount (or maybe clone) the render pass
841 * provided by the application and include it in the pipeline key setup
842 * to make caching safe in this scenario, however, based on tests with
843 * vkQuake3, the fact that we are not caching here doesn't seem to have
844     * any significant impact on performance, so it might not be worth it.
845 */
846 const bool can_cache_pipeline =
847 (pass == NULL) && (device->instance->meta_cache_enabled);
848
849 uint64_t key;
850 if (can_cache_pipeline) {
851 key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
852 components, is_layered,
853 has_multiview);
854 mtx_lock(&device->meta.mtx);
855 struct hash_entry *entry =
856 _mesa_hash_table_search(device->meta.color_clear.cache, &key);
857 if (entry) {
858 mtx_unlock(&device->meta.mtx);
859 *pipeline = entry->data;
860 return VK_SUCCESS;
861 }
862 }
863
864 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
865 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
866
867 if (*pipeline == NULL) {
868 result = VK_ERROR_OUT_OF_HOST_MEMORY;
869 goto fail;
870 }
871
872 if (!pass) {
873 result = create_color_clear_render_pass(device,
874 rt_idx,
875 format,
876 samples,
877 &(*pipeline)->pass);
878 if (result != VK_SUCCESS)
879 goto fail;
880
881 pass = v3dv_render_pass_from_handle((*pipeline)->pass);
882 } else {
883 (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
884 }
885
886 result = create_color_clear_pipeline(device,
887 pass,
888 subpass_idx,
889 rt_idx,
890 format,
891 samples,
892 components,
893 is_layered,
894 device->meta.color_clear.p_layout,
895 &(*pipeline)->pipeline);
896 if (result != VK_SUCCESS)
897 goto fail;
898
899 if (can_cache_pipeline) {
900 (*pipeline)->key = key;
901 (*pipeline)->cached = true;
902 _mesa_hash_table_insert(device->meta.color_clear.cache,
903 &(*pipeline)->key, *pipeline);
904
905 mtx_unlock(&device->meta.mtx);
906 } else {
907 v3dv_cmd_buffer_add_private_obj(
908 cmd_buffer, (uintptr_t)*pipeline,
909 (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_color_clear_pipeline);
910 }
911
912 return VK_SUCCESS;
913
914 fail:
915 if (can_cache_pipeline)
916 mtx_unlock(&device->meta.mtx);
917
918 VkDevice _device = v3dv_device_to_handle(device);
919 if (*pipeline) {
920 if ((*pipeline)->cached)
921 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
922 if ((*pipeline)->pipeline)
923 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
924 vk_free(&device->vk.alloc, *pipeline);
925 *pipeline = NULL;
926 }
927
928 return result;
929 }
930
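/* Returns a pipeline to clear depth/stencil aspects of the current subpass
 * attachment with a scissored draw, caching it by format, samples and aspects
 * when the meta cache is enabled.
 */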
931 static VkResult
932 get_depth_clear_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
933 VkImageAspectFlags aspects,
934 struct v3dv_render_pass *pass,
935 uint32_t subpass_idx,
936 uint32_t attachment_idx,
937 bool is_layered,
938 bool has_multiview,
939 struct v3dv_meta_depth_clear_pipeline **pipeline)
940 {
941 assert(subpass_idx < pass->subpass_count);
942 assert(attachment_idx != VK_ATTACHMENT_UNUSED);
943 assert(attachment_idx < pass->attachment_count);
944
945 VkResult result = VK_SUCCESS;
946 struct v3dv_device *device = cmd_buffer->device;
947
948 const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
949 const VkFormat format = pass->attachments[attachment_idx].desc.format;
950 assert(vk_format_is_depth_or_stencil(format));
951
952 uint64_t key;
953 if (device->instance->meta_cache_enabled) {
954 key = get_depth_clear_pipeline_cache_key(aspects, format, samples,
955 is_layered, has_multiview);
956 mtx_lock(&device->meta.mtx);
957 struct hash_entry *entry =
958 _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
959 if (entry) {
960 mtx_unlock(&device->meta.mtx);
961 *pipeline = entry->data;
962 return VK_SUCCESS;
963 }
964 }
965
966 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
967 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
968
969 if (*pipeline == NULL) {
970 result = VK_ERROR_OUT_OF_HOST_MEMORY;
971 goto fail;
972 }
973
974 result = create_depth_clear_pipeline(device,
975 aspects,
976 pass,
977 subpass_idx,
978 samples,
979 is_layered,
980 device->meta.depth_clear.p_layout,
981 &(*pipeline)->pipeline);
982 if (result != VK_SUCCESS)
983 goto fail;
984
985 if (device->instance->meta_cache_enabled) {
986 (*pipeline)->key = key;
987 _mesa_hash_table_insert(device->meta.depth_clear.cache,
988 &(*pipeline)->key, *pipeline);
989 mtx_unlock(&device->meta.mtx);
990 } else {
991 v3dv_cmd_buffer_add_private_obj(
992 cmd_buffer, (uintptr_t)*pipeline,
993 (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_depth_clear_pipeline);
994 }
995
996 return VK_SUCCESS;
997
998 fail:
999 if (device->instance->meta_cache_enabled)
1000 mtx_unlock(&device->meta.mtx);
1001
1002 VkDevice _device = v3dv_device_to_handle(device);
1003 if (*pipeline) {
1004 if ((*pipeline)->pipeline)
1005 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
1006 vk_free(&device->vk.alloc, *pipeline);
1007 *pipeline = NULL;
1008 }
1009
1010 return result;
1011 }
1012
1013 /* Emits scissored quads filled with the clear color, one per clear rect. */
1014 static void
1015 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1016 struct v3dv_render_pass *pass,
1017 struct v3dv_subpass *subpass,
1018 uint32_t rt_idx,
1019 const VkClearColorValue *clear_color,
1020 bool is_layered,
1021 bool all_rects_same_layers,
1022 uint32_t rect_count,
1023 const VkClearRect *rects)
1024 {
1025 /* Skip if attachment is unused in the current subpass */
1026 assert(rt_idx < subpass->color_count);
1027 const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1028 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1029 return;
1030
1031 /* Obtain a pipeline for this clear */
1032 assert(attachment_idx < pass->attachment_count);
1033 const VkFormat format = pass->attachments[attachment_idx].desc.format;
1034 const VkSampleCountFlagBits samples =
1035 pass->attachments[attachment_idx].desc.samples;
1036 const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1037 VK_COLOR_COMPONENT_G_BIT |
1038 VK_COLOR_COMPONENT_B_BIT |
1039 VK_COLOR_COMPONENT_A_BIT;
1040
1041 struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1042 VkResult result = get_color_clear_pipeline(cmd_buffer,
1043 pass,
1044 cmd_buffer->state.subpass_idx,
1045 rt_idx,
1046 attachment_idx,
1047 format,
1048 samples,
1049 components,
1050 is_layered,
1051 pass->multiview_enabled,
1052 &pipeline);
1053 if (result != VK_SUCCESS) {
1054 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1055 v3dv_flag_oom(cmd_buffer, NULL);
1056 return;
1057 }
1058 assert(pipeline && pipeline->pipeline);
1059
1060 /* Emit clear rects */
1061 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1062
1063 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1064 v3dv_CmdPushConstants(cmd_buffer_handle,
1065 cmd_buffer->device->meta.depth_clear.p_layout,
1066 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1067 clear_color->float32);
1068
1069 v3dv_CmdBindPipeline(cmd_buffer_handle,
1070 VK_PIPELINE_BIND_POINT_GRAPHICS,
1071 pipeline->pipeline);
1072
1073 for (uint32_t i = 0; i < rect_count; i++) {
1074 const VkViewport viewport = {
1075 .x = rects[i].rect.offset.x,
1076 .y = rects[i].rect.offset.y,
1077 .width = rects[i].rect.extent.width,
1078 .height = rects[i].rect.extent.height,
1079 .minDepth = 0.0f,
1080 .maxDepth = 1.0f
1081 };
1082 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1083 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1084
1085 if (is_layered) {
1086 for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1087 layer_offset++) {
1088 uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1089 v3dv_CmdPushConstants(cmd_buffer_handle,
1090 cmd_buffer->device->meta.depth_clear.p_layout,
1091 VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1092 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1093 }
1094 } else {
1095 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1096 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1097 }
1098 }
1099
1100 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
1101 }
1102
1103 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1104 * and the stencil aspect by using stencil testing.
1105 */
1106 static void
1107 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1108 struct v3dv_render_pass *pass,
1109 struct v3dv_subpass *subpass,
1110 VkImageAspectFlags aspects,
1111 const VkClearDepthStencilValue *clear_ds,
1112 bool is_layered,
1113 bool all_rects_same_layers,
1114 uint32_t rect_count,
1115 const VkClearRect *rects)
1116 {
1117 /* Skip if attachment is unused in the current subpass */
1118 const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1119 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1120 return;
1121
1122 /* Obtain a pipeline for this clear */
1123 assert(attachment_idx < pass->attachment_count);
1124 struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1125
1126 VkResult result = get_depth_clear_pipeline(cmd_buffer,
1127 aspects,
1128 pass,
1129 cmd_buffer->state.subpass_idx,
1130 attachment_idx,
1131 is_layered,
1132 pass->multiview_enabled,
1133 &pipeline);
1134 if (result != VK_SUCCESS) {
1135 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1136 v3dv_flag_oom(cmd_buffer, NULL);
1137 return;
1138 }
1139 assert(pipeline && pipeline->pipeline);
1140
1141 /* Emit clear rects */
1142 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1143
1144 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1145 v3dv_CmdPushConstants(cmd_buffer_handle,
1146 cmd_buffer->device->meta.depth_clear.p_layout,
1147 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1148 &clear_ds->depth);
1149
1150 v3dv_CmdBindPipeline(cmd_buffer_handle,
1151 VK_PIPELINE_BIND_POINT_GRAPHICS,
1152 pipeline->pipeline);
1153
1154 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1155 vk_common_CmdSetStencilReference(cmd_buffer_handle,
1156 VK_STENCIL_FACE_FRONT_AND_BACK,
1157 clear_ds->stencil);
1158 vk_common_CmdSetStencilWriteMask(cmd_buffer_handle,
1159 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1160 vk_common_CmdSetStencilCompareMask(cmd_buffer_handle,
1161 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1162 }
1163
1164 for (uint32_t i = 0; i < rect_count; i++) {
1165 const VkViewport viewport = {
1166 .x = rects[i].rect.offset.x,
1167 .y = rects[i].rect.offset.y,
1168 .width = rects[i].rect.extent.width,
1169 .height = rects[i].rect.extent.height,
1170 .minDepth = 0.0f,
1171 .maxDepth = 1.0f
1172 };
1173 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1174 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1175 if (is_layered) {
1176 for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1177 layer_offset++) {
1178 uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1179 v3dv_CmdPushConstants(cmd_buffer_handle,
1180 cmd_buffer->device->meta.depth_clear.p_layout,
1181 VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1182 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1183 }
1184 } else {
1185 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1186 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1187 }
1188 }
1189
1190 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
1191 }
1192
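/* Scans the clear rects to decide whether the clear needs the layered
 * (geometry shader) path, and whether all rects cover the same layer range.
 */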
1193 static void
1194 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1195 bool *is_layered, bool *all_rects_same_layers)
1196 {
1197 *all_rects_same_layers = true;
1198
1199 uint32_t min_layer = rects[0].baseArrayLayer;
1200 uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1201 for (uint32_t i = 1; i < rect_count; i++) {
1202 if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1203 rects[i].layerCount != rects[i - 1].layerCount) {
1204 *all_rects_same_layers = false;
1205 min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1206 max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1207 rects[i].layerCount - 1);
1208 }
1209 }
1210
1211 *is_layered = !(min_layer == 0 && max_layer == 0);
1212 }
1213
1214 VKAPI_ATTR void VKAPI_CALL
1215 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1216 uint32_t attachmentCount,
1217 const VkClearAttachment *pAttachments,
1218 uint32_t rectCount,
1219 const VkClearRect *pRects)
1220 {
1221 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1222
1223 /* We can have at most max_color_RTs + 1 D/S attachments */
1224 assert(attachmentCount <=
1225 V3D_MAX_RENDER_TARGETS(cmd_buffer->device->devinfo.ver) + 1);
1226
1227 /* We can only clear attachments in the current subpass */
1228 struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1229
1230 assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1231 struct v3dv_subpass *subpass =
1232 &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1233
1234 /* Emit a clear rect inside the current job for this subpass. For layered
1235 * framebuffers, we use a geometry shader to redirect clears to the
1236 * appropriate layers.
1237 */
1238
1239 v3dv_cmd_buffer_pause_occlusion_query(cmd_buffer);
1240
1241 bool is_layered, all_rects_same_layers;
1242 gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1243 for (uint32_t i = 0; i < attachmentCount; i++) {
1244 if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1245 emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1246 pAttachments[i].colorAttachment,
1247 &pAttachments[i].clearValue.color,
1248 is_layered, all_rects_same_layers,
1249 rectCount, pRects);
1250 } else {
1251 emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1252 pAttachments[i].aspectMask,
1253 &pAttachments[i].clearValue.depthStencil,
1254 is_layered, all_rects_same_layers,
1255 rectCount, pRects);
1256 }
1257 }
1258
1259 v3dv_cmd_buffer_resume_occlusion_query(cmd_buffer);
1260 }
1261