xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/meta/radv_meta_decompress.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include <assert.h>
8 #include <stdbool.h>
9 
10 #include "radv_meta.h"
11 #include "sid.h"
12 
13 static nir_shader *
build_expand_depth_stencil_compute_shader(struct radv_device * dev)14 build_expand_depth_stencil_compute_shader(struct radv_device *dev)
15 {
16    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
17 
18    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "expand_depth_stencil_compute");
19 
20    /* We need at least 8/8/1 to cover an entire HTILE block in a single workgroup. */
21    b.shader->info.workgroup_size[0] = 8;
22    b.shader->info.workgroup_size[1] = 8;
23    nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
24    input_img->data.descriptor_set = 0;
25    input_img->data.binding = 0;
26 
27    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
28    output_img->data.descriptor_set = 0;
29    output_img->data.binding = 1;
30 
31    nir_def *invoc_id = nir_load_local_invocation_id(&b);
32    nir_def *wg_id = nir_load_workgroup_id(&b);
33    nir_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
34                                        b.shader->info.workgroup_size[2], 0);
35 
36    nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
37 
38    nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id,
39                                         nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
40 
41    /* We need a SCOPE_DEVICE memory_scope because ACO will avoid
42     * creating a vmcnt(0) because it expects the L1 cache to keep memory
43     * operations in-order for the same workgroup. The vmcnt(0) seems
44     * necessary however. */
45    nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
46                .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
47 
48    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_undef(&b, 1, 32), data,
49                          nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
50    return b.shader;
51 }
52 
53 static VkResult
create_pipeline_cs(struct radv_device * device,VkPipeline * pipeline)54 create_pipeline_cs(struct radv_device *device, VkPipeline *pipeline)
55 {
56    VkResult result = VK_SUCCESS;
57 
58    const VkDescriptorSetLayoutBinding bindings[] = {
59       {
60          .binding = 0,
61          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
62          .descriptorCount = 1,
63          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
64       },
65       {
66          .binding = 1,
67          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
68          .descriptorCount = 1,
69          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
70       },
71 
72    };
73 
74    result = radv_meta_create_descriptor_set_layout(device, 2, bindings,
75                                                    &device->meta_state.expand_depth_stencil_compute_ds_layout);
76    if (result != VK_SUCCESS)
77        return result;
78 
79    result = radv_meta_create_pipeline_layout(device, &device->meta_state.expand_depth_stencil_compute_ds_layout, 0,
80                                              NULL, &device->meta_state.expand_depth_stencil_compute_p_layout);
81    if (result != VK_SUCCESS)
82        return result;
83 
84    nir_shader *cs = build_expand_depth_stencil_compute_shader(device);
85 
86    result =
87       radv_meta_create_compute_pipeline(device, cs, device->meta_state.expand_depth_stencil_compute_p_layout, pipeline);
88 
89    ralloc_free(cs);
90    return result;
91 }
92 
93 static VkResult
create_pipeline_gfx(struct radv_device * device,uint32_t samples,VkPipelineLayout layout,VkPipeline * pipeline)94 create_pipeline_gfx(struct radv_device *device, uint32_t samples, VkPipelineLayout layout, VkPipeline *pipeline)
95 {
96    VkResult result;
97    VkDevice device_h = radv_device_to_handle(device);
98 
99    if (!device->meta_state.depth_decomp.p_layout) {
100       result = radv_meta_create_pipeline_layout(device, NULL, 0, NULL, &device->meta_state.depth_decomp.p_layout);
101       if (result != VK_SUCCESS)
102          return result;
103    }
104 
105    nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
106    nir_shader *fs_module = radv_meta_build_nir_fs_noop(device);
107 
108    const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
109       .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
110       .sampleLocationsEnable = false,
111    };
112 
113    const VkPipelineRenderingCreateInfo rendering_create_info = {
114       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
115       .depthAttachmentFormat = VK_FORMAT_D32_SFLOAT_S8_UINT,
116       .stencilAttachmentFormat = VK_FORMAT_D32_SFLOAT_S8_UINT,
117    };
118 
119    const VkGraphicsPipelineCreateInfo pipeline_create_info = {
120       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
121       .pNext = &rendering_create_info,
122       .stageCount = 2,
123       .pStages =
124          (VkPipelineShaderStageCreateInfo[]){
125             {
126                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
127                .stage = VK_SHADER_STAGE_VERTEX_BIT,
128                .module = vk_shader_module_handle_from_nir(vs_module),
129                .pName = "main",
130             },
131             {
132                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
133                .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
134                .module = vk_shader_module_handle_from_nir(fs_module),
135                .pName = "main",
136             },
137          },
138       .pVertexInputState =
139          &(VkPipelineVertexInputStateCreateInfo){
140             .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
141             .vertexBindingDescriptionCount = 0,
142             .vertexAttributeDescriptionCount = 0,
143          },
144       .pInputAssemblyState =
145          &(VkPipelineInputAssemblyStateCreateInfo){
146             .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
147             .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
148             .primitiveRestartEnable = false,
149          },
150       .pViewportState =
151          &(VkPipelineViewportStateCreateInfo){
152             .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
153             .viewportCount = 1,
154             .scissorCount = 1,
155          },
156       .pRasterizationState =
157          &(VkPipelineRasterizationStateCreateInfo){
158             .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
159             .depthClampEnable = false,
160             .rasterizerDiscardEnable = false,
161             .polygonMode = VK_POLYGON_MODE_FILL,
162             .cullMode = VK_CULL_MODE_NONE,
163             .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
164          },
165       .pMultisampleState =
166          &(VkPipelineMultisampleStateCreateInfo){
167             .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
168             .pNext = &sample_locs_create_info,
169             .rasterizationSamples = samples,
170             .sampleShadingEnable = false,
171             .pSampleMask = NULL,
172             .alphaToCoverageEnable = false,
173             .alphaToOneEnable = false,
174          },
175       .pColorBlendState =
176          &(VkPipelineColorBlendStateCreateInfo){
177             .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
178             .logicOpEnable = false,
179             .attachmentCount = 0,
180             .pAttachments = NULL,
181          },
182       .pDepthStencilState =
183          &(VkPipelineDepthStencilStateCreateInfo){
184             .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
185             .depthTestEnable = false,
186             .depthWriteEnable = false,
187             .depthBoundsTestEnable = false,
188             .stencilTestEnable = false,
189          },
190       .pDynamicState =
191          &(VkPipelineDynamicStateCreateInfo){
192             .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
193             .dynamicStateCount = 3,
194             .pDynamicStates =
195                (VkDynamicState[]){
196                   VK_DYNAMIC_STATE_VIEWPORT,
197                   VK_DYNAMIC_STATE_SCISSOR,
198                   VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
199                },
200          },
201       .layout = layout,
202       .renderPass = VK_NULL_HANDLE,
203       .subpass = 0,
204    };
205 
206    struct radv_graphics_pipeline_create_info extra = {
207       .use_rectlist = true,
208       .depth_compress_disable = true,
209       .stencil_compress_disable = true,
210    };
211 
212    result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, &pipeline_create_info, &extra,
213                                           &device->meta_state.alloc, pipeline);
214 
215    ralloc_free(fs_module);
216    ralloc_free(vs_module);
217    return result;
218 }
219 
220 void
radv_device_finish_meta_depth_decomp_state(struct radv_device * device)221 radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
222 {
223    struct radv_meta_state *state = &device->meta_state;
224 
225    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp.p_layout, &state->alloc);
226    for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp.decompress_pipeline); ++i) {
227       radv_DestroyPipeline(radv_device_to_handle(device), state->depth_decomp.decompress_pipeline[i], &state->alloc);
228    }
229 
230    radv_DestroyPipeline(radv_device_to_handle(device), state->expand_depth_stencil_compute_pipeline, &state->alloc);
231    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->expand_depth_stencil_compute_p_layout,
232                               &state->alloc);
233    device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
234                                                         state->expand_depth_stencil_compute_ds_layout, &state->alloc);
235 }
236 
237 VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device * device,bool on_demand)238 radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
239 {
240    struct radv_meta_state *state = &device->meta_state;
241    VkResult res = VK_SUCCESS;
242 
243    if (on_demand)
244       return res;
245 
246    for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp.decompress_pipeline); ++i) {
247       uint32_t samples = 1 << i;
248 
249       res = create_pipeline_gfx(device, samples, state->depth_decomp.p_layout,
250                                 &state->depth_decomp.decompress_pipeline[i]);
251       if (res != VK_SUCCESS)
252          return res;
253    }
254 
255    return create_pipeline_cs(device, &state->expand_depth_stencil_compute_pipeline);
256 }
257 
258 static VkResult
get_pipeline_gfx(struct radv_device * device,struct radv_image * image,VkPipeline * pipeline_out)259 get_pipeline_gfx(struct radv_device *device, struct radv_image *image, VkPipeline *pipeline_out)
260 {
261    struct radv_meta_state *state = &device->meta_state;
262    uint32_t samples = image->vk.samples;
263    uint32_t samples_log2 = ffs(samples) - 1;
264    VkResult result = VK_SUCCESS;
265 
266    mtx_lock(&state->mtx);
267    if (!state->depth_decomp.decompress_pipeline[samples_log2]) {
268       result = create_pipeline_gfx(device, samples, state->depth_decomp.p_layout,
269                                    &state->depth_decomp.decompress_pipeline[samples_log2]);
270       if (result != VK_SUCCESS)
271          goto fail;
272    }
273 
274    *pipeline_out = state->depth_decomp.decompress_pipeline[samples_log2];
275 
276 fail:
277    mtx_unlock(&state->mtx);
278    return result;
279 }
280 
281 static void
radv_process_depth_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,int level,int layer)282 radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
283                                const VkImageSubresourceRange *range, int level, int layer)
284 {
285    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
286    struct radv_image_view iview;
287    uint32_t width, height;
288 
289    width = u_minify(image->vk.extent.width, range->baseMipLevel + level);
290    height = u_minify(image->vk.extent.height, range->baseMipLevel + level);
291 
292    radv_image_view_init(&iview, device,
293                         &(VkImageViewCreateInfo){
294                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
295                            .image = radv_image_to_handle(image),
296                            .viewType = radv_meta_get_view_type(image),
297                            .format = image->vk.format,
298                            .subresourceRange =
299                               {
300                                  .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
301                                  .baseMipLevel = range->baseMipLevel + level,
302                                  .levelCount = 1,
303                                  .baseArrayLayer = range->baseArrayLayer + layer,
304                                  .layerCount = 1,
305                               },
306                         },
307                         0, NULL);
308 
309    const VkRenderingAttachmentInfo depth_att = {
310       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
311       .imageView = radv_image_view_to_handle(&iview),
312       .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
313       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
314       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
315    };
316 
317    const VkRenderingAttachmentInfo stencil_att = {
318       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
319       .imageView = radv_image_view_to_handle(&iview),
320       .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
321       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
322       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
323    };
324 
325    const VkRenderingInfo rendering_info = {
326       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
327       .flags = VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA,
328       .renderArea = {.offset = {0, 0}, .extent = {width, height}},
329       .layerCount = 1,
330       .pDepthAttachment = &depth_att,
331       .pStencilAttachment = &stencil_att,
332    };
333 
334    radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
335 
336    radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
337 
338    radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
339 
340    radv_image_view_finish(&iview);
341 }
342 
343 static void
radv_process_depth_stencil(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,struct radv_sample_locations_state * sample_locs)344 radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
345                            const VkImageSubresourceRange *subresourceRange,
346                            struct radv_sample_locations_state *sample_locs)
347 {
348    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
349    struct radv_meta_saved_state saved_state;
350    VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
351    VkPipeline pipeline;
352    VkResult result;
353 
354    result = get_pipeline_gfx(device, image, &pipeline);
355    if (result != VK_SUCCESS) {
356       vk_command_buffer_set_error(&cmd_buffer->vk, result);
357       return;
358    }
359 
360    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
361 
362    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
363 
364    if (sample_locs) {
365       assert(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
366 
367       /* Set the sample locations specified during explicit or
368        * automatic layout transitions, otherwise the depth decompress
369        * pass uses the default HW locations.
370        */
371       radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT){
372                                                      .sampleLocationsPerPixel = sample_locs->per_pixel,
373                                                      .sampleLocationGridSize = sample_locs->grid_size,
374                                                      .sampleLocationsCount = sample_locs->count,
375                                                      .pSampleLocations = sample_locs->locations,
376                                                   });
377    }
378 
379    for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); ++l) {
380 
381       /* Do not decompress levels without HTILE. */
382       if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
383          continue;
384 
385       uint32_t width = u_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l);
386       uint32_t height = u_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
387 
388       radv_CmdSetViewport(
389          cmd_buffer_h, 0, 1,
390          &(VkViewport){.x = 0, .y = 0, .width = width, .height = height, .minDepth = 0.0f, .maxDepth = 1.0f});
391 
392       radv_CmdSetScissor(cmd_buffer_h, 0, 1,
393                          &(VkRect2D){
394                             .offset = {0, 0},
395                             .extent = {width, height},
396                          });
397 
398       for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) {
399          radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
400       }
401    }
402 
403    radv_meta_restore(&saved_state, cmd_buffer);
404 }
405 
406 static VkResult
get_pipeline_cs(struct radv_device * device,VkPipeline * pipeline_out)407 get_pipeline_cs(struct radv_device *device, VkPipeline *pipeline_out)
408 {
409    struct radv_meta_state *state = &device->meta_state;
410    VkResult result = VK_SUCCESS;
411 
412    mtx_lock(&state->mtx);
413    if (!state->expand_depth_stencil_compute_pipeline) {
414       result = create_pipeline_cs(device, &state->expand_depth_stencil_compute_pipeline);
415       if (result != VK_SUCCESS)
416          goto fail;
417    }
418 
419    *pipeline_out = state->expand_depth_stencil_compute_pipeline;
420 
421 fail:
422    mtx_unlock(&state->mtx);
423    return result;
424 }
425 
426 static void
radv_expand_depth_stencil_compute(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)427 radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
428                                   const VkImageSubresourceRange *subresourceRange)
429 {
430    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
431    struct radv_meta_saved_state saved_state;
432    struct radv_image_view load_iview = {0};
433    struct radv_image_view store_iview = {0};
434    VkPipeline pipeline;
435    VkResult result;
436 
437    assert(radv_image_is_tc_compat_htile(image));
438 
439    result = get_pipeline_cs(device, &pipeline);
440    if (result != VK_SUCCESS) {
441       vk_command_buffer_set_error(&cmd_buffer->vk, result);
442       return;
443    }
444 
445    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
446 
447    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
448 
449    for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); l++) {
450       uint32_t width, height;
451 
452       /* Do not decompress levels without HTILE. */
453       if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
454          continue;
455 
456       width = u_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l);
457       height = u_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
458 
459       for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) {
460          radv_image_view_init(&load_iview, device,
461                               &(VkImageViewCreateInfo){
462                                  .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
463                                  .image = radv_image_to_handle(image),
464                                  .viewType = VK_IMAGE_VIEW_TYPE_2D,
465                                  .format = image->vk.format,
466                                  .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
467                                                       .baseMipLevel = subresourceRange->baseMipLevel + l,
468                                                       .levelCount = 1,
469                                                       .baseArrayLayer = subresourceRange->baseArrayLayer + s,
470                                                       .layerCount = 1},
471                               },
472                               0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
473          radv_image_view_init(&store_iview, device,
474                               &(VkImageViewCreateInfo){
475                                  .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
476                                  .image = radv_image_to_handle(image),
477                                  .viewType = VK_IMAGE_VIEW_TYPE_2D,
478                                  .format = image->vk.format,
479                                  .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
480                                                       .baseMipLevel = subresourceRange->baseMipLevel + l,
481                                                       .levelCount = 1,
482                                                       .baseArrayLayer = subresourceRange->baseArrayLayer + s,
483                                                       .layerCount = 1},
484                               },
485                               0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
486 
487          radv_meta_push_descriptor_set(
488             cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.expand_depth_stencil_compute_p_layout, 0, 2,
489             (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
490                                       .dstBinding = 0,
491                                       .dstArrayElement = 0,
492                                       .descriptorCount = 1,
493                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
494                                       .pImageInfo =
495                                          (VkDescriptorImageInfo[]){
496                                             {
497                                                .sampler = VK_NULL_HANDLE,
498                                                .imageView = radv_image_view_to_handle(&load_iview),
499                                                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
500                                             },
501                                          }},
502                                      {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
503                                       .dstBinding = 1,
504                                       .dstArrayElement = 0,
505                                       .descriptorCount = 1,
506                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
507                                       .pImageInfo = (VkDescriptorImageInfo[]){
508                                          {
509                                             .sampler = VK_NULL_HANDLE,
510                                             .imageView = radv_image_view_to_handle(&store_iview),
511                                             .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
512                                          },
513                                       }}});
514 
515          radv_unaligned_dispatch(cmd_buffer, width, height, 1);
516 
517          radv_image_view_finish(&load_iview);
518          radv_image_view_finish(&store_iview);
519       }
520    }
521 
522    radv_meta_restore(&saved_state, cmd_buffer);
523 
524    cmd_buffer->state.flush_bits |=
525       RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
526       radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, image);
527 
528    /* Initialize the HTILE metadata as "fully expanded". */
529    uint32_t htile_value = radv_get_htile_initial_value(device, image);
530 
531    cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, subresourceRange, htile_value);
532 }
533 
534 void
radv_expand_depth_stencil(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,struct radv_sample_locations_state * sample_locs)535 radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
536                           const VkImageSubresourceRange *subresourceRange,
537                           struct radv_sample_locations_state *sample_locs)
538 {
539    struct radv_barrier_data barrier = {0};
540 
541    barrier.layout_transitions.depth_stencil_expand = 1;
542    radv_describe_layout_transition(cmd_buffer, &barrier);
543 
544    if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
545       radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs);
546    } else {
547       radv_expand_depth_stencil_compute(cmd_buffer, image, subresourceRange);
548    }
549 }
550