xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/meta/radv_meta_resolve_cs.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Dave Airlie
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include <assert.h>
8 #include <stdbool.h>
9 
10 #include "nir/nir_builder.h"
11 #include "nir/nir_format_convert.h"
12 
13 #include "radv_entrypoints.h"
14 #include "radv_formats.h"
15 #include "radv_meta.h"
16 #include "sid.h"
17 #include "vk_common_entrypoints.h"
18 #include "vk_format.h"
19 #include "vk_shader_module.h"
20 
21 static nir_def *
radv_meta_build_resolve_srgb_conversion(nir_builder * b,nir_def * input)22 radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_def *input)
23 {
24    unsigned i;
25    nir_def *comp[4];
26    for (i = 0; i < 3; i++)
27       comp[i] = nir_format_linear_to_srgb(b, nir_channel(b, input, i));
28    comp[3] = nir_channels(b, input, 1 << 3);
29    return nir_vec(b, comp, 4);
30 }
31 
32 static nir_shader *
build_resolve_compute_shader(struct radv_device * dev,bool is_integer,bool is_srgb,int samples)33 build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
34 {
35    enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
36    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
37    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
38    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
39                                          is_integer ? "int" : (is_srgb ? "srgb" : "float"));
40    b.shader->info.workgroup_size[0] = 8;
41    b.shader->info.workgroup_size[1] = 8;
42 
43    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
44    input_img->data.descriptor_set = 0;
45    input_img->data.binding = 0;
46 
47    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
48    output_img->data.descriptor_set = 0;
49    output_img->data.binding = 1;
50 
51    nir_def *global_id = get_global_ids(&b, 2);
52 
53    nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
54    nir_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
55 
56    nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
57    nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
58 
59    nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
60 
61    radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, src_coord);
62 
63    nir_def *outval = nir_load_var(&b, color);
64    if (is_srgb)
65       outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
66 
67    nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1), nir_undef(&b, 1, 32),
68                                  nir_undef(&b, 1, 32));
69 
70    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
71                          nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
72    return b.shader;
73 }
74 
75 enum {
76    DEPTH_RESOLVE,
77    STENCIL_RESOLVE,
78 };
79 
80 static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)81 get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
82 {
83    switch (resolve_mode) {
84    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
85       return "zero";
86    case VK_RESOLVE_MODE_AVERAGE_BIT:
87       return "average";
88    case VK_RESOLVE_MODE_MIN_BIT:
89       return "min";
90    case VK_RESOLVE_MODE_MAX_BIT:
91       return "max";
92    default:
93       unreachable("invalid resolve mode");
94    }
95 }
96 
97 static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device * dev,int samples,int index,VkResolveModeFlagBits resolve_mode)98 build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
99                                            VkResolveModeFlagBits resolve_mode)
100 {
101    enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
102    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
103    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);
104 
105    nir_builder b =
106       radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
107                             index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
108    b.shader->info.workgroup_size[0] = 8;
109    b.shader->info.workgroup_size[1] = 8;
110 
111    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
112    input_img->data.descriptor_set = 0;
113    input_img->data.binding = 0;
114 
115    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
116    output_img->data.descriptor_set = 0;
117    output_img->data.binding = 1;
118 
119    nir_def *global_id = get_global_ids(&b, 3);
120 
121    nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
122 
123    nir_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);
124 
125    nir_def *img_coord =
126       nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2));
127 
128    nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
129    nir_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
130 
131    if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
132       for (int i = 1; i < samples; i++) {
133          nir_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));
134 
135          switch (resolve_mode) {
136          case VK_RESOLVE_MODE_AVERAGE_BIT:
137             assert(index == DEPTH_RESOLVE);
138             outval = nir_fadd(&b, outval, si);
139             break;
140          case VK_RESOLVE_MODE_MIN_BIT:
141             if (index == DEPTH_RESOLVE)
142                outval = nir_fmin(&b, outval, si);
143             else
144                outval = nir_umin(&b, outval, si);
145             break;
146          case VK_RESOLVE_MODE_MAX_BIT:
147             if (index == DEPTH_RESOLVE)
148                outval = nir_fmax(&b, outval, si);
149             else
150                outval = nir_umax(&b, outval, si);
151             break;
152          default:
153             unreachable("invalid resolve mode");
154          }
155       }
156 
157       if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
158          outval = nir_fdiv_imm(&b, outval, samples);
159    }
160 
161    nir_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
162                              nir_channel(&b, img_coord, 2), nir_undef(&b, 1, 32));
163    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
164                          nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
165    return b.shader;
166 }
167 
168 static VkResult
create_layout(struct radv_device * device)169 create_layout(struct radv_device *device)
170 {
171    VkResult result = VK_SUCCESS;
172 
173    if (!device->meta_state.resolve_compute.ds_layout) {
174       const VkDescriptorSetLayoutBinding bindings[] = {
175          {
176             .binding = 0,
177             .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
178             .descriptorCount = 1,
179             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
180          },
181          {
182             .binding = 1,
183             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
184             .descriptorCount = 1,
185             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
186          },
187       };
188 
189       result =
190          radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.resolve_compute.ds_layout);
191       if (result != VK_SUCCESS)
192          return result;
193    }
194 
195    if (!device->meta_state.resolve_compute.p_layout) {
196       const VkPushConstantRange pc_range = {
197          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
198          .size = 16,
199       };
200 
201       result = radv_meta_create_pipeline_layout(device, &device->meta_state.resolve_compute.ds_layout, 1, &pc_range,
202                                                 &device->meta_state.resolve_compute.p_layout);
203    }
204 
205    return result;
206 }
207 
208 static VkResult
create_color_resolve_pipeline(struct radv_device * device,int samples,bool is_integer,bool is_srgb,VkPipeline * pipeline)209 create_color_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
210                               VkPipeline *pipeline)
211 {
212    VkResult result;
213 
214    result = create_layout(device);
215    if (result != VK_SUCCESS)
216       return result;
217 
218    nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
219 
220    result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.resolve_compute.p_layout, pipeline);
221 
222    ralloc_free(cs);
223    return result;
224 }
225 
226 static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device * device,int samples,int index,VkResolveModeFlagBits resolve_mode,VkPipeline * pipeline)227 create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
228                                       VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
229 {
230    VkResult result;
231 
232    result = create_layout(device);
233    if (result != VK_SUCCESS)
234       return result;
235 
236    nir_shader *cs = build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
237 
238    result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.resolve_compute.p_layout, pipeline);
239 
240    ralloc_free(cs);
241    return result;
242 }
243 
244 VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device * device,bool on_demand)245 radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
246 {
247    struct radv_meta_state *state = &device->meta_state;
248    VkResult res;
249 
250    if (on_demand)
251       return VK_SUCCESS;
252 
253    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
254       uint32_t samples = 1 << i;
255 
256       res = create_color_resolve_pipeline(device, samples, false, false, &state->resolve_compute.rc[i].pipeline);
257       if (res != VK_SUCCESS)
258          return res;
259 
260       res = create_color_resolve_pipeline(device, samples, true, false, &state->resolve_compute.rc[i].i_pipeline);
261       if (res != VK_SUCCESS)
262          return res;
263 
264       res = create_color_resolve_pipeline(device, samples, false, true, &state->resolve_compute.rc[i].srgb_pipeline);
265       if (res != VK_SUCCESS)
266          return res;
267 
268       res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
269                                                   &state->resolve_compute.depth[i].average_pipeline);
270       if (res != VK_SUCCESS)
271          return res;
272 
273       res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
274                                                   &state->resolve_compute.depth[i].max_pipeline);
275       if (res != VK_SUCCESS)
276          return res;
277 
278       res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
279                                                   &state->resolve_compute.depth[i].min_pipeline);
280       if (res != VK_SUCCESS)
281          return res;
282 
283       res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
284                                                   &state->resolve_compute.stencil[i].max_pipeline);
285       if (res != VK_SUCCESS)
286          return res;
287 
288       res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
289                                                   &state->resolve_compute.stencil[i].min_pipeline);
290       if (res != VK_SUCCESS)
291          return res;
292    }
293 
294    res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
295                                                &state->resolve_compute.depth_zero_pipeline);
296    if (res != VK_SUCCESS)
297       return res;
298 
299    return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
300                                                 &state->resolve_compute.stencil_zero_pipeline);
301 }
302 
303 void
radv_device_finish_meta_resolve_compute_state(struct radv_device * device)304 radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
305 {
306    struct radv_meta_state *state = &device->meta_state;
307    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
308       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline, &state->alloc);
309 
310       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline, &state->alloc);
311 
312       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
313 
314       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].average_pipeline,
315                            &state->alloc);
316 
317       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].max_pipeline, &state->alloc);
318 
319       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].min_pipeline, &state->alloc);
320 
321       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].max_pipeline,
322                            &state->alloc);
323 
324       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].min_pipeline,
325                            &state->alloc);
326    }
327 
328    radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline, &state->alloc);
329 
330    radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline, &state->alloc);
331 
332    device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
333                                                         &state->alloc);
334    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout, &state->alloc);
335 }
336 
337 static VkResult
get_color_resolve_pipeline(struct radv_device * device,struct radv_image_view * src_iview,VkPipeline * pipeline_out)338 get_color_resolve_pipeline(struct radv_device *device, struct radv_image_view *src_iview, VkPipeline *pipeline_out)
339 {
340    struct radv_meta_state *state = &device->meta_state;
341    uint32_t samples = src_iview->image->vk.samples;
342    uint32_t samples_log2 = ffs(samples) - 1;
343    VkResult result = VK_SUCCESS;
344    VkPipeline *pipeline;
345 
346    mtx_lock(&state->mtx);
347 
348    if (vk_format_is_int(src_iview->vk.format))
349       pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
350    else if (vk_format_is_srgb(src_iview->vk.format))
351       pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
352    else
353       pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
354 
355    if (!*pipeline) {
356       result = create_color_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk.format),
357                                              vk_format_is_srgb(src_iview->vk.format), pipeline);
358       if (result != VK_SUCCESS)
359          goto fail;
360    }
361 
362    *pipeline_out = *pipeline;
363 
364 fail:
365    mtx_unlock(&state->mtx);
366    return result;
367 }
368 
369 static void
emit_resolve(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,struct radv_image_view * dst_iview,const VkOffset2D * src_offset,const VkOffset2D * dst_offset,const VkExtent2D * resolve_extent)370 emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview,
371              const VkOffset2D *src_offset, const VkOffset2D *dst_offset, const VkExtent2D *resolve_extent)
372 {
373    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
374    VkPipeline pipeline;
375    VkResult result;
376 
377    result = get_color_resolve_pipeline(device, src_iview, &pipeline);
378    if (result != VK_SUCCESS) {
379       vk_command_buffer_set_error(&cmd_buffer->vk, result);
380       return;
381    }
382 
383    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
384                                  device->meta_state.resolve_compute.p_layout, 0, 2,
385                                  (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
386                                                            .dstBinding = 0,
387                                                            .dstArrayElement = 0,
388                                                            .descriptorCount = 1,
389                                                            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
390                                                            .pImageInfo =
391                                                               (VkDescriptorImageInfo[]){
392                                                                  {.sampler = VK_NULL_HANDLE,
393                                                                   .imageView = radv_image_view_to_handle(src_iview),
394                                                                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
395                                                               }},
396                                                           {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
397                                                            .dstBinding = 1,
398                                                            .dstArrayElement = 0,
399                                                            .descriptorCount = 1,
400                                                            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
401                                                            .pImageInfo = (VkDescriptorImageInfo[]){
402                                                               {
403                                                                  .sampler = VK_NULL_HANDLE,
404                                                                  .imageView = radv_image_view_to_handle(dst_iview),
405                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
406                                                               },
407                                                            }}});
408 
409    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
410 
411    unsigned push_constants[4] = {
412       src_offset->x,
413       src_offset->y,
414       dst_offset->x,
415       dst_offset->y,
416    };
417    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
418                               VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
419    radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
420 }
421 
422 static VkResult
get_depth_stencil_resolve_pipeline(struct radv_device * device,int samples,VkImageAspectFlags aspects,VkResolveModeFlagBits resolve_mode,VkPipeline * pipeline_out)423 get_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, VkImageAspectFlags aspects,
424                                    VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline_out)
425 
426 {
427    const int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
428    const uint32_t samples_log2 = ffs(samples) - 1;
429    struct radv_meta_state *state = &device->meta_state;
430    VkResult result = VK_SUCCESS;
431    VkPipeline *pipeline;
432 
433    mtx_lock(&state->mtx);
434 
435    switch (resolve_mode) {
436    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
437       if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
438          pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
439       else
440          pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
441       break;
442    case VK_RESOLVE_MODE_AVERAGE_BIT:
443       assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
444       pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
445       break;
446    case VK_RESOLVE_MODE_MIN_BIT:
447       if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
448          pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
449       else
450          pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
451       break;
452    case VK_RESOLVE_MODE_MAX_BIT:
453       if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
454          pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
455       else
456          pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
457       break;
458    default:
459       unreachable("invalid resolve mode");
460    }
461 
462    if (!*pipeline) {
463       result = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
464       if (result != VK_SUCCESS)
465          goto fail;
466    }
467 
468    *pipeline_out = *pipeline;
469 
470 fail:
471    mtx_unlock(&state->mtx);
472    return result;
473 }
474 
475 static void
emit_depth_stencil_resolve(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,struct radv_image_view * dst_iview,const VkOffset2D * resolve_offset,const VkExtent3D * resolve_extent,VkImageAspectFlags aspects,VkResolveModeFlagBits resolve_mode)476 emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
477                            struct radv_image_view *dst_iview, const VkOffset2D *resolve_offset,
478                            const VkExtent3D *resolve_extent, VkImageAspectFlags aspects,
479                            VkResolveModeFlagBits resolve_mode)
480 {
481    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
482    const uint32_t samples = src_iview->image->vk.samples;
483    VkPipeline pipeline;
484    VkResult result;
485 
486    result = get_depth_stencil_resolve_pipeline(device, samples, aspects, resolve_mode, &pipeline);
487    if (result != VK_SUCCESS) {
488       vk_command_buffer_set_error(&cmd_buffer->vk, result);
489       return;
490    }
491 
492    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
493                                  device->meta_state.resolve_compute.p_layout, 0, 2,
494                                  (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
495                                                            .dstBinding = 0,
496                                                            .dstArrayElement = 0,
497                                                            .descriptorCount = 1,
498                                                            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
499                                                            .pImageInfo =
500                                                               (VkDescriptorImageInfo[]){
501                                                                  {.sampler = VK_NULL_HANDLE,
502                                                                   .imageView = radv_image_view_to_handle(src_iview),
503                                                                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
504                                                               }},
505                                                           {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
506                                                            .dstBinding = 1,
507                                                            .dstArrayElement = 0,
508                                                            .descriptorCount = 1,
509                                                            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
510                                                            .pImageInfo = (VkDescriptorImageInfo[]){
511                                                               {
512                                                                  .sampler = VK_NULL_HANDLE,
513                                                                  .imageView = radv_image_view_to_handle(dst_iview),
514                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
515                                                               },
516                                                            }}});
517 
518    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
519 
520    uint32_t push_constants[2] = {resolve_offset->x, resolve_offset->y};
521 
522    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
523                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants);
524 
525    radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, resolve_extent->depth);
526 }
527 
528 void
radv_meta_resolve_compute_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * src_image,VkFormat src_format,VkImageLayout src_image_layout,struct radv_image * dst_image,VkFormat dst_format,VkImageLayout dst_image_layout,const VkImageResolve2 * region)529 radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkFormat src_format,
530                                 VkImageLayout src_image_layout, struct radv_image *dst_image, VkFormat dst_format,
531                                 VkImageLayout dst_image_layout, const VkImageResolve2 *region)
532 {
533    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
534    struct radv_meta_saved_state saved_state;
535 
536    /* For partial resolves, DCC should be decompressed before resolving
537     * because the metadata is re-initialized to the uncompressed after.
538     */
539    uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
540 
541    if (!radv_image_use_dcc_image_stores(device, dst_image) &&
542        radv_layout_dcc_compressed(device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, queue_mask) &&
543        (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
544         region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height ||
545         region->extent.depth != dst_image->vk.extent.depth)) {
546       radv_decompress_dcc(cmd_buffer, dst_image,
547                           &(VkImageSubresourceRange){
548                              .aspectMask = region->dstSubresource.aspectMask,
549                              .baseMipLevel = region->dstSubresource.mipLevel,
550                              .levelCount = 1,
551                              .baseArrayLayer = region->dstSubresource.baseArrayLayer,
552                              .layerCount = vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource),
553                           });
554    }
555 
556    radv_meta_save(&saved_state, cmd_buffer,
557                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
558 
559    assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
560    assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
561    assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
562           vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));
563 
564    const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
565 
566    const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
567    const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
568    const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
569    const unsigned src_layer_count = vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource);
570 
571    for (uint32_t layer = 0; layer < src_layer_count; ++layer) {
572 
573       struct radv_image_view src_iview;
574       radv_image_view_init(&src_iview, device,
575                            &(VkImageViewCreateInfo){
576                               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
577                               .image = radv_image_to_handle(src_image),
578                               .viewType = VK_IMAGE_VIEW_TYPE_2D,
579                               .format = src_format,
580                               .subresourceRange =
581                                  {
582                                     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
583                                     .baseMipLevel = 0,
584                                     .levelCount = 1,
585                                     .baseArrayLayer = region->srcSubresource.baseArrayLayer + layer,
586                                     .layerCount = 1,
587                                  },
588                            },
589                            0, NULL);
590 
591       struct radv_image_view dst_iview;
592       radv_image_view_init(&dst_iview, device,
593                            &(VkImageViewCreateInfo){
594                               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
595                               .image = radv_image_to_handle(dst_image),
596                               .viewType = radv_meta_get_view_type(dst_image),
597                               .format = vk_format_no_srgb(dst_format),
598                               .subresourceRange =
599                                  {
600                                     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
601                                     .baseMipLevel = region->dstSubresource.mipLevel,
602                                     .levelCount = 1,
603                                     .baseArrayLayer = dst_base_layer + layer,
604                                     .layerCount = 1,
605                                  },
606                            },
607                            0, NULL);
608 
609       emit_resolve(cmd_buffer, &src_iview, &dst_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
610                    &(VkOffset2D){dstOffset.x, dstOffset.y}, &(VkExtent2D){extent.width, extent.height});
611 
612       radv_image_view_finish(&src_iview);
613       radv_image_view_finish(&dst_iview);
614    }
615 
616    radv_meta_restore(&saved_state, cmd_buffer);
617 
618    if (!radv_image_use_dcc_image_stores(device, dst_image) &&
619        radv_layout_dcc_compressed(device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, queue_mask)) {
620 
621       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
622 
623       VkImageSubresourceRange range = {
624          .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
625          .baseMipLevel = region->dstSubresource.mipLevel,
626          .levelCount = 1,
627          .baseArrayLayer = dst_base_layer,
628          .layerCount = vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource),
629       };
630 
631       cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
632    }
633 }
634 
635 void
radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,VkImageLayout src_layout,struct radv_image_view * dst_iview,VkImageLayout dst_layout,const VkImageResolve2 * region)636 radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
637                                      VkImageLayout src_layout, struct radv_image_view *dst_iview,
638                                      VkImageLayout dst_layout, const VkImageResolve2 *region)
639 {
640    radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format, src_layout, dst_iview->image,
641                                    dst_iview->vk.format, dst_layout, region);
642 
643    cmd_buffer->state.flush_bits |=
644       RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
645       radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
646 }
647 
648 void
radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer * cmd_buffer,VkImageAspectFlags aspects,VkResolveModeFlagBits resolve_mode)649 radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects,
650                                         VkResolveModeFlagBits resolve_mode)
651 {
652    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
653    const struct radv_rendering_state *render = &cmd_buffer->state.render;
654    VkRect2D resolve_area = render->area;
655    struct radv_meta_saved_state saved_state;
656 
657    uint32_t layer_count = render->layer_count;
658    if (render->view_mask)
659       layer_count = util_last_bit(render->view_mask);
660 
661    /* Resolves happen before the end-of-subpass barriers get executed, so
662     * we have to make the attachment shader-readable.
663     */
664    cmd_buffer->state.flush_bits |=
665       radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
666                             VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
667       radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);
668 
669    struct radv_image_view *src_iview = render->ds_att.iview;
670    VkImageLayout src_layout =
671       aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? render->ds_att.layout : render->ds_att.stencil_layout;
672    struct radv_image *src_image = src_iview->image;
673 
674    VkImageResolve2 region = {0};
675    region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
676    region.srcSubresource.aspectMask = aspects;
677    region.srcSubresource.mipLevel = 0;
678    region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
679    region.srcSubresource.layerCount = layer_count;
680 
681    radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, &region);
682 
683    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
684 
685    struct radv_image_view *dst_iview = render->ds_att.resolve_iview;
686    VkImageLayout dst_layout =
687       aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? render->ds_att.resolve_layout : render->ds_att.stencil_resolve_layout;
688    struct radv_image *dst_image = dst_iview->image;
689 
690    struct radv_image_view tsrc_iview;
691    radv_image_view_init(&tsrc_iview, device,
692                         &(VkImageViewCreateInfo){
693                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
694                            .image = radv_image_to_handle(src_image),
695                            .viewType = VK_IMAGE_VIEW_TYPE_2D,
696                            .format = src_iview->vk.format,
697                            .subresourceRange =
698                               {
699                                  .aspectMask = aspects,
700                                  .baseMipLevel = 0,
701                                  .levelCount = 1,
702                                  .baseArrayLayer = src_iview->vk.base_array_layer,
703                                  .layerCount = layer_count,
704                               },
705                         },
706                         0, NULL);
707 
708    struct radv_image_view tdst_iview;
709    radv_image_view_init(&tdst_iview, device,
710                         &(VkImageViewCreateInfo){
711                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
712                            .image = radv_image_to_handle(dst_image),
713                            .viewType = radv_meta_get_view_type(dst_image),
714                            .format = dst_iview->vk.format,
715                            .subresourceRange =
716                               {
717                                  .aspectMask = aspects,
718                                  .baseMipLevel = dst_iview->vk.base_mip_level,
719                                  .levelCount = 1,
720                                  .baseArrayLayer = dst_iview->vk.base_array_layer,
721                                  .layerCount = layer_count,
722                               },
723                         },
724                         0, NULL);
725 
726    emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview, &resolve_area.offset,
727                               &(VkExtent3D){resolve_area.extent.width, resolve_area.extent.height, layer_count},
728                               aspects, resolve_mode);
729 
730    cmd_buffer->state.flush_bits |=
731       RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
732       radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
733 
734    uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
735 
736    if (radv_layout_is_htile_compressed(device, dst_image, dst_layout, queue_mask)) {
737       VkImageSubresourceRange range = {0};
738       range.aspectMask = aspects;
739       range.baseMipLevel = dst_iview->vk.base_mip_level;
740       range.levelCount = 1;
741       range.baseArrayLayer = dst_iview->vk.base_array_layer;
742       range.layerCount = layer_count;
743 
744       uint32_t htile_value = radv_get_htile_initial_value(device, dst_image);
745 
746       cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
747    }
748 
749    radv_image_view_finish(&tsrc_iview);
750    radv_image_view_finish(&tdst_iview);
751 
752    radv_meta_restore(&saved_state, cmd_buffer);
753 }
754