xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/meta/radv_meta_resolve.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include <assert.h>
8 #include <stdbool.h>
9 
10 #include "nir/nir_builder.h"
11 #include "radv_entrypoints.h"
12 #include "radv_meta.h"
13 #include "sid.h"
14 #include "vk_format.h"
15 
16 static nir_shader *
build_nir_fs(struct radv_device * dev)17 build_nir_fs(struct radv_device *dev)
18 {
19    const struct glsl_type *vec4 = glsl_vec4_type();
20    nir_variable *f_color;
21 
22    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs");
23 
24    f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
25    f_color->data.location = FRAG_RESULT_DATA0;
26    nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);
27 
28    return b.shader;
29 }
30 
31 static VkResult
create_pipeline(struct radv_device * device,VkFormat format,VkPipeline * pipeline)32 create_pipeline(struct radv_device *device, VkFormat format, VkPipeline *pipeline)
33 {
34    VkResult result;
35    VkDevice device_h = radv_device_to_handle(device);
36 
37    if (!device->meta_state.resolve.p_layout) {
38       result = radv_meta_create_pipeline_layout(device, NULL, 0, NULL, &device->meta_state.resolve.p_layout);
39       if (result != VK_SUCCESS)
40          return result;
41    }
42 
43    nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
44    nir_shader *fs_module = build_nir_fs(device);
45 
46    VkFormat color_formats[2] = {format, format};
47    const VkPipelineRenderingCreateInfo rendering_create_info = {
48       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
49       .colorAttachmentCount = 2,
50       .pColorAttachmentFormats = color_formats,
51    };
52 
53    result = radv_graphics_pipeline_create(
54       device_h, device->meta_state.cache,
55       &(VkGraphicsPipelineCreateInfo){
56          .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
57          .pNext = &rendering_create_info,
58          .stageCount = 2,
59          .pStages =
60             (VkPipelineShaderStageCreateInfo[]){
61                {
62                   .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
63                   .stage = VK_SHADER_STAGE_VERTEX_BIT,
64                   .module = vk_shader_module_handle_from_nir(vs_module),
65                   .pName = "main",
66                },
67                {
68                   .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
69                   .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
70                   .module = vk_shader_module_handle_from_nir(fs_module),
71                   .pName = "main",
72                },
73             },
74          .pVertexInputState =
75             &(VkPipelineVertexInputStateCreateInfo){
76                .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
77                .vertexBindingDescriptionCount = 0,
78                .vertexAttributeDescriptionCount = 0,
79             },
80          .pInputAssemblyState =
81             &(VkPipelineInputAssemblyStateCreateInfo){
82                .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
83                .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
84                .primitiveRestartEnable = false,
85             },
86          .pViewportState =
87             &(VkPipelineViewportStateCreateInfo){
88                .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
89                .viewportCount = 1,
90                .scissorCount = 1,
91             },
92          .pRasterizationState =
93             &(VkPipelineRasterizationStateCreateInfo){
94                .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
95                .depthClampEnable = false,
96                .rasterizerDiscardEnable = false,
97                .polygonMode = VK_POLYGON_MODE_FILL,
98                .cullMode = VK_CULL_MODE_NONE,
99                .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
100             },
101          .pMultisampleState =
102             &(VkPipelineMultisampleStateCreateInfo){
103                .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
104                .rasterizationSamples = 1,
105                .sampleShadingEnable = false,
106                .pSampleMask = NULL,
107                .alphaToCoverageEnable = false,
108                .alphaToOneEnable = false,
109             },
110          .pColorBlendState =
111             &(VkPipelineColorBlendStateCreateInfo){
112                .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
113                .logicOpEnable = false,
114                .attachmentCount = 2,
115                .pAttachments =
116                   (VkPipelineColorBlendAttachmentState[]){
117                      {
118                         .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
119                                           VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
120                      },
121                      {
122                         .colorWriteMask = 0,
123 
124                      }},
125             },
126          .pDynamicState =
127             &(VkPipelineDynamicStateCreateInfo){
128                .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
129                .dynamicStateCount = 2,
130                .pDynamicStates =
131                   (VkDynamicState[]){
132                      VK_DYNAMIC_STATE_VIEWPORT,
133                      VK_DYNAMIC_STATE_SCISSOR,
134                   },
135             },
136          .layout = device->meta_state.resolve.p_layout,
137          .renderPass = VK_NULL_HANDLE,
138          .subpass = 0,
139       },
140       &(struct radv_graphics_pipeline_create_info){
141          .use_rectlist = true,
142          .custom_blend_mode = V_028808_CB_RESOLVE,
143       },
144       &device->meta_state.alloc, pipeline);
145 
146    ralloc_free(vs_module);
147    ralloc_free(fs_module);
148    return result;
149 }
150 
151 static VkResult
get_pipeline(struct radv_device * device,unsigned fs_key,VkPipeline * pipeline_out)152 get_pipeline(struct radv_device *device, unsigned fs_key, VkPipeline *pipeline_out)
153 {
154    struct radv_meta_state *state = &device->meta_state;
155    VkResult result = VK_SUCCESS;
156 
157    mtx_lock(&state->mtx);
158    if (!state->resolve.pipeline[fs_key]) {
159       result = create_pipeline(device, radv_fs_key_format_exemplars[fs_key], &state->resolve.pipeline[fs_key]);
160       if (result != VK_SUCCESS)
161          goto fail;
162    }
163 
164    *pipeline_out = state->resolve.pipeline[fs_key];
165 
166 fail:
167    mtx_unlock(&state->mtx);
168    return result;
169 }
170 
171 void
radv_device_finish_meta_resolve_state(struct radv_device * device)172 radv_device_finish_meta_resolve_state(struct radv_device *device)
173 {
174    struct radv_meta_state *state = &device->meta_state;
175 
176    for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
177       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j], &state->alloc);
178    }
179    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout, &state->alloc);
180 }
181 
182 VkResult
radv_device_init_meta_resolve_state(struct radv_device * device,bool on_demand)183 radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)
184 {
185    if (on_demand)
186       return VK_SUCCESS;
187 
188    VkResult res = VK_SUCCESS;
189    struct radv_meta_state *state = &device->meta_state;
190 
191    for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
192       VkFormat format = radv_fs_key_format_exemplars[i];
193       unsigned fs_key = radv_format_meta_fs_key(device, format);
194 
195       res = create_pipeline(device, format, &state->resolve.pipeline[fs_key]);
196       if (res != VK_SUCCESS)
197           return res;
198    }
199 
200    return res;
201 }
202 
203 static void
emit_resolve(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image,VkFormat vk_format)204 emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image, const struct radv_image *dst_image,
205              VkFormat vk_format)
206 {
207    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
208    VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
209    unsigned fs_key = radv_format_meta_fs_key(device, vk_format);
210    VkPipeline pipeline;
211    VkResult result;
212 
213    result = get_pipeline(device, fs_key, &pipeline);
214    if (result != VK_SUCCESS) {
215       vk_command_buffer_set_error(&cmd_buffer->vk, result);
216       return;
217    }
218 
219    cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
220                                                          VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, src_image) |
221                                    radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
222                                                          VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, src_image);
223 
224    radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
225 
226    radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
227    cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
228                                                          VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
229 }
230 
/** Strategy used to perform a multisample resolve. */
enum radv_resolve_method {
   RESOLVE_HW = 0,       /* resolve draw using the CB resolve blend mode */
   RESOLVE_COMPUTE = 1,  /* compute-based resolve path */
   RESOLVE_FRAGMENT = 2, /* fragment-shader-based resolve path */
};
236 
237 static bool
image_hw_resolve_compat(const struct radv_device * device,struct radv_image * src_image,struct radv_image * dst_image)238 image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, struct radv_image *dst_image)
239 {
240    const struct radv_physical_device *pdev = radv_device_physical(device);
241    if (pdev->info.gfx_level >= GFX9) {
242       return dst_image->planes[0].surface.u.gfx9.swizzle_mode == src_image->planes[0].surface.u.gfx9.swizzle_mode;
243    } else {
244       return dst_image->planes[0].surface.micro_tile_mode == src_image->planes[0].surface.micro_tile_mode;
245    }
246 }
247 
248 static void
radv_pick_resolve_method_images(struct radv_device * device,struct radv_image * src_image,VkFormat src_format,struct radv_image * dst_image,unsigned dst_level,VkImageLayout dst_image_layout,struct radv_cmd_buffer * cmd_buffer,enum radv_resolve_method * method)249 radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image, VkFormat src_format,
250                                 struct radv_image *dst_image, unsigned dst_level, VkImageLayout dst_image_layout,
251                                 struct radv_cmd_buffer *cmd_buffer, enum radv_resolve_method *method)
252 
253 {
254    uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
255 
256    if (vk_format_is_color(src_format)) {
257       /* Using the fragment resolve path is currently a hint to
258        * avoid decompressing DCC for partial resolves and
259        * re-initialize it after resolving using compute.
260        * TODO: Add support for layered and int to the fragment path.
261        */
262       if (radv_layout_dcc_compressed(device, dst_image, dst_level, dst_image_layout, queue_mask)) {
263          *method = RESOLVE_FRAGMENT;
264       } else if (!image_hw_resolve_compat(device, src_image, dst_image)) {
265          /* The micro tile mode only needs to match for the HW
266           * resolve path which is the default path for non-DCC
267           * resolves.
268           */
269          *method = RESOLVE_COMPUTE;
270       }
271 
272       if (src_format == VK_FORMAT_R16G16_UNORM || src_format == VK_FORMAT_R16G16_SNORM)
273          *method = RESOLVE_COMPUTE;
274       else if (vk_format_is_int(src_format))
275          *method = RESOLVE_COMPUTE;
276       else if (src_image->vk.array_layers > 1 || dst_image->vk.array_layers > 1)
277          *method = RESOLVE_COMPUTE;
278    } else {
279       assert(dst_image_layout == VK_IMAGE_LAYOUT_UNDEFINED);
280       if (src_image->vk.array_layers > 1 || dst_image->vk.array_layers > 1 ||
281           (dst_image->planes[0].surface.flags & RADEON_SURF_NO_RENDER_TARGET))
282          *method = RESOLVE_COMPUTE;
283       else
284          *method = RESOLVE_FRAGMENT;
285    }
286 }
287 
288 static void
radv_meta_resolve_hardware_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * src_image,VkImageLayout src_image_layout,struct radv_image * dst_image,VkImageLayout dst_image_layout,const VkImageResolve2 * region)289 radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
290                                  VkImageLayout src_image_layout, struct radv_image *dst_image,
291                                  VkImageLayout dst_image_layout, const VkImageResolve2 *region)
292 {
293    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
294    struct radv_meta_saved_state saved_state;
295 
296    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);
297 
298    assert(src_image->vk.samples > 1);
299    assert(dst_image->vk.samples == 1);
300 
301    /* From the Vulkan 1.0 spec:
302     *
303     *    - The aspectMask member of srcSubresource and dstSubresource must
304     *      only contain VK_IMAGE_ASPECT_COLOR_BIT
305     */
306    assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
307    assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
308    /* Multi-layer resolves are handled by compute */
309    assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) == 1 &&
310           vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource) == 1);
311    /**
312     * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
313     *
314     *    extent is the size in texels of the source image to resolve in width,
315     *    height and depth. 1D images use only x and width. 2D images use x, y,
316     *    width and height. 3D images use x, y, z, width, height and depth.
317     *
318     *    srcOffset and dstOffset select the initial x, y, and z offsets in
319     *    texels of the sub-regions of the source and destination image data.
320     *    extent is the size in texels of the source image to resolve in width,
321     *    height and depth. 1D images use only x and width. 2D images use x, y,
322     *    width and height. 3D images use x, y, z, width, height and depth.
323     */
324    const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
325    const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
326 
327    uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
328 
329    if (radv_layout_dcc_compressed(device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, queue_mask)) {
330       VkImageSubresourceRange range = {
331          .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
332          .baseMipLevel = region->dstSubresource.mipLevel,
333          .levelCount = 1,
334          .baseArrayLayer = 0,
335          .layerCount = 1,
336       };
337 
338       cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
339    }
340 
341    VkRect2D resolve_area = {
342       .offset = {dstOffset.x, dstOffset.y},
343       .extent = {extent.width, extent.height},
344    };
345 
346    radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
347                        &(VkViewport){.x = resolve_area.offset.x,
348                                      .y = resolve_area.offset.y,
349                                      .width = resolve_area.extent.width,
350                                      .height = resolve_area.extent.height,
351                                      .minDepth = 0.0f,
352                                      .maxDepth = 1.0f});
353 
354    radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &resolve_area);
355 
356    struct radv_image_view src_iview;
357    radv_image_view_init(&src_iview, device,
358                         &(VkImageViewCreateInfo){
359                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
360                            .image = radv_image_to_handle(src_image),
361                            .viewType = VK_IMAGE_VIEW_TYPE_2D,
362                            .format = src_image->vk.format,
363                            .subresourceRange =
364                               {
365                                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
366                                  .baseMipLevel = 0,
367                                  .levelCount = 1,
368                                  .baseArrayLayer = 0,
369                                  .layerCount = 1,
370                               },
371                         },
372                         0, NULL);
373 
374    struct radv_image_view dst_iview;
375    radv_image_view_init(&dst_iview, device,
376                         &(VkImageViewCreateInfo){
377                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
378                            .image = radv_image_to_handle(dst_image),
379                            .viewType = radv_meta_get_view_type(dst_image),
380                            .format = dst_image->vk.format,
381                            .subresourceRange =
382                               {
383                                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
384                                  .baseMipLevel = region->dstSubresource.mipLevel,
385                                  .levelCount = 1,
386                                  .baseArrayLayer = 0,
387                                  .layerCount = 1,
388                               },
389                         },
390                         0, NULL);
391 
392    const VkRenderingAttachmentInfo color_atts[2] = {
393       {
394          .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
395          .imageView = radv_image_view_to_handle(&src_iview),
396          .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
397          .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
398          .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
399       },
400       {
401          .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
402          .imageView = radv_image_view_to_handle(&dst_iview),
403          .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
404          .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
405          .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
406       },
407    };
408 
409    const VkRenderingInfo rendering_info = {
410       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
411       .flags = VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA,
412       .renderArea = resolve_area,
413       .layerCount = 1,
414       .colorAttachmentCount = 2,
415       .pColorAttachments = color_atts,
416    };
417 
418    radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
419 
420    emit_resolve(cmd_buffer, src_image, dst_image, dst_iview.vk.format);
421 
422    radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
423 
424    radv_image_view_finish(&src_iview);
425    radv_image_view_finish(&dst_iview);
426 
427    radv_meta_restore(&saved_state, cmd_buffer);
428 }
429 
430 static void
resolve_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * src_image,VkImageLayout src_image_layout,struct radv_image * dst_image,VkImageLayout dst_image_layout,const VkImageResolve2 * region,enum radv_resolve_method resolve_method)431 resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
432               struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageResolve2 *region,
433               enum radv_resolve_method resolve_method)
434 {
435    switch (resolve_method) {
436    case RESOLVE_HW:
437       radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region);
438       break;
439    case RESOLVE_FRAGMENT:
440       radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
441 
442       radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region);
443       break;
444    case RESOLVE_COMPUTE:
445       radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
446 
447       radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk.format, src_image_layout, dst_image,
448                                       dst_image->vk.format, dst_image_layout, region);
449       break;
450    default:
451       assert(!"Invalid resolve method selected");
452    }
453 }
454 
455 VKAPI_ATTR void VKAPI_CALL
radv_CmdResolveImage2(VkCommandBuffer commandBuffer,const VkResolveImageInfo2 * pResolveImageInfo)456 radv_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
457 {
458    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
459    VK_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
460    VK_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
461    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
462    const struct radv_physical_device *pdev = radv_device_physical(device);
463    VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
464    VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
465    enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;
466 
467    /* we can use the hw resolve only for single full resolves */
468    if (pResolveImageInfo->regionCount == 1) {
469       if (pResolveImageInfo->pRegions[0].srcOffset.x || pResolveImageInfo->pRegions[0].srcOffset.y ||
470           pResolveImageInfo->pRegions[0].srcOffset.z)
471          resolve_method = RESOLVE_COMPUTE;
472       if (pResolveImageInfo->pRegions[0].dstOffset.x || pResolveImageInfo->pRegions[0].dstOffset.y ||
473           pResolveImageInfo->pRegions[0].dstOffset.z)
474          resolve_method = RESOLVE_COMPUTE;
475 
476       if (pResolveImageInfo->pRegions[0].extent.width != src_image->vk.extent.width ||
477           pResolveImageInfo->pRegions[0].extent.height != src_image->vk.extent.height ||
478           pResolveImageInfo->pRegions[0].extent.depth != src_image->vk.extent.depth)
479          resolve_method = RESOLVE_COMPUTE;
480    } else
481       resolve_method = RESOLVE_COMPUTE;
482 
483    for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
484       const VkImageResolve2 *region = &pResolveImageInfo->pRegions[r];
485 
486       radv_pick_resolve_method_images(device, src_image, src_image->vk.format, dst_image,
487                                       region->dstSubresource.mipLevel, dst_image_layout, cmd_buffer, &resolve_method);
488 
489       resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region, resolve_method);
490    }
491 }
492 
493 static void
radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,VkImageLayout src_layout,struct radv_image_view * dst_iview,VkImageLayout dst_layout)494 radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
495                                      VkImageLayout src_layout, struct radv_image_view *dst_iview,
496                                      VkImageLayout dst_layout)
497 {
498    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
499    struct radv_meta_saved_state saved_state;
500 
501    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
502 
503    VkRect2D *resolve_area = &saved_state.render.area;
504 
505    radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
506                        &(VkViewport){.x = resolve_area->offset.x,
507                                      .y = resolve_area->offset.y,
508                                      .width = resolve_area->extent.width,
509                                      .height = resolve_area->extent.height,
510                                      .minDepth = 0.0f,
511                                      .maxDepth = 1.0f});
512 
513    radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, resolve_area);
514 
515    struct radv_image *src_img = src_iview->image;
516    struct radv_image *dst_img = dst_iview->image;
517    uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->qf, cmd_buffer->qf);
518 
519    if (radv_layout_dcc_compressed(device, dst_img, dst_iview->vk.base_mip_level, dst_layout, queue_mask)) {
520       VkImageSubresourceRange range = {
521          .aspectMask = dst_iview->vk.aspects,
522          .baseMipLevel = dst_iview->vk.base_mip_level,
523          .levelCount = 1,
524          .baseArrayLayer = 0,
525          .layerCount = 1,
526       };
527 
528       cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_img, &range, 0xffffffff);
529    }
530 
531    const VkRenderingAttachmentInfo color_atts[2] = {
532       {
533          .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
534          .imageView = radv_image_view_to_handle(src_iview),
535          .imageLayout = src_layout,
536          .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
537          .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
538       },
539       {
540          .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
541          .imageView = radv_image_view_to_handle(dst_iview),
542          .imageLayout = dst_layout,
543          .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
544          .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
545       },
546    };
547 
548    const VkRenderingInfo rendering_info = {
549       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
550       .flags = VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA,
551       .renderArea = saved_state.render.area,
552       .layerCount = 1,
553       .viewMask = saved_state.render.view_mask,
554       .colorAttachmentCount = 2,
555       .pColorAttachments = color_atts,
556    };
557 
558    radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
559 
560    emit_resolve(cmd_buffer, src_img, dst_img, dst_iview->vk.format);
561 
562    radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
563 
564    radv_meta_restore(&saved_state, cmd_buffer);
565 }
566 
567 /**
568  * Emit any needed resolves for the current subpass.
569  */
570 void
radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer * cmd_buffer)571 radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer)
572 {
573    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
574    const struct radv_physical_device *pdev = radv_device_physical(device);
575    const struct radv_rendering_state *render = &cmd_buffer->state.render;
576    enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;
577 
578    bool has_color_resolve = false;
579    for (uint32_t i = 0; i < render->color_att_count; ++i) {
580       if (render->color_att[i].resolve_iview != NULL)
581          has_color_resolve = true;
582    }
583    bool has_ds_resolve = render->ds_att.resolve_iview != NULL;
584 
585    if (!has_color_resolve && !has_ds_resolve)
586       return;
587 
588    radv_describe_begin_render_pass_resolve(cmd_buffer);
589 
590    if (render->ds_att.resolve_iview != NULL) {
591       struct radv_image_view *src_iview = render->ds_att.iview;
592       struct radv_image_view *dst_iview = render->ds_att.resolve_iview;
593 
594       radv_pick_resolve_method_images(device, src_iview->image, src_iview->vk.format, dst_iview->image,
595                                       dst_iview->vk.base_mip_level, VK_IMAGE_LAYOUT_UNDEFINED, cmd_buffer,
596                                       &resolve_method);
597 
598       if ((src_iview->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && render->ds_att.resolve_mode != VK_RESOLVE_MODE_NONE) {
599          if (resolve_method == RESOLVE_FRAGMENT) {
600             radv_depth_stencil_resolve_rendering_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, render->ds_att.resolve_mode);
601          } else {
602             assert(resolve_method == RESOLVE_COMPUTE);
603             radv_depth_stencil_resolve_rendering_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, render->ds_att.resolve_mode);
604          }
605       }
606 
607       if ((src_iview->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
608           render->ds_att.stencil_resolve_mode != VK_RESOLVE_MODE_NONE) {
609          if (resolve_method == RESOLVE_FRAGMENT) {
610             radv_depth_stencil_resolve_rendering_fs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,
611                                                     render->ds_att.stencil_resolve_mode);
612          } else {
613             assert(resolve_method == RESOLVE_COMPUTE);
614             radv_depth_stencil_resolve_rendering_cs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,
615                                                     render->ds_att.stencil_resolve_mode);
616          }
617       }
618 
619       /* From the Vulkan spec 1.2.165:
620        *
621        * "VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT specifies
622        *  write access to a color, resolve, or depth/stencil
623        *  resolve attachment during a render pass or via
624        *  certain subpass load and store operations."
625        *
626        * Yes, it's counterintuitive but it makes sense because ds
627        * resolve operations happen late at the end of the subpass.
628        *
629        * That said, RADV is wrong because it executes the subpass
630        * end barrier *before* any subpass resolves instead of after.
631        *
632        * TODO: Fix this properly by executing subpass end barriers
633        * after subpass resolves.
634        */
635       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
636       if (radv_image_has_htile(dst_iview->image))
637          cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
638    }
639 
640    if (has_color_resolve) {
641       uint32_t layer_count = render->layer_count;
642       VkRect2D resolve_area = render->area;
643       struct radv_resolve_barrier barrier;
644 
645       if (render->view_mask)
646          layer_count = util_last_bit(render->view_mask);
647 
648       /* Resolves happen before the end-of-subpass barriers get executed, so we have to make the
649        * attachment shader-readable.
650        */
651       barrier.src_stage_mask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
652       barrier.dst_stage_mask = VK_PIPELINE_STAGE_2_RESOLVE_BIT;
653       barrier.src_access_mask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
654       barrier.dst_access_mask = VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
655       radv_emit_resolve_barrier(cmd_buffer, &barrier);
656 
657       for (uint32_t i = 0; i < render->color_att_count; ++i) {
658          if (render->color_att[i].resolve_iview == NULL)
659             continue;
660 
661          struct radv_image_view *src_iview = render->color_att[i].iview;
662          VkImageLayout src_layout = render->color_att[i].layout;
663          struct radv_image *src_img = src_iview->image;
664          struct radv_image_view *dst_iview = render->color_att[i].resolve_iview;
665          VkImageLayout dst_layout = render->color_att[i].resolve_layout;
666          struct radv_image *dst_img = dst_iview->image;
667 
668          radv_pick_resolve_method_images(device, src_img, src_iview->vk.format, dst_img, dst_iview->vk.base_mip_level,
669                                          dst_layout, cmd_buffer, &resolve_method);
670          VkImageResolve2 region = {
671             .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
672             .extent =
673                {
674                   .width = resolve_area.extent.width,
675                   .height = resolve_area.extent.height,
676                   .depth = 1,
677                },
678             .srcSubresource =
679                (VkImageSubresourceLayers){
680                   .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
681                   .mipLevel = src_iview->vk.base_mip_level,
682                   .baseArrayLayer = src_iview->vk.base_array_layer,
683                   .layerCount = layer_count,
684                },
685             .dstSubresource =
686                (VkImageSubresourceLayers){
687                   .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
688                   .mipLevel = dst_iview->vk.base_mip_level,
689                   .baseArrayLayer = dst_iview->vk.base_array_layer,
690                   .layerCount = layer_count,
691                },
692             .srcOffset = {resolve_area.offset.x, resolve_area.offset.y, 0},
693             .dstOffset = {resolve_area.offset.x, resolve_area.offset.y, 0},
694          };
695 
696          switch (resolve_method) {
697          case RESOLVE_HW:
698             radv_cmd_buffer_resolve_rendering_hw(cmd_buffer, src_iview, src_layout, dst_iview, dst_layout);
699             break;
700          case RESOLVE_COMPUTE:
701             radv_decompress_resolve_src(cmd_buffer, src_iview->image, src_layout, &region);
702 
703             radv_cmd_buffer_resolve_rendering_cs(cmd_buffer, src_iview, src_layout, dst_iview, dst_layout, &region);
704             break;
705          case RESOLVE_FRAGMENT:
706             radv_decompress_resolve_src(cmd_buffer, src_iview->image, src_layout, &region);
707 
708             radv_cmd_buffer_resolve_rendering_fs(cmd_buffer, src_iview, src_layout, dst_iview, dst_layout);
709             break;
710          default:
711             unreachable("Invalid resolve method");
712          }
713       }
714    }
715 
716    radv_describe_end_render_pass_resolve(cmd_buffer);
717 }
718 
719 /**
720  * Decompress CMask/FMask before resolving a multisampled source image inside a
721  * subpass.
722  */
723 void
radv_decompress_resolve_rendering_src(struct radv_cmd_buffer * cmd_buffer)724 radv_decompress_resolve_rendering_src(struct radv_cmd_buffer *cmd_buffer)
725 {
726    const struct radv_rendering_state *render = &cmd_buffer->state.render;
727 
728    uint32_t layer_count = render->layer_count;
729    if (render->view_mask)
730       layer_count = util_last_bit(render->view_mask);
731 
732    for (uint32_t i = 0; i < render->color_att_count; ++i) {
733       if (render->color_att[i].resolve_iview == NULL)
734          continue;
735 
736       struct radv_image_view *src_iview = render->color_att[i].iview;
737       VkImageLayout src_layout = render->color_att[i].layout;
738       struct radv_image *src_image = src_iview->image;
739 
740       VkImageResolve2 region = {0};
741       region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
742       region.srcSubresource.aspectMask = src_iview->vk.aspects;
743       region.srcSubresource.mipLevel = 0;
744       region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
745       region.srcSubresource.layerCount = layer_count;
746 
747       radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, &region);
748    }
749 }
750 
751 /**
752  * Decompress CMask/FMask before resolving a multisampled source image.
753  */
754 void
radv_decompress_resolve_src(struct radv_cmd_buffer * cmd_buffer,struct radv_image * src_image,VkImageLayout src_image_layout,const VkImageResolve2 * region)755 radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
756                             VkImageLayout src_image_layout, const VkImageResolve2 *region)
757 {
758    VkImageMemoryBarrier2 barrier = {
759       .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
760       .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
761       .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
762       .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
763       .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
764       .oldLayout = src_image_layout,
765       .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
766       .image = radv_image_to_handle(src_image),
767       .subresourceRange = (VkImageSubresourceRange){
768          .aspectMask = region->srcSubresource.aspectMask,
769          .baseMipLevel = 0,
770          .levelCount = 1,
771          .baseArrayLayer = region->srcSubresource.baseArrayLayer,
772          .layerCount = vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource),
773       }};
774 
775    VkSampleLocationsInfoEXT sample_loc_info;
776    if (src_image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {
777       /* If the depth/stencil image uses different sample
778        * locations, we need them during HTILE decompressions.
779        */
780       struct radv_sample_locations_state *sample_locs = &cmd_buffer->state.render.sample_locations;
781 
782       sample_loc_info = (VkSampleLocationsInfoEXT){
783          .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
784          .sampleLocationsPerPixel = sample_locs->per_pixel,
785          .sampleLocationGridSize = sample_locs->grid_size,
786          .sampleLocationsCount = sample_locs->count,
787          .pSampleLocations = sample_locs->locations,
788       };
789       barrier.pNext = &sample_loc_info;
790    }
791 
792    VkDependencyInfo dep_info = {
793       .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
794       .imageMemoryBarrierCount = 1,
795       .pImageMemoryBarriers = &barrier,
796    };
797 
798    radv_CmdPipelineBarrier2(radv_cmd_buffer_to_handle(cmd_buffer), &dep_info);
799 }
800