xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/meta/radv_meta_fmask_copy.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 #include "nir/nir_builder.h"
7 #include "radv_formats.h"
8 #include "radv_meta.h"
9 
10 static nir_shader *
build_fmask_copy_compute_shader(struct radv_device * dev,int samples)11 build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
12 {
13    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
14    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT);
15 
16    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);
17 
18    b.shader->info.workgroup_size[0] = 8;
19    b.shader->info.workgroup_size[1] = 8;
20 
21    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
22    input_img->data.descriptor_set = 0;
23    input_img->data.binding = 0;
24 
25    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
26    output_img->data.descriptor_set = 0;
27    output_img->data.binding = 1;
28 
29    nir_def *invoc_id = nir_load_local_invocation_id(&b);
30    nir_def *wg_id = nir_load_workgroup_id(&b);
31    nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
32                                        b.shader->info.workgroup_size[2]);
33 
34    nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
35 
36    /* Get coordinates. */
37    nir_def *src_coord = nir_trim_vector(&b, global_id, 2);
38    nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32),
39                                  nir_undef(&b, 1, 32));
40 
41    nir_tex_src frag_mask_srcs[] = {{
42       .src_type = nir_tex_src_coord,
43       .src = nir_src_for_ssa(src_coord),
44    }};
45    nir_def *frag_mask =
46       nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
47                                 ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs);
48 
49    /* Get the maximum sample used in this fragment. */
50    nir_def *max_sample_index = nir_imm_int(&b, 0);
51    for (uint32_t s = 0; s < samples; s++) {
52       /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
53       max_sample_index = nir_umax(&b, max_sample_index,
54                                   nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4)));
55    }
56 
57    nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
58    nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
59 
60    nir_loop *loop = nir_push_loop(&b);
61    {
62       nir_def *sample_id = nir_load_var(&b, counter);
63 
64       nir_tex_src frag_fetch_srcs[] = {{
65                                           .src_type = nir_tex_src_coord,
66                                           .src = nir_src_for_ssa(src_coord),
67                                        },
68                                        {
69                                           .src_type = nir_tex_src_ms_index,
70                                           .src = nir_src_for_ssa(sample_id),
71                                        }};
72       nir_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img),
73                                                   NULL, ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);
74 
75       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, sample_id, outval,
76                             nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS);
77 
78       radv_break_on_count(&b, counter, max_sample_index);
79    }
80    nir_pop_loop(&b, loop);
81 
82    return b.shader;
83 }
84 
85 static VkResult
create_pipeline(struct radv_device * device,int samples,VkPipeline * pipeline)86 create_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
87 {
88    struct radv_meta_state *state = &device->meta_state;
89    VkResult result;
90 
91    if (!state->fmask_copy.ds_layout) {
92       const VkDescriptorSetLayoutBinding bindings[] = {
93          {
94             .binding = 0,
95             .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
96             .descriptorCount = 1,
97             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
98          },
99          {
100             .binding = 1,
101             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
102             .descriptorCount = 1,
103             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
104          },
105       };
106 
107       result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &state->fmask_copy.ds_layout);
108       if (result != VK_SUCCESS)
109          return result;
110    }
111 
112    if (!state->fmask_copy.p_layout) {
113       result =
114          radv_meta_create_pipeline_layout(device, &state->fmask_copy.ds_layout, 0, NULL, &state->fmask_copy.p_layout);
115       if (result != VK_SUCCESS)
116          return result;
117    }
118 
119    nir_shader *cs = build_fmask_copy_compute_shader(device, samples);
120 
121    result = radv_meta_create_compute_pipeline(device, cs, state->fmask_copy.p_layout, pipeline);
122 
123    ralloc_free(cs);
124    return result;
125 }
126 
127 static VkResult
get_pipeline(struct radv_device * device,uint32_t samples_log2,VkPipeline * pipeline_out)128 get_pipeline(struct radv_device *device, uint32_t samples_log2, VkPipeline *pipeline_out)
129 {
130    struct radv_meta_state *state = &device->meta_state;
131    VkResult result = VK_SUCCESS;
132 
133    mtx_lock(&state->mtx);
134    if (!state->fmask_copy.pipeline[samples_log2]) {
135       result = create_pipeline(device, 1 << samples_log2, &state->fmask_copy.pipeline[samples_log2]);
136       if (result != VK_SUCCESS)
137          goto fail;
138    }
139 
140    *pipeline_out = state->fmask_copy.pipeline[samples_log2];
141 
142 fail:
143    mtx_unlock(&state->mtx);
144    return result;
145 }
146 
147 void
radv_device_finish_meta_fmask_copy_state(struct radv_device * device)148 radv_device_finish_meta_fmask_copy_state(struct radv_device *device)
149 {
150    struct radv_meta_state *state = &device->meta_state;
151 
152    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_copy.p_layout, &state->alloc);
153    device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_copy.ds_layout,
154                                                         &state->alloc);
155 
156    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
157       radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_copy.pipeline[i], &state->alloc);
158    }
159 }
160 
161 VkResult
radv_device_init_meta_fmask_copy_state(struct radv_device * device,bool on_demand)162 radv_device_init_meta_fmask_copy_state(struct radv_device *device, bool on_demand)
163 {
164    VkResult result;
165 
166    if (on_demand)
167       return VK_SUCCESS;
168 
169    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
170       result = create_pipeline(device, 1u << i, &device->meta_state.fmask_copy.pipeline[i]);
171       if (result != VK_SUCCESS)
172          return result;
173    }
174 
175    return VK_SUCCESS;
176 }
177 
178 static void
radv_fixup_copy_dst_metadata(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image)179 radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
180                              const struct radv_image *dst_image)
181 {
182    uint64_t src_offset, dst_offset, size;
183 
184    assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size &&
185           src_image->planes[0].surface.fmask_size == dst_image->planes[0].surface.fmask_size);
186    assert(src_image->planes[0].surface.fmask_offset + src_image->planes[0].surface.fmask_size ==
187              src_image->planes[0].surface.cmask_offset &&
188           dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size ==
189              dst_image->planes[0].surface.cmask_offset);
190 
191    /* Copy CMASK+FMASK. */
192    size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size;
193    src_offset = src_image->bindings[0].offset + src_image->planes[0].surface.fmask_offset;
194    dst_offset = dst_image->bindings[0].offset + dst_image->planes[0].surface.fmask_offset;
195 
196    radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo, src_offset, dst_offset, size);
197 }
198 
199 bool
radv_can_use_fmask_copy(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image,const struct radv_meta_blit2d_rect * rect)200 radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
201                         const struct radv_image *dst_image, const struct radv_meta_blit2d_rect *rect)
202 {
203    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
204    const struct radv_physical_device *pdev = radv_device_physical(device);
205 
206    /* TODO: Test on pre GFX10 chips. */
207    if (pdev->info.gfx_level < GFX10)
208       return false;
209 
210    /* TODO: Add support for layers. */
211    if (src_image->vk.array_layers != 1 || dst_image->vk.array_layers != 1)
212       return false;
213 
214    /* Source/destination images must have FMASK. */
215    if (!radv_image_has_fmask(src_image) || !radv_image_has_fmask(dst_image))
216       return false;
217 
218    /* Source/destination images must have identical TC-compat mode. */
219    if (radv_image_is_tc_compat_cmask(src_image) != radv_image_is_tc_compat_cmask(dst_image))
220       return false;
221 
222    /* The region must be a whole image copy. */
223    if (rect->src_x || rect->src_y || rect->dst_x || rect->dst_y || rect->width != src_image->vk.extent.width ||
224        rect->height != src_image->vk.extent.height)
225       return false;
226 
227    /* Source/destination images must have identical size. */
228    if (src_image->vk.extent.width != dst_image->vk.extent.width ||
229        src_image->vk.extent.height != dst_image->vk.extent.height)
230       return false;
231 
232    /* Source/destination images must have identical swizzle. */
233    if (src_image->planes[0].surface.fmask_tile_swizzle != dst_image->planes[0].surface.fmask_tile_swizzle ||
234        src_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode !=
235           dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode)
236       return false;
237 
238    return true;
239 }
240 
241 void
radv_fmask_copy(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst)242 radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
243                 struct radv_meta_blit2d_surf *dst)
244 {
245    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
246    struct radv_image_view src_iview, dst_iview;
247    uint32_t samples = src->image->vk.samples;
248    uint32_t samples_log2 = ffs(samples) - 1;
249    VkPipeline pipeline;
250    VkResult result;
251 
252    result = get_pipeline(device, samples_log2, &pipeline);
253    if (result != VK_SUCCESS) {
254       vk_command_buffer_set_error(&cmd_buffer->vk, result);
255       return;
256    }
257 
258    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
259 
260    radv_image_view_init(&src_iview, device,
261                         &(VkImageViewCreateInfo){
262                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
263                            .image = radv_image_to_handle(src->image),
264                            .viewType = radv_meta_get_view_type(src->image),
265                            .format = vk_format_no_srgb(src->image->vk.format),
266                            .subresourceRange =
267                               {
268                                  .aspectMask = src->aspect_mask,
269                                  .baseMipLevel = 0,
270                                  .levelCount = 1,
271                                  .baseArrayLayer = 0,
272                                  .layerCount = 1,
273                               },
274                         },
275                         0, NULL);
276 
277    radv_image_view_init(&dst_iview, device,
278                         &(VkImageViewCreateInfo){
279                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
280                            .image = radv_image_to_handle(dst->image),
281                            .viewType = radv_meta_get_view_type(dst->image),
282                            .format = vk_format_no_srgb(dst->image->vk.format),
283                            .subresourceRange =
284                               {
285                                  .aspectMask = dst->aspect_mask,
286                                  .baseMipLevel = 0,
287                                  .levelCount = 1,
288                                  .baseArrayLayer = 0,
289                                  .layerCount = 1,
290                               },
291                         },
292                         0, NULL);
293 
294    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.fmask_copy.p_layout, 0,
295                                  2,
296                                  (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
297                                                            .dstBinding = 0,
298                                                            .dstArrayElement = 0,
299                                                            .descriptorCount = 1,
300                                                            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
301                                                            .pImageInfo =
302                                                               (VkDescriptorImageInfo[]){
303                                                                  {.sampler = VK_NULL_HANDLE,
304                                                                   .imageView = radv_image_view_to_handle(&src_iview),
305                                                                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
306                                                               }},
307                                                           {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
308                                                            .dstBinding = 1,
309                                                            .dstArrayElement = 0,
310                                                            .descriptorCount = 1,
311                                                            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
312                                                            .pImageInfo = (VkDescriptorImageInfo[]){
313                                                               {.sampler = VK_NULL_HANDLE,
314                                                                .imageView = radv_image_view_to_handle(&dst_iview),
315                                                                .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
316                                                            }}});
317 
318    radv_unaligned_dispatch(cmd_buffer, src->image->vk.extent.width, src->image->vk.extent.height, 1);
319 
320    /* Fixup destination image metadata by copying CMASK/FMASK from the source image. */
321    radv_fixup_copy_dst_metadata(cmd_buffer, src->image, dst->image);
322 }
323