/*
 * Copyright © 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
#include "nir/nir_builder.h"
#include "radv_formats.h"
#include "radv_meta.h"

static nir_shader *
build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
{
   const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT);

   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);

   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

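   /* Compute the global invocation ID, i.e. the pixel coordinates handled by this thread. */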
   nir_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_def *wg_id = nir_load_workgroup_id(&b);
   nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                                       b.shader->info.workgroup_size[2]);

   nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   /* Get coordinates. */
   nir_def *src_coord = nir_trim_vector(&b, global_id, 2);
   nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32),
                                 nir_undef(&b, 1, 32));

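   /* Fetch the FMASK word for this pixel; it packs one 4-bit value per sample. */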
   nir_tex_src frag_mask_srcs[] = {{
      .src_type = nir_tex_src_coord,
      .src = nir_src_for_ssa(src_coord),
   }};
   nir_def *frag_mask =
      nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
                                ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs);

   /* Get the maximum sample used in this fragment. */
   nir_def *max_sample_index = nir_imm_int(&b, 0);
   for (uint32_t s = 0; s < samples; s++) {
      /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
      max_sample_index = nir_umax(&b, max_sample_index,
                                  nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4)));
   }

   nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
   nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);

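   /* Copy one color sample per loop iteration, up to the maximum sample index in use. */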
   nir_loop *loop = nir_push_loop(&b);
   {
      nir_def *sample_id = nir_load_var(&b, counter);

      nir_tex_src frag_fetch_srcs[] = {{
                                          .src_type = nir_tex_src_coord,
                                          .src = nir_src_for_ssa(src_coord),
                                       },
                                       {
                                          .src_type = nir_tex_src_ms_index,
                                          .src = nir_src_for_ssa(sample_id),
                                       }};
      nir_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img),
                                                  NULL, ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, sample_id, outval,
                            nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS);

      radv_break_on_count(&b, counter, max_sample_index);
   }
   nir_pop_loop(&b, loop);

   return b.shader;
}

static VkResult
create_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result;

   if (!state->fmask_copy.ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &state->fmask_copy.ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!state->fmask_copy.p_layout) {
      result =
         radv_meta_create_pipeline_layout(device, &state->fmask_copy.ds_layout, 0, NULL, &state->fmask_copy.p_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   nir_shader *cs = build_fmask_copy_compute_shader(device, samples);

   result = radv_meta_create_compute_pipeline(device, cs, state->fmask_copy.p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_pipeline(struct radv_device *device, uint32_t samples_log2, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;

   mtx_lock(&state->mtx);
   if (!state->fmask_copy.pipeline[samples_log2]) {
      result = create_pipeline(device, 1 << samples_log2, &state->fmask_copy.pipeline[samples_log2]);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = state->fmask_copy.pipeline[samples_log2];

fail:
   mtx_unlock(&state->mtx);
   return result;
}

void
radv_device_finish_meta_fmask_copy_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_copy.p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_copy.ds_layout,
                                                        &state->alloc);

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_copy.pipeline[i], &state->alloc);
   }
}

VkResult
radv_device_init_meta_fmask_copy_state(struct radv_device *device, bool on_demand)
{
   VkResult result;

   if (on_demand)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
      result = create_pipeline(device, 1u << i, &device->meta_state.fmask_copy.pipeline[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static void
radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
                             const struct radv_image *dst_image)
{
   uint64_t src_offset, dst_offset, size;

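   /* FMASK and CMASK must be contiguous and identically sized in both images, so that a single
    * buffer copy can transfer both of them.
    */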
   assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size &&
          src_image->planes[0].surface.fmask_size == dst_image->planes[0].surface.fmask_size);
   assert(src_image->planes[0].surface.fmask_offset + src_image->planes[0].surface.fmask_size ==
             src_image->planes[0].surface.cmask_offset &&
          dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size ==
             dst_image->planes[0].surface.cmask_offset);

   /* Copy CMASK+FMASK. */
   size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size;
   src_offset = src_image->bindings[0].offset + src_image->planes[0].surface.fmask_offset;
   dst_offset = dst_image->bindings[0].offset + dst_image->planes[0].surface.fmask_offset;

   radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo, src_offset, dst_offset, size);
}

bool
radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
                        const struct radv_image *dst_image, const struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);

   /* TODO: Test on pre-GFX10 chips. */
   if (pdev->info.gfx_level < GFX10)
      return false;

   /* TODO: Add support for layers. */
   if (src_image->vk.array_layers != 1 || dst_image->vk.array_layers != 1)
      return false;

   /* Source/destination images must have FMASK. */
   if (!radv_image_has_fmask(src_image) || !radv_image_has_fmask(dst_image))
      return false;

   /* Source/destination images must have identical TC-compat mode. */
   if (radv_image_is_tc_compat_cmask(src_image) != radv_image_is_tc_compat_cmask(dst_image))
      return false;

   /* The region must be a whole image copy. */
   if (rect->src_x || rect->src_y || rect->dst_x || rect->dst_y || rect->width != src_image->vk.extent.width ||
       rect->height != src_image->vk.extent.height)
      return false;

   /* Source/destination images must have identical size. */
   if (src_image->vk.extent.width != dst_image->vk.extent.width ||
       src_image->vk.extent.height != dst_image->vk.extent.height)
      return false;

   /* Source/destination images must have identical swizzle. */
   if (src_image->planes[0].surface.fmask_tile_swizzle != dst_image->planes[0].surface.fmask_tile_swizzle ||
       src_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode !=
          dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode)
      return false;

   return true;
}

void
radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                struct radv_meta_blit2d_surf *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view src_iview, dst_iview;
   uint32_t samples = src->image->vk.samples;
   uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline pipeline;
   VkResult result;

   result = get_pipeline(device, samples_log2, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   radv_image_view_init(&src_iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(src->image),
                           .viewType = radv_meta_get_view_type(src->image),
                           .format = vk_format_no_srgb(src->image->vk.format),
                           .subresourceRange =
                              {
                                 .aspectMask = src->aspect_mask,
                                 .baseMipLevel = 0,
                                 .levelCount = 1,
                                 .baseArrayLayer = 0,
                                 .layerCount = 1,
                              },
                        },
                        0, NULL);

   radv_image_view_init(&dst_iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(dst->image),
                           .viewType = radv_meta_get_view_type(dst->image),
                           .format = vk_format_no_srgb(dst->image->vk.format),
                           .subresourceRange =
                              {
                                 .aspectMask = dst->aspect_mask,
                                 .baseMipLevel = 0,
                                 .levelCount = 1,
                                 .baseArrayLayer = 0,
                                 .layerCount = 1,
                              },
                        },
                        0, NULL);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.fmask_copy.p_layout, 0,
                                 2,
                                 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 0,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                                           .pImageInfo =
                                                              (VkDescriptorImageInfo[]){
                                                                 {.sampler = VK_NULL_HANDLE,
                                                                  .imageView = radv_image_view_to_handle(&src_iview),
                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                                              }},
                                                          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 1,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                           .pImageInfo = (VkDescriptorImageInfo[]){
                                                              {.sampler = VK_NULL_HANDLE,
                                                               .imageView = radv_image_view_to_handle(&dst_iview),
                                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                                           }}});

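   /* Dispatch one invocation per pixel of the source image. */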
   radv_unaligned_dispatch(cmd_buffer, src->image->vk.extent.width, src->image->vk.extent.height, 1);

   /* Fixup destination image metadata by copying CMASK/FMASK from the source image. */
   radv_fixup_copy_dst_metadata(cmd_buffer, src->image, dst->image);
}