/*
 * Copyright © 2016 Dave Airlie
 *
 * SPDX-License-Identifier: MIT
 */

#include <assert.h>
#include <stdbool.h>

#include "nir/nir_builder.h"
#include "nir/nir_format_convert.h"

#include "radv_entrypoints.h"
#include "radv_formats.h"
#include "radv_meta.h"
#include "sid.h"
#include "vk_common_entrypoints.h"
#include "vk_format.h"
#include "vk_shader_module.h"

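/* Convert the first three channels of the resolved value from linear to
 * sRGB; the alpha channel is passed through unchanged.
 */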
static nir_def *
radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_def *input)
{
   unsigned i;
   nir_def *comp[4];
   for (i = 0; i < 3; i++)
      comp[i] = nir_format_linear_to_srgb(b, nir_channel(b, input, i));
   comp[3] = nir_channels(b, input, 1 << 3);
   return nir_vec(b, comp, 4);
}

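/* Build an 8x8 compute shader that resolves a multisampled color image
 * (binding 0) into a single-sample storage image (binding 1). The per-sample
 * accumulation is emitted by radv_meta_build_resolve_shader_core(); source
 * and destination offsets are read from push constants.
 */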
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
   enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
   const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
                                         is_integer ? "int" : (is_srgb ? "srgb" : "float"));
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
   nir_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

   nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, src_coord);

   nir_def *outval = nir_load_var(&b, color);
   if (is_srgb)
      outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
                                 nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}

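/* Which aspect of a depth/stencil image a resolve shader operates on. */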
enum {
   DEPTH_RESOLVE,
   STENCIL_RESOLVE,
};

static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
      return "zero";
   case VK_RESOLVE_MODE_AVERAGE_BIT:
      return "average";
   case VK_RESOLVE_MODE_MIN_BIT:
      return "min";
   case VK_RESOLVE_MODE_MAX_BIT:
      return "max";
   default:
      unreachable("invalid resolve mode");
   }
}

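/* Build an 8x8 compute shader that resolves one aspect (depth or stencil) of
 * a layered multisampled image according to the requested resolve mode:
 * sample zero, average, min or max.
 */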
static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
                                           VkResolveModeFlagBits resolve_mode)
{
   enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
   const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);

   nir_builder b =
      radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
                            index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 3);

   nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

   nir_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);

   nir_def *img_coord =
      nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2));

   nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
   nir_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));

   if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
      for (int i = 1; i < samples; i++) {
         nir_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));

         switch (resolve_mode) {
         case VK_RESOLVE_MODE_AVERAGE_BIT:
            assert(index == DEPTH_RESOLVE);
            outval = nir_fadd(&b, outval, si);
            break;
         case VK_RESOLVE_MODE_MIN_BIT:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmin(&b, outval, si);
            else
               outval = nir_umin(&b, outval, si);
            break;
         case VK_RESOLVE_MODE_MAX_BIT:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmax(&b, outval, si);
            else
               outval = nir_umax(&b, outval, si);
            break;
         default:
            unreachable("invalid resolve mode");
         }
      }

      if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
         outval = nir_fdiv_imm(&b, outval, samples);
   }

   nir_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
                             nir_channel(&b, img_coord, 2), nir_undef(&b, 1, 32));
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
   return b.shader;
}

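/* Lazily create the descriptor set layout (sampled source image + storage
 * destination image) and the pipeline layout (16 bytes of push constants)
 * shared by all compute resolve pipelines.
 */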
static VkResult
create_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.resolve_compute.ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result =
         radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.resolve_compute.ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.resolve_compute.p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.resolve_compute.ds_layout, 1, &pc_range,
                                                &device->meta_state.resolve_compute.p_layout);
   }

   return result;
}

static VkResult
create_color_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
                              VkPipeline *pipeline)
{
   VkResult result;

   result = create_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.resolve_compute.p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
                                      VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
{
   VkResult result;

   result = create_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.resolve_compute.p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

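/* Create every compute resolve pipeline up front; when on-demand compilation
 * is requested, pipelines are instead created lazily at first use.
 */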
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res;

   if (on_demand)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      uint32_t samples = 1 << i;

      res = create_color_resolve_pipeline(device, samples, false, false, &state->resolve_compute.rc[i].pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_color_resolve_pipeline(device, samples, true, false, &state->resolve_compute.rc[i].i_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_color_resolve_pipeline(device, samples, false, true, &state->resolve_compute.rc[i].srgb_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
                                                  &state->resolve_compute.depth[i].average_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.depth[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.depth[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.stencil[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.stencil[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;
   }

   res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                               &state->resolve_compute.depth_zero_pipeline);
   if (res != VK_SUCCESS)
      return res;

   return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                                &state->resolve_compute.stencil_zero_pipeline);
}

void
radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;
   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].average_pipeline,
                           &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].max_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].min_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].max_pipeline,
                           &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].min_pipeline,
                           &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline, &state->alloc);

   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline, &state->alloc);

   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->resolve_compute.ds_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout, &state->alloc);
}

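/* Select (and create on demand) the color resolve pipeline matching the
 * source format class (integer, sRGB or float) and sample count.
 */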
static VkResult
get_color_resolve_pipeline(struct radv_device *device, struct radv_image_view *src_iview, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   uint32_t samples = src_iview->image->vk.samples;
   uint32_t samples_log2 = ffs(samples) - 1;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   if (vk_format_is_int(src_iview->vk.format))
      pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
   else if (vk_format_is_srgb(src_iview->vk.format))
      pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
   else
      pipeline = &state->resolve_compute.rc[samples_log2].pipeline;

   if (!*pipeline) {
      result = create_color_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk.format),
                                             vk_format_is_srgb(src_iview->vk.format), pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

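/* Bind the color resolve pipeline and its descriptors, push the source and
 * destination offsets, and dispatch one invocation per resolved pixel.
 */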
static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview,
             const VkOffset2D *src_offset, const VkOffset2D *dst_offset, const VkExtent2D *resolve_extent)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   VkPipeline pipeline;
   VkResult result;

   result = get_color_resolve_pipeline(device, src_iview, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 device->meta_state.resolve_compute.p_layout, 0, 2,
                                 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 0,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                                           .pImageInfo =
                                                              (VkDescriptorImageInfo[]){
                                                                 {.sampler = VK_NULL_HANDLE,
                                                                  .imageView = radv_image_view_to_handle(src_iview),
                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                                              }},
                                                          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 1,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                           .pImageInfo = (VkDescriptorImageInfo[]){
                                                              {
                                                                 .sampler = VK_NULL_HANDLE,
                                                                 .imageView = radv_image_view_to_handle(dst_iview),
                                                                 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                              },
                                                           }}});

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   unsigned push_constants[4] = {
      src_offset->x,
      src_offset->y,
      dst_offset->x,
      dst_offset->y,
   };
   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}

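/* Select (and create on demand) the depth/stencil resolve pipeline for the
 * given aspect, resolve mode and sample count.
 */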
static VkResult
get_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, VkImageAspectFlags aspects,
                                   VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline_out)
{
   const int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
   const uint32_t samples_log2 = ffs(samples) - 1;
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
      break;
   case VK_RESOLVE_MODE_AVERAGE_BIT:
      assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
      pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
      break;
   case VK_RESOLVE_MODE_MIN_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
      break;
   case VK_RESOLVE_MODE_MAX_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
      break;
   default:
      unreachable("invalid resolve mode");
   }

   if (!*pipeline) {
      result = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

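/* Bind the depth/stencil resolve pipeline and its descriptors, push the
 * resolve area offset, and dispatch one invocation per resolved pixel and
 * layer.
 */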
static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                           struct radv_image_view *dst_iview, const VkOffset2D *resolve_offset,
                           const VkExtent3D *resolve_extent, VkImageAspectFlags aspects,
                           VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const uint32_t samples = src_iview->image->vk.samples;
   VkPipeline pipeline;
   VkResult result;

   result = get_depth_stencil_resolve_pipeline(device, samples, aspects, resolve_mode, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 device->meta_state.resolve_compute.p_layout, 0, 2,
                                 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 0,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                                           .pImageInfo =
                                                              (VkDescriptorImageInfo[]){
                                                                 {.sampler = VK_NULL_HANDLE,
                                                                  .imageView = radv_image_view_to_handle(src_iview),
                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                                              }},
                                                          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 1,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                           .pImageInfo = (VkDescriptorImageInfo[]){
                                                              {
                                                                 .sampler = VK_NULL_HANDLE,
                                                                 .imageView = radv_image_view_to_handle(dst_iview),
                                                                 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                              },
                                                           }}});

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   uint32_t push_constants[2] = {resolve_offset->x, resolve_offset->y};

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants);

   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, resolve_extent->depth);
}

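/* Resolve a region of a multisampled color image with the compute path,
 * one layer at a time.
 */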
void
radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkFormat src_format,
                                VkImageLayout src_image_layout, struct radv_image *dst_image, VkFormat dst_format,
                                VkImageLayout dst_image_layout, const VkImageResolve2 *region)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_meta_saved_state saved_state;

   /* For partial resolves, DCC should be decompressed before resolving
    * because the metadata is re-initialized to the uncompressed state afterwards.
    */
   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);

   if (!radv_image_use_dcc_image_stores(device, dst_image) &&
       radv_layout_dcc_compressed(device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, queue_mask) &&
       (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
        region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height ||
        region->extent.depth != dst_image->vk.extent.depth)) {
      radv_decompress_dcc(cmd_buffer, dst_image,
                          &(VkImageSubresourceRange){
                             .aspectMask = region->dstSubresource.aspectMask,
                             .baseMipLevel = region->dstSubresource.mipLevel,
                             .levelCount = 1,
                             .baseArrayLayer = region->dstSubresource.baseArrayLayer,
                             .layerCount = vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource),
                          });
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);

   assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
          vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));

   const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);

   const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
   const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
   const unsigned src_layer_count = vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource);

   for (uint32_t layer = 0; layer < src_layer_count; ++layer) {

      struct radv_image_view src_iview;
      radv_image_view_init(&src_iview, device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(src_image),
                              .viewType = VK_IMAGE_VIEW_TYPE_2D,
                              .format = src_format,
                              .subresourceRange =
                                 {
                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = 0,
                                    .levelCount = 1,
                                    .baseArrayLayer = region->srcSubresource.baseArrayLayer + layer,
                                    .layerCount = 1,
                                 },
                           },
                           0, NULL);

      struct radv_image_view dst_iview;
      radv_image_view_init(&dst_iview, device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(dst_image),
                              .viewType = radv_meta_get_view_type(dst_image),
                              .format = vk_format_no_srgb(dst_format),
                              .subresourceRange =
                                 {
                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = region->dstSubresource.mipLevel,
                                    .levelCount = 1,
                                    .baseArrayLayer = dst_base_layer + layer,
                                    .layerCount = 1,
                                 },
                           },
                           0, NULL);

      emit_resolve(cmd_buffer, &src_iview, &dst_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
                   &(VkOffset2D){dstOffset.x, dstOffset.y}, &(VkExtent2D){extent.width, extent.height});

      radv_image_view_finish(&src_iview);
      radv_image_view_finish(&dst_iview);
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   if (!radv_image_use_dcc_image_stores(device, dst_image) &&
       radv_layout_dcc_compressed(device, dst_image, region->dstSubresource.mipLevel, dst_image_layout, queue_mask)) {

      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;

      VkImageSubresourceRange range = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .baseMipLevel = region->dstSubresource.mipLevel,
         .levelCount = 1,
         .baseArrayLayer = dst_base_layer,
         .layerCount = vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource),
      };

      cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
   }
}

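/* Compute-path color resolve of a dynamic rendering resolve attachment. The
 * compute shader writes are flushed so later work can read the resolved image.
 */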
void
radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                                     VkImageLayout src_layout, struct radv_image_view *dst_iview,
                                     VkImageLayout dst_layout, const VkImageResolve2 *region)
{
   radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format, src_layout, dst_iview->image,
                                   dst_iview->vk.format, dst_layout, region);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
}

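/* Compute-path depth/stencil resolve of a dynamic rendering resolve
 * attachment. HTILE on the destination is re-initialized afterwards when the
 * destination layout keeps it compressed.
 */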
void
radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects,
                                        VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_rendering_state *render = &cmd_buffer->state.render;
   VkRect2D resolve_area = render->area;
   struct radv_meta_saved_state saved_state;

   uint32_t layer_count = render->layer_count;
   if (render->view_mask)
      layer_count = util_last_bit(render->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);

   struct radv_image_view *src_iview = render->ds_att.iview;
   VkImageLayout src_layout =
      aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? render->ds_att.layout : render->ds_att.stencil_layout;
   struct radv_image *src_image = src_iview->image;

   VkImageResolve2 region = {0};
   region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
   region.srcSubresource.aspectMask = aspects;
   region.srcSubresource.mipLevel = 0;
   region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
   region.srcSubresource.layerCount = layer_count;

   radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, &region);

   radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);

   struct radv_image_view *dst_iview = render->ds_att.resolve_iview;
   VkImageLayout dst_layout =
      aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? render->ds_att.resolve_layout : render->ds_att.stencil_resolve_layout;
   struct radv_image *dst_image = dst_iview->image;

   struct radv_image_view tsrc_iview;
   radv_image_view_init(&tsrc_iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(src_image),
                           .viewType = VK_IMAGE_VIEW_TYPE_2D,
                           .format = src_iview->vk.format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = 0,
                                 .levelCount = 1,
                                 .baseArrayLayer = src_iview->vk.base_array_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        0, NULL);

   struct radv_image_view tdst_iview;
   radv_image_view_init(&tdst_iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(dst_image),
                           .viewType = radv_meta_get_view_type(dst_image),
                           .format = dst_iview->vk.format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = dst_iview->vk.base_mip_level,
                                 .levelCount = 1,
                                 .baseArrayLayer = dst_iview->vk.base_array_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        0, NULL);

   emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview, &resolve_area.offset,
                              &(VkExtent3D){resolve_area.extent.width, resolve_area.extent.height, layer_count},
                              aspects, resolve_mode);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);

   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);

   if (radv_layout_is_htile_compressed(device, dst_image, dst_layout, queue_mask)) {
      VkImageSubresourceRange range = {0};
      range.aspectMask = aspects;
      range.baseMipLevel = dst_iview->vk.base_mip_level;
      range.levelCount = 1;
      range.baseArrayLayer = dst_iview->vk.base_array_layer;
      range.layerCount = layer_count;

      uint32_t htile_value = radv_get_htile_initial_value(device, dst_image);

      cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
   }

   radv_image_view_finish(&tsrc_iview);
   radv_image_view_finish(&tdst_iview);

   radv_meta_restore(&saved_state, cmd_buffer);
}