/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * SPDX-License-Identifier: MIT
 */
#include "nir/nir_builder.h"
#include "radv_entrypoints.h"
#include "radv_meta.h"
#include "vk_common_entrypoints.h"
#include "vk_shader_module.h"

/*
 * GFX queue: compute-shader implementation of image->buffer copies.
 * Compute queue: also implements buffer->image and image->image copies, and image clears.
 */

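/* Builds the image->buffer copy shader: each invocation fetches the texel at
 * global_id + offset from the source image and stores it into the texel
 * buffer at y * stride + x. Push constants: source offset (vec2, or vec3 for
 * 3D) at byte 0, buffer row stride in texels at byte 12.
 */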
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

   nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *img_coord = nir_iadd(&b, global_id, offset);
   nir_def *outval =
      nir_txf_deref(&b, nir_build_deref_var(&b, input_img), nir_trim_vector(&b, img_coord, 2 + is_3d), NULL);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

   nir_def *tmp = nir_imul(&b, pos_y, stride);
   tmp = nir_iadd(&b, tmp, pos_x);

   nir_def *coord = nir_replicate(&b, tmp, 4);

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);

   return b.shader;
}

static VkResult
create_itob_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.itob.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.itob.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.itob.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.itob.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.itob.img_p_layout);
   }

   return result;
}

static VkResult
create_itob_pipeline(struct radv_device *device, bool is_3d, VkPipeline *pipeline)
{
   VkResult result;

   result = create_itob_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_itob_compute_shader(device, is_3d);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.itob.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_itob_pipeline(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   pipeline = is_3d ? &state->itob.pipeline_3d : &state->itob.pipeline;
   if (!*pipeline) {
      result = create_itob_pipeline(device, is_3d, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

/* Image to buffer copy pipelines. */
static VkResult
radv_device_init_meta_itob_state(struct radv_device *device)
{
   VkResult result;

   result = create_itob_pipeline(device, false, &device->meta_state.itob.pipeline);
   if (result != VK_SUCCESS)
      return result;

   return create_itob_pipeline(device, true, &device->meta_state.itob.pipeline_3d);
}

static void
radv_device_finish_meta_itob_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
                                                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
}

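/* Builds the buffer->image copy shader: the inverse of the itob shader above.
 * Each invocation fetches the texel at y * stride + x from the source texel
 * buffer and stores it into the destination image at global_id + offset.
 */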
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

   nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

   nir_def *buf_coord = nir_imul(&b, pos_y, stride);
   buf_coord = nir_iadd(&b, buf_coord, pos_x);

   nir_def *coord = nir_iadd(&b, global_id, offset);
   nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);

   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
                                 is_3d ? nir_channel(&b, coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = dim);

   return b.shader;
}

static VkResult
create_btoi_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.btoi.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.btoi.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.btoi.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.btoi.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.btoi.img_p_layout);
   }

   return result;
}

static VkResult
create_btoi_pipeline(struct radv_device *device, bool is_3d, VkPipeline *pipeline)
{
   VkResult result;

   result = create_btoi_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_btoi_compute_shader(device, is_3d);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.btoi.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_btoi_pipeline(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   pipeline = is_3d ? &state->btoi.pipeline_3d : &state->btoi.pipeline;
   if (!*pipeline) {
      result = create_btoi_pipeline(device, is_3d, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

/* Buffer to image copy pipelines. */
static VkResult
radv_device_init_meta_btoi_state(struct radv_device *device)
{
   VkResult result;

   result = create_btoi_pipeline(device, false, &device->meta_state.btoi.pipeline);
   if (result != VK_SUCCESS)
      return result;

   return create_btoi_pipeline(device, true, &device->meta_state.btoi.pipeline_3d);
}

static void
radv_device_finish_meta_btoi_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
                                                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
}

/* Buffer to image - special path for R32G32B32 */
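/* The destination is accessed through an R32 texel-buffer view (presumably
 * because 96-bit texel stores are not directly supported), so the shader
 * performs three single-component stores per destination texel. */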
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_btoi_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
   nir_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

   nir_def *buf_coord = nir_imul(&b, pos_y, stride);
   buf_coord = nir_iadd(&b, buf_coord, pos_x);

   nir_def *img_coord = nir_iadd(&b, global_id, offset);

   nir_def *global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
                                  nir_imul_imm(&b, nir_channel(&b, img_coord, 0), 3));

   nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);

   for (int chan = 0; chan < 3; chan++) {
      nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);

      nir_def *coord = nir_replicate(&b, local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, outval, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}

static VkResult
create_btoi_r32g32b32_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.btoi_r32g32b32.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result =
         radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.btoi_r32g32b32.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.btoi_r32g32b32.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.btoi_r32g32b32.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.btoi_r32g32b32.img_p_layout);
   }

   return result;
}

static VkResult
create_btoi_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline)
{
   VkResult result;

   result = create_btoi_r32g32b32_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.btoi_r32g32b32.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_btoi_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;

   mtx_lock(&state->mtx);

   if (!state->btoi_r32g32b32.pipeline) {
      result = create_btoi_r32g32b32_pipeline(device, &state->btoi_r32g32b32.pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = state->btoi_r32g32b32.pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
   return create_btoi_r32g32b32_pipeline(device, &device->meta_state.btoi_r32g32b32.pipeline);
}

static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->btoi_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline, &state->alloc);
}

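/* Builds the image->image copy shader. Handles any mix of 2D and 3D source
 * and destination; for multisampled 2D images every sample of each texel is
 * fetched and stored individually. Push constants: source offset at byte 0,
 * destination offset at byte 12. */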
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool src_3d, bool dst_3d, int samples)
{
   bool is_multisampled = samples > 1;
   enum glsl_sampler_dim src_dim = src_3d            ? GLSL_SAMPLER_DIM_3D
                                   : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                     : GLSL_SAMPLER_DIM_2D;
   enum glsl_sampler_dim dst_dim = dst_3d            ? GLSL_SAMPLER_DIM_3D
                                   : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                     : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *buf_type = glsl_sampler_type(src_dim, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(dst_dim, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_cs-%dd-%dd-%d", src_3d ? 3 : 2,
                                         dst_3d ? 3 : 2, samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, (src_3d || dst_3d) ? 3 : 2);

   nir_def *src_offset = nir_load_push_constant(&b, src_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = src_3d ? 12 : 8);
   nir_def *dst_offset = nir_load_push_constant(&b, dst_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = dst_3d ? 24 : 20);

   nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);

   nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_def *tex_vals[8];
   if (is_multisampled) {
      for (uint32_t i = 0; i < samples; i++) {
         tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2), nir_imm_int(&b, i));
      }
   } else {
      tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + src_3d), nir_imm_int(&b, 0));
   }

   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
                                 dst_3d ? nir_channel(&b, dst_coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

   for (uint32_t i = 0; i < samples; i++) {
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_imm_int(&b, i), tex_vals[i],
                            nir_imm_int(&b, 0), .image_dim = dst_dim);
   }

   return b.shader;
}

static VkResult
create_itoi_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.itoi.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.itoi.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.itoi.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 24,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.itoi.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.itoi.img_p_layout);
   }

   return result;
}

static VkResult
create_itoi_pipeline(struct radv_device *device, bool src_3d, bool dst_3d, int samples, VkPipeline *pipeline)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result;

   result = create_itoi_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_itoi_compute_shader(device, src_3d, dst_3d, samples);

   result = radv_meta_create_compute_pipeline(device, cs, state->itoi.img_p_layout, pipeline);
   ralloc_free(cs);
   return result;
}

static VkResult
get_itoi_pipeline(struct radv_device *device, const struct radv_image *src_image, const struct radv_image *dst_image,
                  int samples, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   const bool src_3d = src_image->vk.image_type == VK_IMAGE_TYPE_3D;
   const bool dst_3d = dst_image->vk.image_type == VK_IMAGE_TYPE_3D;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   if (src_3d && dst_3d)
      pipeline = &device->meta_state.itoi.pipeline_3d_3d;
   else if (src_3d)
      pipeline = &device->meta_state.itoi.pipeline_3d_2d;
   else if (dst_3d)
      pipeline = &device->meta_state.itoi.pipeline_2d_3d;
   else
      pipeline = &state->itoi.pipeline[samples_log2];

   if (!*pipeline) {
      result = create_itoi_pipeline(device, src_3d, dst_3d, samples, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

/* Image to image copy pipelines. */
static VkResult
radv_device_init_meta_itoi_state(struct radv_device *device)
{
   VkResult result;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
      uint32_t samples = 1 << i;
      result = create_itoi_pipeline(device, false, false, samples, &device->meta_state.itoi.pipeline[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   for (uint32_t src_3d = 0; src_3d < 2; src_3d++) {
      for (uint32_t dst_3d = 0; dst_3d < 2; dst_3d++) {
         VkPipeline *pipeline;
         if (src_3d && dst_3d)
            pipeline = &device->meta_state.itoi.pipeline_3d_3d;
         else if (src_3d)
            pipeline = &device->meta_state.itoi.pipeline_3d_2d;
         else if (dst_3d)
            pipeline = &device->meta_state.itoi.pipeline_2d_3d;
         else
            continue;

         result = create_itoi_pipeline(device, src_3d, dst_3d, 1, pipeline);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   return result;
}

static void
radv_device_finish_meta_itoi_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
                                                        &state->alloc);

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_2d_3d, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d_2d, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d_3d, &state->alloc);
}

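/* Image->image copies of R32G32B32 also go through R32 texel-buffer views on
 * both sides; the third component of each push-constant offset carries the
 * corresponding surface stride in 32-bit texels. */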
static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "output_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
   nir_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);

   nir_def *src_stride = nir_channel(&b, src_offset, 2);
   nir_def *dst_stride = nir_channel(&b, dst_offset, 2);

   nir_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
   nir_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);

   nir_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
                                      nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));

   nir_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
                                      nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));

   for (int chan = 0; chan < 3; chan++) {
      /* src */
      nir_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
      nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL);

      /* dst */
      nir_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);

      nir_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, outval, 0), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}

/* Image to image - special path for R32G32B32 */
static VkResult
create_itoi_r32g32b32_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.itoi_r32g32b32.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result =
         radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.itoi_r32g32b32.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.itoi_r32g32b32.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 24,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.itoi_r32g32b32.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.itoi_r32g32b32.img_p_layout);
   }

   return result;
}

static VkResult
create_itoi_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline)
{
   VkResult result;

   result = create_itoi_r32g32b32_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.itoi_r32g32b32.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_itoi_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;

   mtx_lock(&state->mtx);
   if (!state->itoi_r32g32b32.pipeline) {
      result = create_itoi_r32g32b32_pipeline(device, &state->itoi_r32g32b32.pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = state->itoi_r32g32b32.pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
{
   return create_itoi_r32g32b32_pipeline(device, &device->meta_state.itoi_r32g32b32.pipeline);
}

static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->itoi_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline, &state->alloc);
}

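/* Builds the image clear shader: each invocation stores the push-constant
 * clear value at (x, y, layer), replicating it to every sample for
 * multisampled images. Push constants: clear value at byte 0, layer at
 * byte 16. */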
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
   bool is_multisampled = samples > 1;
   enum glsl_sampler_dim dim = is_3d             ? GLSL_SAMPLER_DIM_3D
                               : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                 : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
   nir_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);

   nir_def *comps[4];
   comps[0] = nir_channel(&b, global_id, 0);
   comps[1] = nir_channel(&b, global_id, 1);
   comps[2] = layer;
   comps[3] = nir_undef(&b, 1, 32);
   global_id = nir_vec(&b, comps, 4);

   for (uint32_t i = 0; i < samples; i++) {
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_imm_int(&b, i), clear_val,
                            nir_imm_int(&b, 0), .image_dim = dim);
   }

   return b.shader;
}

static VkResult
create_cleari_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.cleari.img_ds_layout) {
      const VkDescriptorSetLayoutBinding binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      };

      result = radv_meta_create_descriptor_set_layout(device, 1, &binding, &device->meta_state.cleari.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.cleari.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 20,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.cleari.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.cleari.img_p_layout);
   }

   return result;
}

static VkResult
create_cleari_pipeline(struct radv_device *device, bool is_3d, int samples, VkPipeline *pipeline)
{
   VkResult result;

   result = create_cleari_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_cleari_compute_shader(device, is_3d, samples);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.cleari.img_p_layout, pipeline);
   ralloc_free(cs);
   return result;
}

static VkResult
get_cleari_pipeline(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
   const uint32_t samples = image->vk.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   if (is_3d) {
      pipeline = &state->cleari.pipeline_3d;
   } else {
      pipeline = &state->cleari.pipeline[samples_log2];
   }

   if (!*pipeline) {
      result = create_cleari_pipeline(device, is_3d, samples, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
   VkResult result;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
      uint32_t samples = 1 << i;
      result = create_cleari_pipeline(device, false, samples, &device->meta_state.cleari.pipeline[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   return create_cleari_pipeline(device, true, 1, &device->meta_state.cleari.pipeline_3d);
}

static void
radv_device_finish_meta_cleari_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
                                                        &state->alloc);

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc);
}

/* Special path for clearing R32G32B32 images using a compute shader. */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_cleari_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *global_x = nir_channel(&b, global_id, 0);
   nir_def *global_y = nir_channel(&b, global_id, 1);

   nir_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));

   for (unsigned chan = 0; chan < 3; chan++) {
      nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);

      nir_def *coord = nir_replicate(&b, local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}

static VkResult
create_cleari_r32g32b32_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.cleari_r32g32b32.img_ds_layout) {
      const VkDescriptorSetLayoutBinding binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      };

      result = radv_meta_create_descriptor_set_layout(device, 1, &binding,
                                                      &device->meta_state.cleari_r32g32b32.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.cleari_r32g32b32.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.cleari_r32g32b32.img_ds_layout, 1,
                                                &pc_range, &device->meta_state.cleari_r32g32b32.img_p_layout);
   }

   return result;
}

static VkResult
create_cleari_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline)
{
   VkResult result;

   result = create_cleari_r32g32b32_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.cleari_r32g32b32.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_cleari_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;

   mtx_lock(&state->mtx);

   if (!state->cleari_r32g32b32.pipeline) {
      result = create_cleari_r32g32b32_pipeline(device, &state->cleari_r32g32b32.pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = state->cleari_r32g32b32.pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
{
   return create_cleari_r32g32b32_pipeline(device, &device->meta_state.cleari_r32g32b32.pipeline);
}

static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->cleari_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline, &state->alloc);
}

void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
   radv_device_finish_meta_itob_state(device);
   radv_device_finish_meta_btoi_state(device);
   radv_device_finish_meta_btoi_r32g32b32_state(device);
   radv_device_finish_meta_itoi_state(device);
   radv_device_finish_meta_itoi_r32g32b32_state(device);
   radv_device_finish_meta_cleari_state(device);
   radv_device_finish_meta_cleari_r32g32b32_state(device);
}

VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device, bool on_demand)
{
   VkResult result;

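   /* With on-demand compilation the pipelines are instead built lazily, on
    * first use, by the get_*_pipeline() helpers above. */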
   if (on_demand)
      return VK_SUCCESS;

   result = radv_device_init_meta_itob_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_btoi_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_btoi_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_itoi_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_itoi_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_cleari_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_cleari_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

static void
create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, struct radv_image_view *iview,
             VkFormat format, VkImageAspectFlagBits aspects)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (format == VK_FORMAT_UNDEFINED)
      format = surf->format;

   radv_image_view_init(iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(surf->image),
                           .viewType = radv_meta_get_view_type(surf->image),
                           .format = format,
                           .subresourceRange = {.aspectMask = aspects,
                                                .baseMipLevel = surf->level,
                                                .levelCount = 1,
                                                .baseArrayLayer = surf->layer,
                                                .layerCount = 1},
                        },
                        0,
                        &(struct radv_image_view_extra_create_info){
                           .disable_compression = surf->disable_compression,
                        });
}

static void
create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset, VkFormat format,
             struct radv_buffer_view *bview)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_buffer_view_init(bview, device,
                         &(VkBufferViewCreateInfo){
                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                            .flags = 0,
                            .buffer = radv_buffer_to_handle(buffer),
                            .format = format,
                            .offset = offset,
                            .range = VK_WHOLE_SIZE,
                         });
}

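/* Wraps the image's backing memory in a VkBuffer so its contents can be
 * aliased through texel-buffer views; used by the R32G32B32 paths. */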
static void
create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
                         VkBufferUsageFlagBits2KHR usage, VkBuffer *buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_device_memory mem;

   radv_device_memory_init(&mem, device, surf->image->bindings[0].bo);

   radv_create_buffer(device,
                      &(VkBufferCreateInfo){
                         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                         .pNext =
                            &(VkBufferUsageFlags2CreateInfoKHR){
                               .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
                               .usage = usage,
                            },
                         .flags = 0,
                         .size = surf->image->size,
                         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
                      },
                      NULL, buffer, true);

   radv_BindBufferMemory2(radv_device_to_handle(device), 1,
                          (VkBindBufferMemoryInfo[]){{
                             .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
                             .buffer = *buffer,
                             .memory = radv_device_memory_to_handle(&mem),
                             .memoryOffset = surf->image->bindings[0].offset,
                          }});

   radv_device_memory_finish(&mem);
}

static void
create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
                           VkFormat src_format, struct radv_buffer_view *bview)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   VkFormat format;

   switch (src_format) {
   case VK_FORMAT_R32G32B32_UINT:
      format = VK_FORMAT_R32_UINT;
      break;
   case VK_FORMAT_R32G32B32_SINT:
      format = VK_FORMAT_R32_SINT;
      break;
   case VK_FORMAT_R32G32B32_SFLOAT:
      format = VK_FORMAT_R32_SFLOAT;
      break;
   default:
      unreachable("invalid R32G32B32 format");
   }

   radv_buffer_view_init(bview, device,
                         &(VkBufferViewCreateInfo){
                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                            .flags = 0,
                            .buffer = radv_buffer_to_handle(buffer),
                            .format = format,
                            .offset = offset,
                            .range = VK_WHOLE_SIZE,
                         });
}

/* GFX9+ has an issue where the HW does not calculate mipmap degradations
 * for block-compressed images correctly (see the comment in
 * radv_image_view_init). Some texels are unaddressable and cannot be copied
 * to/from by a compute shader. Here we will perform a buffer copy to copy the
 * texels that the hardware missed.
 *
 * GFX10 will not use this workaround because it can be fixed by adjusting its
 * image view descriptors instead.
 */
static void
fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_blit2d_buffer *buf_bsurf,
                   const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect,
                   bool to_image)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const unsigned mip_level = img_bsurf->level;
   const struct radv_image *image = img_bsurf->image;
   const struct radeon_surf *surf = &image->planes[0].surface;
   const struct radeon_info *gpu_info = &pdev->info;
   struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
   struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);

   /* GFX10+ applies the descriptor-based workaround to 2D images; other image
    * types still need this path. */
   if (gpu_info->gfx_level < GFX9 || (gpu_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) ||
       image->vk.mip_levels == 1 || !vk_format_is_block_compressed(image->vk.format))
      return;

   /* The physical extent of the base mip */
   VkExtent2D hw_base_extent = {surf->u.gfx9.base_mip_width, surf->u.gfx9.base_mip_height};

   /* The hardware-calculated extent of the selected mip
    * (naive divide-by-two integer math)
    */
   VkExtent2D hw_mip_extent = {u_minify(hw_base_extent.width, mip_level), u_minify(hw_base_extent.height, mip_level)};

   /* The actual extent we want to copy */
   VkExtent2D mip_extent = {rect->width, rect->height};

   VkOffset2D mip_offset = {to_image ? rect->dst_x : rect->src_x, to_image ? rect->dst_y : rect->src_y};

   if (hw_mip_extent.width >= mip_offset.x + mip_extent.width &&
       hw_mip_extent.height >= mip_offset.y + mip_extent.height)
      return;

   if (!to_image) {
      /* If we are writing to a buffer, then we need to wait for the compute
       * shader to finish because it may write over the unaddressable texels
       * while we're fixing them. If we're writing to an image, we do not need
       * to wait because the compute shader cannot write to those texels.
       */
      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
   }

   for (uint32_t y = 0; y < mip_extent.height; y++) {
      uint32_t coordY = y + mip_offset.y;
      /* If the default copy algorithm (done previously) has already seen this
       * scanline, then we can bias the starting X coordinate over to skip the
       * region already copied by the default copy.
       */
      uint32_t x = (coordY < hw_mip_extent.height) ? hw_mip_extent.width : 0;
      for (; x < mip_extent.width; x++) {
         uint32_t coordX = x + mip_offset.x;
         uint64_t addr = ac_surface_addr_from_coord(addrlib, gpu_info, surf, &surf_info, mip_level, coordX, coordY,
                                                    img_bsurf->layer, image->vk.image_type == VK_IMAGE_TYPE_3D);
         struct radeon_winsys_bo *img_bo = image->bindings[0].bo;
         struct radeon_winsys_bo *mem_bo = buf_bsurf->buffer->bo;
         const uint64_t img_offset = image->bindings[0].offset + addr;
         /* buf_bsurf->offset already includes the layer offset */
         const uint64_t mem_offset =
            buf_bsurf->buffer->offset + buf_bsurf->offset + y * buf_bsurf->pitch * surf->bpe + x * surf->bpe;
         if (to_image) {
            radv_copy_buffer(cmd_buffer, mem_bo, img_bo, mem_offset, img_offset, surf->bpe);
         } else {
            radv_copy_buffer(cmd_buffer, img_bo, mem_bo, img_offset, mem_offset, surf->bpe);
         }
      }
   }
}

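/* Returns the row stride of an R32G32B32 image in 32-bit texel units. On
 * GFX6-8 the legacy pitch is stored in whole 96-bit texels and is scaled by
 * 3; on GFX9+ surf_pitch is used as-is. */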
static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   unsigned stride;

   if (pdev->info.gfx_level >= GFX9) {
      stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
   } else {
      stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
   }

   return stride;
}

static void
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_buffer_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {
                                         .sampler = VK_NULL_HANDLE,
                                         .imageView = radv_image_view_to_handle(src),
                                         .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                      },
                                   }},
                               {
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 1,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
                               }});
}

void
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                          struct radv_meta_blit2d_buffer *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view src_view;
   struct radv_buffer_view dst_view;
   VkPipeline pipeline;
   VkResult result;

   result = get_itob_pipeline(device, src->image, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   create_iview(cmd_buffer, src, &src_view, VK_FORMAT_UNDEFINED, src->aspect_mask);
   create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
   itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

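   /* Matches the itob shader's push-constant layout: source offset at byte 0
    * (the layer acts as the Z coordinate for 3D images) and the buffer row
    * stride in texels at byte 12. */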
1370 unsigned push_constants[4] = {rect->src_x, rect->src_y, src->layer, dst->pitch};
1371 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itob.img_p_layout,
1372 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
1373
1374 radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);
1375 fixup_gfx9_cs_copy(cmd_buffer, dst, src, rect, false);
1376
1377 radv_image_view_finish(&src_view);
1378 radv_buffer_view_finish(&dst_view);
1379 }

static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                                struct radv_buffer_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 0,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
                               },
                               {
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 1,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
                               }});
}

static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
                                       struct radv_meta_blit2d_surf *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_buffer_view src_view, dst_view;
   unsigned dst_offset = 0;
   VkPipeline pipeline;
   unsigned stride;
   VkBuffer buffer;
   VkResult result;

   result = get_btoi_r32g32b32_pipeline(device, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   /* This special btoi path for R32G32B32 formats will write the linear
    * image as a buffer with the same underlying memory. The compute
    * shader will copy all components separately using an R32 format.
    */
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &buffer);

   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format, &dst_view);
   btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   unsigned push_constants[4] = {
      rect->dst_x,
      rect->dst_y,
      stride,
      src->pitch,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi_r32g32b32.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

   radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);

   radv_buffer_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
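
/* A minimal sketch, assuming the R32 shader addresses one 32-bit component
 * per store: component c (0 = R, 1 = G, 2 = B) of texel (x, y) lands at this
 * element of the R32 texel-buffer view, with the stride taken in 32-bit
 * elements as returned by get_image_stride_for_r32g32b32(). The helper name
 * is hypothetical and only illustrates the addressing; it is not used above.
 */
static inline uint32_t
r32g32b32_elem_index_sketch(uint32_t x, uint32_t y, uint32_t stride, uint32_t c)
{
   return y * stride + 3 * x + c; /* three R32 elements per R32G32B32 texel */
}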

static void
btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, struct radv_image_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 0,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
                               },
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dst),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});
}

void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
                             struct radv_meta_blit2d_surf *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_buffer_view src_view;
   struct radv_image_view dst_view;
   VkPipeline pipeline;
   VkResult result;

   if (dst->image->vk.format == VK_FORMAT_R32G32B32_UINT || dst->image->vk.format == VK_FORMAT_R32G32B32_SINT ||
       dst->image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, rect);
      return;
   }

   result = get_btoi_pipeline(device, dst->image, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
   create_iview(cmd_buffer, dst, &dst_view, VK_FORMAT_UNDEFINED, dst->aspect_mask);
   btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   unsigned push_constants[4] = {
      rect->dst_x,
      rect->dst_y,
      dst->layer,
      src->pitch,
   };
   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

   radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);
   fixup_gfx9_cs_copy(cmd_buffer, src, dst, rect, true);

   radv_image_view_finish(&dst_view);
   radv_buffer_view_finish(&src_view);
}

static void
itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                                struct radv_buffer_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 0,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
                               },
                               {
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 1,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
                               }});
}

static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                                      struct radv_meta_blit2d_surf *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_buffer_view src_view, dst_view;
   unsigned src_offset = 0, dst_offset = 0;
   unsigned src_stride, dst_stride;
   VkBuffer src_buffer, dst_buffer;
   VkPipeline pipeline;
   VkResult result;

   result = get_itoi_r32g32b32_pipeline(device, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   /* 96-bit formats are only compatible with themselves. */
   assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
          dst->format == VK_FORMAT_R32G32B32_SFLOAT);

   /* This special itoi path for R32G32B32 formats will write the linear
    * image as a buffer with the same underlying memory. The compute
    * shader will copy all components separately using an R32 format.
    */
   create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_2_UNIFORM_TEXEL_BUFFER_BIT_KHR, &src_buffer);
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &dst_buffer);

   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset, src->format, &src_view);
   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset, dst->format, &dst_view);
   itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
   dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   unsigned push_constants[6] = {
      rect->src_x, rect->src_y, src_stride, rect->dst_x, rect->dst_y, dst_stride,
   };
   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi_r32g32b32.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);

   radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);

   radv_buffer_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
   radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
}
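
/* A minimal CPU-side sketch (hypothetical reference, not driver code) of the
 * copy the R32 compute shader performs over the dispatched rect, assuming the
 * six push constants pushed above, {src_x, src_y, src_stride, dst_x, dst_y,
 * dst_stride}, with both strides in 32-bit elements and the addressing
 * mirroring r32g32b32_elem_index_sketch() earlier in this file.
 */
static inline void
itoi_r32g32b32_reference_sketch(const uint32_t *src, uint32_t *dst, const unsigned pc[6], unsigned width,
                                unsigned height)
{
   for (unsigned y = 0; y < height; y++) {
      for (unsigned x = 0; x < width; x++) {
         /* Copy R, G and B as three independent 32-bit elements. */
         for (unsigned c = 0; c < 3; c++)
            dst[(pc[4] + y) * pc[5] + 3 * (pc[3] + x) + c] = src[(pc[1] + y) * pc[2] + 3 * (pc[0] + x) + c];
      }
   }
}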

static void
itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_image_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, 2,
                                 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 0,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                                           .pImageInfo =
                                                              (VkDescriptorImageInfo[]){
                                                                 {
                                                                    .sampler = VK_NULL_HANDLE,
                                                                    .imageView = radv_image_view_to_handle(src),
                                                                    .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                                 },
                                                              }},
                                                          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 1,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                           .pImageInfo = (VkDescriptorImageInfo[]){
                                                              {
                                                                 .sampler = VK_NULL_HANDLE,
                                                                 .imageView = radv_image_view_to_handle(dst),
                                                                 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                              },
                                                           }}});
}

void
radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                            struct radv_meta_blit2d_surf *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view src_view, dst_view;
   uint32_t samples = src->image->vk.samples;
   VkPipeline pipeline;
   VkResult result;

   if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
       src->format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, rect);
      return;
   }

   result = get_itoi_pipeline(device, src->image, dst->image, samples, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   u_foreach_bit (i, dst->aspect_mask) {
      unsigned dst_aspect_mask = 1u << i;
      unsigned src_aspect_mask = dst_aspect_mask;
      VkFormat depth_format = 0;
      if (dst_aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
         depth_format = vk_format_stencil_only(dst->image->vk.format);
      else if (dst_aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
         depth_format = vk_format_depth_only(dst->image->vk.format);
      else {
         /*
          * "Multi-planar images can only be copied on a per-plane basis, and the subresources used in each region when
          * copying to or from such images must specify only one plane, though different regions can specify different
          * planes."
          */
         assert((dst->aspect_mask & (dst->aspect_mask - 1)) == 0);
         assert((src->aspect_mask & (src->aspect_mask - 1)) == 0);
         src_aspect_mask = src->aspect_mask;
      }

      create_iview(cmd_buffer, src, &src_view, depth_format, src_aspect_mask);
      create_iview(cmd_buffer, dst, &dst_view, depth_format, dst_aspect_mask);

      itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

      radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

      unsigned push_constants[6] = {
         rect->src_x, rect->src_y, src->layer, rect->dst_x, rect->dst_y, dst->layer,
      };
      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi.img_p_layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);

      radv_image_view_finish(&src_view);
      radv_image_view_finish(&dst_view);
   }
}
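
/* A minimal sketch of the aspect walk above: u_foreach_bit visits set bits
 * from the LSB up, so a combined depth+stencil mask produces one pass per
 * aspect, while multi-planar copies arrive with exactly one plane aspect set
 * (per the spec text quoted above). The function name is hypothetical and the
 * sketch is illustrative only.
 */
static inline void
walk_copy_aspects_sketch(VkImageAspectFlags aspect_mask)
{
   u_foreach_bit (i, aspect_mask) {
      VkImageAspectFlags aspect = 1u << i;
      /* e.g. VK_IMAGE_ASPECT_DEPTH_BIT on the first pass, then
       * VK_IMAGE_ASPECT_STENCIL_BIT on the second. */
      (void)aspect;
   }
}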

static void
cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 device->meta_state.cleari_r32g32b32.img_p_layout, 0, 1,
                                 (VkWriteDescriptorSet[]){{
                                    .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                    .dstBinding = 0,
                                    .dstArrayElement = 0,
                                    .descriptorCount = 1,
                                    .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                    .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
                                 }});
}

static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
                                   const VkClearColorValue *clear_color)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_buffer_view dst_view;
   VkPipeline pipeline;
   unsigned stride;
   VkBuffer buffer;
   VkResult result;

   result = get_cleari_r32g32b32_pipeline(device, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   /* This special clear path for R32G32B32 formats will write the linear
    * image as a buffer with the same underlying memory. The compute
    * shader will clear all components separately using an R32 format.
    */
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &buffer);

   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format, &dst_view);
   cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   unsigned push_constants[4] = {
      clear_color->uint32[0],
      clear_color->uint32[1],
      clear_color->uint32[2],
      stride,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari_r32g32b32.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

   radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1);

   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
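
/* A minimal CPU-side sketch (hypothetical reference, not driver code) of the
 * R32 clear above: only three clear-color dwords are pushed because
 * R32G32B32 has no alpha component, and each is stored as a separate R32
 * element with the same addressing as the copy paths.
 */
static inline void
cleari_r32g32b32_reference_sketch(uint32_t *dst, const uint32_t color[3], unsigned stride, unsigned width,
                                  unsigned height)
{
   for (unsigned y = 0; y < height; y++)
      for (unsigned x = 0; x < width; x++)
         for (unsigned c = 0; c < 3; c++)
            dst[y * stride + 3 * x + c] = color[c];
}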

static void
cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari.img_p_layout, 0,
                                 1,
                                 (VkWriteDescriptorSet[]){
                                    {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                     .dstBinding = 0,
                                     .dstArrayElement = 0,
                                     .descriptorCount = 1,
                                     .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                     .pImageInfo =
                                        (VkDescriptorImageInfo[]){
                                           {
                                              .sampler = VK_NULL_HANDLE,
                                              .imageView = radv_image_view_to_handle(dst_iview),
                                              .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                           },
                                        }},
                                 });
}

void
radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
                         const VkClearColorValue *clear_color)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view dst_iview;
   VkPipeline pipeline;
   VkResult result;

   if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
       dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
      return;
   }

   result = get_cleari_pipeline(device, dst->image, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   create_iview(cmd_buffer, dst, &dst_iview, VK_FORMAT_UNDEFINED, dst->aspect_mask);
   cleari_bind_descriptors(cmd_buffer, &dst_iview);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   unsigned push_constants[5] = {
      clear_color->uint32[0], clear_color->uint32[1], clear_color->uint32[2], clear_color->uint32[3], dst->layer,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, push_constants);

   radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1);

   radv_image_view_finish(&dst_iview);
}