1 /*
2 * Copyright 2024 Valve Corporation
3 * Copyright 2024 Alyssa Rosenzweig
4 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5 * SPDX-License-Identifier: MIT
6 */
7 #include "util/format/u_format.h"
8 #include "util/format/u_formats.h"
9 #include "util/u_math.h"
10 #include "vulkan/vulkan_core.h"
11 #include "agx_pack.h"
12 #include "hk_buffer.h"
13 #include "hk_cmd_buffer.h"
14 #include "hk_device.h"
15 #include "hk_entrypoints.h"
16 #include "hk_image.h"
17 #include "hk_physical_device.h"
18
19 #include "layout.h"
20 #include "nir_builder.h"
21 #include "nir_builder_opcodes.h"
22 #include "nir_format_convert.h"
23 #include "shader_enums.h"
24 #include "vk_format.h"
25 #include "vk_meta.h"
26 #include "vk_pipeline.h"
27
/* For block-based blit kernels, we hardcode the maximum tile size that we can
 * always achieve. This simplifies our life.
 */
31 #define TILE_WIDTH 32
32 #define TILE_HEIGHT 32
33
34 static VkResult
hk_cmd_bind_map_buffer(struct vk_command_buffer *vk_cmd,
36 struct vk_meta_device *meta, VkBuffer _buffer,
37 void **map_out)
38 {
39 struct hk_cmd_buffer *cmd = container_of(vk_cmd, struct hk_cmd_buffer, vk);
40 VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
41
42 assert(buffer->vk.size < UINT_MAX);
43 struct agx_ptr T = hk_pool_alloc(cmd, buffer->vk.size, 16);
44 if (unlikely(T.cpu == NULL))
45 return VK_ERROR_OUT_OF_POOL_MEMORY;
46
47 buffer->addr = T.gpu;
48 *map_out = T.cpu;
49 return VK_SUCCESS;
50 }
51
52 VkResult
hk_device_init_meta(struct hk_device *dev)
54 {
55 VkResult result = vk_meta_device_init(&dev->vk, &dev->meta);
56 if (result != VK_SUCCESS)
57 return result;
58
59 dev->meta.use_gs_for_layer = false;
60 dev->meta.use_stencil_export = true;
61 dev->meta.cmd_bind_map_buffer = hk_cmd_bind_map_buffer;
62 dev->meta.max_bind_map_buffer_size_B = 64 * 1024;
63
64 return VK_SUCCESS;
65 }
66
67 void
hk_device_finish_meta(struct hk_device *dev)
69 {
70 vk_meta_device_finish(&dev->vk, &dev->meta);
71 }
72
73 struct hk_meta_save {
74 struct vk_vertex_input_state _dynamic_vi;
75 struct vk_sample_locations_state _dynamic_sl;
76 struct vk_dynamic_graphics_state dynamic;
77 struct hk_api_shader *shaders[MESA_SHADER_MESH + 1];
78 struct hk_addr_range vb0;
79 struct hk_descriptor_set *desc0;
80 bool has_push_desc0;
81 enum agx_visibility_mode occlusion;
82 struct hk_push_descriptor_set push_desc0;
83 VkQueryPipelineStatisticFlags pipeline_stats_flags;
84 uint8_t push[128];
85 };
86
87 static void
hk_meta_begin(struct hk_cmd_buffer *cmd, struct hk_meta_save *save,
89 VkPipelineBindPoint bind_point)
90 {
91 struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
92
93 if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
94 save->dynamic = cmd->vk.dynamic_graphics_state;
95 save->_dynamic_vi = cmd->state.gfx._dynamic_vi;
96 save->_dynamic_sl = cmd->state.gfx._dynamic_sl;
97
98 static_assert(sizeof(cmd->state.gfx.shaders) == sizeof(save->shaders));
99 memcpy(save->shaders, cmd->state.gfx.shaders, sizeof(save->shaders));
100
101 /* Pause queries */
102 save->occlusion = cmd->state.gfx.occlusion.mode;
103 cmd->state.gfx.occlusion.mode = AGX_VISIBILITY_MODE_NONE;
104 cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;
105
106 save->pipeline_stats_flags = desc->root.draw.pipeline_stats_flags;
107 desc->root.draw.pipeline_stats_flags = 0;
108 desc->root_dirty = true;
109 } else {
110 save->shaders[MESA_SHADER_COMPUTE] = cmd->state.cs.shader;
111 }
112
113 save->vb0 = cmd->state.gfx.vb[0];
114
115 save->desc0 = desc->sets[0];
116 save->has_push_desc0 = desc->push[0];
117 if (save->has_push_desc0)
118 save->push_desc0 = *desc->push[0];
119
120 static_assert(sizeof(save->push) == sizeof(desc->root.push));
121 memcpy(save->push, desc->root.push, sizeof(save->push));
122
123 cmd->in_meta = true;
124 }
125
126 static void
hk_meta_init_render(struct hk_cmd_buffer *cmd,
128 struct vk_meta_rendering_info *info)
129 {
130 const struct hk_rendering_state *render = &cmd->state.gfx.render;
131
132 *info = (struct vk_meta_rendering_info){
133 .samples = MAX2(render->tilebuffer.nr_samples, 1),
134 .view_mask = render->view_mask,
135 .color_attachment_count = render->color_att_count,
136 .depth_attachment_format = render->depth_att.vk_format,
137 .stencil_attachment_format = render->stencil_att.vk_format,
138 };
139 for (uint32_t a = 0; a < render->color_att_count; a++) {
140 info->color_attachment_formats[a] = render->color_att[a].vk_format;
141 info->color_attachment_write_masks[a] =
142 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
143 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
144 }
145 }
146
147 static void
hk_meta_end(struct hk_cmd_buffer *cmd, struct hk_meta_save *save,
149 VkPipelineBindPoint bind_point)
150 {
151 struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
152 desc->root_dirty = true;
153
154 if (save->desc0) {
155 desc->sets[0] = save->desc0;
156 desc->root.sets[0] = hk_descriptor_set_addr(save->desc0);
157 desc->sets_dirty |= BITFIELD_BIT(0);
158 desc->push_dirty &= ~BITFIELD_BIT(0);
159 } else if (save->has_push_desc0) {
160 *desc->push[0] = save->push_desc0;
161 desc->push_dirty |= BITFIELD_BIT(0);
162 }
163
164 if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
165 /* Restore the dynamic state */
166 assert(save->dynamic.vi == &cmd->state.gfx._dynamic_vi);
167 assert(save->dynamic.ms.sample_locations == &cmd->state.gfx._dynamic_sl);
168 cmd->vk.dynamic_graphics_state = save->dynamic;
169 cmd->state.gfx._dynamic_vi = save->_dynamic_vi;
170 cmd->state.gfx._dynamic_sl = save->_dynamic_sl;
171 memcpy(cmd->vk.dynamic_graphics_state.dirty,
172 cmd->vk.dynamic_graphics_state.set,
173 sizeof(cmd->vk.dynamic_graphics_state.set));
174
175 for (uint32_t stage = 0; stage < ARRAY_SIZE(save->shaders); stage++) {
176 hk_cmd_bind_graphics_shader(cmd, stage, save->shaders[stage]);
177 }
178
179 hk_cmd_bind_vertex_buffer(cmd, 0, save->vb0);
180
181 /* Restore queries */
182 cmd->state.gfx.occlusion.mode = save->occlusion;
183 cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;
184
185 desc->root.draw.pipeline_stats_flags = save->pipeline_stats_flags;
186 desc->root_dirty = true;
187 } else {
188 hk_cmd_bind_compute_shader(cmd, save->shaders[MESA_SHADER_COMPUTE]);
189 }
190
191 memcpy(desc->root.push, save->push, sizeof(save->push));
192 cmd->in_meta = false;
193 }
194
195 #define VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE (0xcafe0000)
196 #define VK_META_OBJECT_KEY_FILL_PIPELINE (0xcafe0001)
197
198 #define BINDING_OUTPUT 0
199 #define BINDING_INPUT 1
200
201 static VkFormat
aspect_format(VkFormat fmt, VkImageAspectFlags aspect)
203 {
204 bool depth = (aspect & VK_IMAGE_ASPECT_DEPTH_BIT);
205 bool stencil = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
206
207 enum pipe_format p_format = vk_format_to_pipe_format(fmt);
208
209 if (util_format_is_depth_or_stencil(p_format)) {
210 assert(depth ^ stencil);
211 if (depth) {
212 switch (fmt) {
213 case VK_FORMAT_D32_SFLOAT:
214 case VK_FORMAT_D32_SFLOAT_S8_UINT:
215 return VK_FORMAT_D32_SFLOAT;
216 case VK_FORMAT_D16_UNORM:
217 case VK_FORMAT_D16_UNORM_S8_UINT:
218 return VK_FORMAT_D16_UNORM;
219 default:
220 unreachable("invalid depth");
221 }
222 } else {
223 switch (fmt) {
224 case VK_FORMAT_S8_UINT:
225 case VK_FORMAT_D32_SFLOAT_S8_UINT:
226 case VK_FORMAT_D16_UNORM_S8_UINT:
227 return VK_FORMAT_S8_UINT;
228 default:
229 unreachable("invalid stencil");
230 }
231 }
232 }
233
234 assert(!depth && !stencil);
235
236 const struct vk_format_ycbcr_info *ycbcr_info =
237 vk_format_get_ycbcr_info(fmt);
238
239 if (ycbcr_info) {
240 switch (aspect) {
241 case VK_IMAGE_ASPECT_PLANE_0_BIT:
242 return ycbcr_info->planes[0].format;
243 case VK_IMAGE_ASPECT_PLANE_1_BIT:
244 return ycbcr_info->planes[1].format;
245 case VK_IMAGE_ASPECT_PLANE_2_BIT:
246 return ycbcr_info->planes[2].format;
247 default:
248 unreachable("invalid ycbcr aspect");
249 }
250 }
251
252 return fmt;
253 }
254
/*
 * Canonicalize formats to simplify the copies. The returned format must be in
 * the same compression class, and should round-trip losslessly (minifloat
 * formats are the unfortunate exception).
 */
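/* For example, any 16-byte compressed block canonicalizes to
 * R32G32B32A32_UINT, an 8-byte block to R32G32_UINT, and
 * R16G16B16A16_SFLOAT to R16G16B16A16_UINT.
 */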
260 static enum pipe_format
canonical_format_pipe(enum pipe_format fmt, bool canonicalize_zs)
262 {
263 if (!canonicalize_zs && util_format_is_depth_or_stencil(fmt))
264 return fmt;
265
266 assert(ail_is_valid_pixel_format(fmt));
267
268 if (util_format_is_compressed(fmt)) {
269 unsigned size_B = util_format_get_blocksize(fmt);
270 assert(size_B == 8 || size_B == 16);
271
272 return size_B == 16 ? PIPE_FORMAT_R32G32B32A32_UINT
273 : PIPE_FORMAT_R32G32_UINT;
274 }
275
276 #define CASE(x, y) [AGX_CHANNELS_##x] = PIPE_FORMAT_##y
277 /* clang-format off */
278 static enum pipe_format map[] = {
279 CASE(R8, R8_UINT),
280 CASE(R16, R16_UNORM /* XXX: Hack for Z16 copies */),
281 CASE(R8G8, R8G8_UINT),
282 CASE(R5G6B5, R5G6B5_UNORM),
283 CASE(R4G4B4A4, R4G4B4A4_UNORM),
284 CASE(A1R5G5B5, A1R5G5B5_UNORM),
285 CASE(R5G5B5A1, B5G5R5A1_UNORM),
286 CASE(R32, R32_UINT),
287 CASE(R16G16, R16G16_UINT),
288 CASE(R11G11B10, R11G11B10_FLOAT),
289 CASE(R10G10B10A2, R10G10B10A2_UNORM),
290 CASE(R9G9B9E5, R9G9B9E5_FLOAT),
291 CASE(R8G8B8A8, R8G8B8A8_UINT),
292 CASE(R32G32, R32G32_UINT),
293 CASE(R16G16B16A16, R16G16B16A16_UINT),
294 CASE(R32G32B32A32, R32G32B32A32_UINT),
295 };
296 /* clang-format on */
297 #undef CASE
298
299 enum agx_channels channels = ail_pixel_format[fmt].channels;
300 assert(channels < ARRAY_SIZE(map) && "all valid channels handled");
301 assert(map[channels] != PIPE_FORMAT_NONE && "all valid channels handled");
302 return map[channels];
303 }
304
305 static VkFormat
canonical_format(VkFormat fmt)
307 {
308 return vk_format_from_pipe_format(
309 canonical_format_pipe(vk_format_to_pipe_format(fmt), false));
310 }
311
312 enum copy_type {
313 BUF2IMG,
314 IMG2BUF,
315 IMG2IMG,
316 };
317
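/* The offset and grid fields are in block units ("_el" = elements);
 * row_extent and slice_or_layer_extent are byte strides used to linearize
 * buffer accesses.
 */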
318 struct vk_meta_push_data {
319 uint64_t buffer;
320
321 uint32_t row_extent;
322 uint32_t slice_or_layer_extent;
323
324 int32_t src_offset_el[4];
325 int32_t dst_offset_el[4];
326 uint32_t grid_el[3];
327 } PACKED;
328
329 #define get_push(b, name) \
330 nir_load_push_constant( \
331 b, 1, sizeof(((struct vk_meta_push_data *)0)->name) * 8, \
332 nir_imm_int(b, offsetof(struct vk_meta_push_data, name)))
333
334 struct vk_meta_image_copy_key {
335 enum vk_meta_object_key_type key_type;
336 enum copy_type type;
337 enum pipe_format src_format, dst_format;
338 unsigned block_size;
339 unsigned nr_samples;
340 bool block_based;
341 };
342
343 static nir_def *
linearize_coords(nir_builder *b, nir_def *coord,
345 const struct vk_meta_image_copy_key *key)
346 {
347 assert(key->nr_samples == 1 && "buffer<-->image copies not multisampled");
348
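   /* Byte offset = x * block_size + y * row_extent + z * slice_or_layer_extent;
    * dividing by block_size again yields the texel index used for the buffer.
    */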
349 nir_def *row_extent = get_push(b, row_extent);
350 nir_def *slice_or_layer_extent = get_push(b, slice_or_layer_extent);
351 nir_def *x = nir_channel(b, coord, 0);
352 nir_def *y = nir_channel(b, coord, 1);
353 nir_def *z_or_layer = nir_channel(b, coord, 2);
354
355 nir_def *v = nir_imul_imm(b, x, key->block_size);
356
357 v = nir_iadd(b, v, nir_imul(b, y, row_extent));
358 v = nir_iadd(b, v, nir_imul(b, z_or_layer, slice_or_layer_extent));
359
360 return nir_udiv_imm(b, v, key->block_size);
361 }
362
363 static bool
is_format_native(enum pipe_format format)
365 {
366 switch (format) {
367 case PIPE_FORMAT_R8_UINT:
368 case PIPE_FORMAT_R8G8_UINT:
369 case PIPE_FORMAT_R32_UINT:
370 case PIPE_FORMAT_R32G32_UINT:
371 case PIPE_FORMAT_R16G16_UINT:
372 case PIPE_FORMAT_R16_UNORM:
373 /* TODO: debug me .. why do these fail */
374 return false;
375 case PIPE_FORMAT_R11G11B10_FLOAT:
376 case PIPE_FORMAT_R9G9B9E5_FLOAT:
377 case PIPE_FORMAT_R16G16B16A16_UINT:
378 case PIPE_FORMAT_R32G32B32A32_UINT:
379 case PIPE_FORMAT_R8G8B8A8_UINT:
380 case PIPE_FORMAT_R10G10B10A2_UNORM:
381 return true;
382 case PIPE_FORMAT_R5G6B5_UNORM:
383 case PIPE_FORMAT_R4G4B4A4_UNORM:
384 case PIPE_FORMAT_A1R5G5B5_UNORM:
385 case PIPE_FORMAT_B5G5R5A1_UNORM:
386 return false;
387 default:
388 unreachable("expected canonical");
389 }
390 }
391
392 static nir_def *
load_store_formatted(nir_builder *b, nir_def *base, nir_def *index,
394 nir_def *value, enum pipe_format format)
395 {
396 if (util_format_is_depth_or_stencil(format))
397 format = canonical_format_pipe(format, true);
398
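   /* Native formats map to hardware formatted load/store intrinsics; everything
    * else falls back to a raw global access plus manual (un)packing.
    */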
399 if (is_format_native(format)) {
400 enum pipe_format isa = ail_pixel_format[format].renderable;
401 unsigned isa_size = util_format_get_blocksize(isa);
402 unsigned isa_components = util_format_get_blocksize(format) / isa_size;
403 unsigned shift = util_logbase2(isa_components);
404
405 if (value) {
406 nir_store_agx(b, value, base, index, .format = isa, .base = shift);
407 } else {
408 return nir_load_agx(b, 4, 32, base, index, .format = isa,
409 .base = shift);
410 }
411 } else {
412 unsigned blocksize_B = util_format_get_blocksize(format);
413 nir_def *addr =
414 nir_iadd(b, base, nir_imul_imm(b, nir_u2u64(b, index), blocksize_B));
415
416 if (value) {
417 nir_def *raw = nir_format_pack_rgba(b, format, value);
418
419 if (blocksize_B <= 4) {
420 assert(raw->num_components == 1);
421 raw = nir_u2uN(b, raw, blocksize_B * 8);
422 } else {
423 assert(raw->bit_size == 32);
424 raw = nir_trim_vector(b, raw, blocksize_B / 4);
425 }
426
427 nir_store_global(b, addr, blocksize_B, raw,
428 nir_component_mask(raw->num_components));
429 } else {
430 nir_def *raw =
431 nir_load_global(b, addr, blocksize_B, DIV_ROUND_UP(blocksize_B, 4),
432 MIN2(32, blocksize_B * 8));
433
434 return nir_format_unpack_rgba(b, raw, format);
435 }
436 }
437
438 return NULL;
439 }
440
441 static nir_shader *
build_image_copy_shader(const struct vk_meta_image_copy_key *key)
443 {
444 nir_builder build =
445 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "vk-meta-copy");
446
447 nir_builder *b = &build;
448 b->shader->info.workgroup_size[0] = TILE_WIDTH;
449 b->shader->info.workgroup_size[1] = TILE_HEIGHT;
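   /* Each workgroup covers one TILE_WIDTH x TILE_HEIGHT tile of the copy. */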
450
451 bool src_is_buf = key->type == BUF2IMG;
452 bool dst_is_buf = key->type == IMG2BUF;
453
454 bool msaa = key->nr_samples > 1;
455 enum glsl_sampler_dim dim_2d =
456 msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
457 enum glsl_sampler_dim dim_src = src_is_buf ? GLSL_SAMPLER_DIM_BUF : dim_2d;
458 enum glsl_sampler_dim dim_dst = dst_is_buf ? GLSL_SAMPLER_DIM_BUF : dim_2d;
459
460 const struct glsl_type *texture_type =
461 glsl_sampler_type(dim_src, false, !src_is_buf, GLSL_TYPE_UINT);
462
463 const struct glsl_type *image_type =
464 glsl_image_type(dim_dst, !dst_is_buf, GLSL_TYPE_UINT);
465
466 nir_variable *texture =
467 nir_variable_create(b->shader, nir_var_uniform, texture_type, "source");
468 nir_variable *image =
469 nir_variable_create(b->shader, nir_var_image, image_type, "dest");
470
471 image->data.descriptor_set = 0;
472 image->data.binding = BINDING_OUTPUT;
473 image->data.access = ACCESS_NON_READABLE;
474
475 texture->data.descriptor_set = 0;
476 texture->data.binding = BINDING_INPUT;
477
478 /* Grab the offset vectors */
479 nir_def *src_offset_el = nir_load_push_constant(
480 b, 3, 32,
481 nir_imm_int(b, offsetof(struct vk_meta_push_data, src_offset_el)));
482
483 nir_def *dst_offset_el = nir_load_push_constant(
484 b, 3, 32,
485 nir_imm_int(b, offsetof(struct vk_meta_push_data, dst_offset_el)));
486
487 nir_def *grid_2d_el = nir_load_push_constant(
488 b, 2, 32, nir_imm_int(b, offsetof(struct vk_meta_push_data, grid_el)));
489
490 /* We're done setting up variables, do the copy */
491 nir_def *coord = nir_load_global_invocation_id(b, 32);
492
493 /* The destination format is already canonical, convert to an ISA format */
494 enum pipe_format isa_format;
495 if (key->block_based) {
496 isa_format =
497 ail_pixel_format[canonical_format_pipe(key->dst_format, true)]
498 .renderable;
499 assert(isa_format != PIPE_FORMAT_NONE);
500 }
501
502 nir_def *local_offset = nir_imm_intN_t(b, 0, 16);
503 nir_def *lid = nir_trim_vector(b, nir_load_local_invocation_id(b), 2);
504 lid = nir_u2u16(b, lid);
505
506 nir_def *src_coord = src_is_buf ? coord : nir_iadd(b, coord, src_offset_el);
507 nir_def *dst_coord = dst_is_buf ? coord : nir_iadd(b, coord, dst_offset_el);
508
509 nir_def *image_deref = &nir_build_deref_var(b, image)->def;
510
511 nir_def *coord_2d_el = nir_trim_vector(b, coord, 2);
512 nir_def *in_bounds;
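   /* For block-based copies, the workgroup spans a whole tile-aligned block of
    * the destination, so the coordinates are rewound to the tile origin and
    * out-of-bounds threads preserve the existing destination texels instead.
    */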
513 if (key->block_based) {
514 nir_def *offset_in_block_el =
515 nir_umod_imm(b, nir_trim_vector(b, dst_offset_el, 2), TILE_WIDTH);
516
517 dst_coord =
518 nir_vector_insert_imm(b, nir_isub(b, dst_coord, offset_in_block_el),
519 nir_channel(b, dst_coord, 2), 2);
520
521 src_coord =
522 nir_vector_insert_imm(b, nir_isub(b, src_coord, offset_in_block_el),
523 nir_channel(b, src_coord, 2), 2);
524
525 in_bounds = nir_uge(b, coord_2d_el, offset_in_block_el);
526 in_bounds = nir_iand(
527 b, in_bounds,
528 nir_ult(b, coord_2d_el, nir_iadd(b, offset_in_block_el, grid_2d_el)));
529 } else {
530 in_bounds = nir_ult(b, coord_2d_el, grid_2d_el);
531 }
532
      /* Special case: handle buffer indexing */
534 if (dst_is_buf) {
535 assert(!key->block_based);
536 dst_coord = linearize_coords(b, dst_coord, key);
537 } else if (src_is_buf) {
538 src_coord = linearize_coords(b, src_coord, key);
539 }
540
541 for (unsigned s = 0; s < key->nr_samples; ++s) {
542 nir_def *ms_index = nir_imm_int(b, s);
543 nir_def *value1, *value2;
544
545 nir_push_if(b, nir_ball(b, in_bounds));
546 {
547 /* Copy formatted texel from texture to storage image */
548 nir_deref_instr *deref = nir_build_deref_var(b, texture);
549
550 if (src_is_buf) {
551 value1 = load_store_formatted(b, get_push(b, buffer), src_coord,
552 NULL, key->dst_format);
553 } else {
554 if (msaa) {
555 value1 = nir_txf_ms_deref(b, deref, src_coord, ms_index);
556 } else {
557 value1 = nir_txf_deref(b, deref, src_coord, NULL);
558 }
559
560 /* Munge according to the implicit conversions so we get a bit copy */
561 if (key->src_format != key->dst_format) {
562 nir_def *packed =
563 nir_format_pack_rgba(b, key->src_format, value1);
564
565 value1 = nir_format_unpack_rgba(b, packed, key->dst_format);
566 }
567 }
568
569 if (dst_is_buf) {
570 load_store_formatted(b, get_push(b, buffer), dst_coord, value1,
571 key->dst_format);
572 } else if (!key->block_based) {
573 nir_image_deref_store(b, image_deref, nir_pad_vec4(b, dst_coord),
574 ms_index, value1, nir_imm_int(b, 0),
575 .image_dim = dim_dst,
576 .image_array = !dst_is_buf);
577 }
578 }
579 nir_push_else(b, NULL);
580 if (key->block_based) {
581 /* Copy back the existing destination content */
582 value2 = nir_image_deref_load(b, 4, 32, image_deref,
583 nir_pad_vec4(b, dst_coord), ms_index,
584 nir_imm_int(b, 0), .image_dim = dim_dst,
585 .image_array = !dst_is_buf);
586 }
587 nir_pop_if(b, NULL);
588
589 if (key->block_based) {
590 nir_store_local_pixel_agx(b, nir_if_phi(b, value1, value2),
591 nir_imm_int(b, 1 << s), lid, .base = 0,
592 .write_mask = 0xf, .format = isa_format,
593 .explicit_coord = true);
594 }
595 }
596
597 if (key->block_based) {
598 assert(!dst_is_buf);
599
600 nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);
601
602 nir_push_if(b, nir_ball(b, nir_ieq_imm(b, lid, 0)));
603 {
604 nir_image_deref_store_block_agx(
605 b, image_deref, local_offset, dst_coord, .format = isa_format,
606 .image_dim = dim_2d, .image_array = true, .explicit_coord = true);
607 }
608 nir_pop_if(b, NULL);
609 b->shader->info.cs.image_block_size_per_thread_agx =
610 util_format_get_blocksize(key->dst_format);
611 }
612
613 return b->shader;
614 }
615
616 static VkResult
get_image_copy_descriptor_set_layout(struct vk_device *device,
618 struct vk_meta_device *meta,
619 VkDescriptorSetLayout *layout_out,
620 enum copy_type type)
621 {
622 const char *keys[] = {
623 [IMG2BUF] = "vk-meta-copy-image-to-buffer-descriptor-set-layout",
624 [BUF2IMG] = "vk-meta-copy-buffer-to-image-descriptor-set-layout",
625 [IMG2IMG] = "vk-meta-copy-image-to-image-descriptor-set-layout",
626 };
627
628 VkDescriptorSetLayout from_cache = vk_meta_lookup_descriptor_set_layout(
629 meta, keys[type], strlen(keys[type]));
630 if (from_cache != VK_NULL_HANDLE) {
631 *layout_out = from_cache;
632 return VK_SUCCESS;
633 }
634
635 const VkDescriptorSetLayoutBinding bindings[] = {
636 {
637 .binding = BINDING_OUTPUT,
638 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
639 .descriptorCount = 1,
640 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
641 },
642 {
643 .binding = BINDING_INPUT,
644 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
645 .descriptorCount = 1,
646 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
647 },
648 };
649
650 const VkDescriptorSetLayoutCreateInfo info = {
651 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
652 .bindingCount = ARRAY_SIZE(bindings),
653 .pBindings = bindings,
654 };
655
656 return vk_meta_create_descriptor_set_layout(device, meta, &info, keys[type],
657 strlen(keys[type]), layout_out);
658 }
659
660 static VkResult
get_image_copy_pipeline_layout(struct vk_device *device,
662 struct vk_meta_device *meta,
663 struct vk_meta_image_copy_key *key,
664 VkDescriptorSetLayout set_layout,
665 VkPipelineLayout *layout_out,
666 enum copy_type type)
667 {
668 const char *keys[] = {
669 [IMG2BUF] = "vk-meta-copy-image-to-buffer-pipeline-layout",
670 [BUF2IMG] = "vk-meta-copy-buffer-to-image-pipeline-layout",
671 [IMG2IMG] = "vk-meta-copy-image-to-image-pipeline-layout",
672 };
673
674 VkPipelineLayout from_cache =
675 vk_meta_lookup_pipeline_layout(meta, keys[type], strlen(keys[type]));
676 if (from_cache != VK_NULL_HANDLE) {
677 *layout_out = from_cache;
678 return VK_SUCCESS;
679 }
680
681 VkPipelineLayoutCreateInfo info = {
682 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
683 .setLayoutCount = 1,
684 .pSetLayouts = &set_layout,
685 };
686
687 const VkPushConstantRange push_range = {
688 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
689 .offset = 0,
690 .size = sizeof(struct vk_meta_push_data),
691 };
692
693 info.pushConstantRangeCount = 1;
694 info.pPushConstantRanges = &push_range;
695
696 return vk_meta_create_pipeline_layout(device, meta, &info, keys[type],
697 strlen(keys[type]), layout_out);
698 }
699
700 static VkResult
get_image_copy_pipeline(struct vk_device *device, struct vk_meta_device *meta,
702 const struct vk_meta_image_copy_key *key,
703 VkPipelineLayout layout, VkPipeline *pipeline_out)
704 {
705 VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key, sizeof(*key));
706 if (from_cache != VK_NULL_HANDLE) {
707 *pipeline_out = from_cache;
708 return VK_SUCCESS;
709 }
710
711 const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
712 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
713 .nir = build_image_copy_shader(key),
714 };
715 const VkPipelineShaderStageCreateInfo cs_info = {
716 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
717 .pNext = &nir_info,
718 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
719 .pName = "main",
720 };
721
722 const VkComputePipelineCreateInfo info = {
723 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
724 .stage = cs_info,
725 .layout = layout,
726 };
727
728 VkResult result = vk_meta_create_compute_pipeline(
729 device, meta, &info, key, sizeof(*key), pipeline_out);
730 ralloc_free(nir_info.nir);
731
732 return result;
733 }
734
735 static void
hk_meta_copy_image_to_buffer2(struct vk_command_buffer *cmd,
737 struct vk_meta_device *meta,
738 const VkCopyImageToBufferInfo2 *pCopyBufferInfo)
739 {
740 VK_FROM_HANDLE(vk_image, image, pCopyBufferInfo->srcImage);
741 VK_FROM_HANDLE(vk_image, src_image, pCopyBufferInfo->srcImage);
742 VK_FROM_HANDLE(hk_buffer, buffer, pCopyBufferInfo->dstBuffer);
743
744 struct vk_device *device = cmd->base.device;
745 const struct vk_device_dispatch_table *disp = &device->dispatch_table;
746
747 VkResult result;
748
749 VkDescriptorSetLayout set_layout;
750 result =
751 get_image_copy_descriptor_set_layout(device, meta, &set_layout, IMG2BUF);
752 if (unlikely(result != VK_SUCCESS)) {
753 vk_command_buffer_set_error(cmd, result);
754 return;
755 }
756
757 bool per_layer =
758 util_format_is_compressed(vk_format_to_pipe_format(image->format));
759
760 for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) {
761 const VkBufferImageCopy2 *region = &pCopyBufferInfo->pRegions[i];
762
763 unsigned layers = MAX2(region->imageExtent.depth,
764 vk_image_subresource_layer_count(
                                              src_image, &region->imageSubresource));
766 unsigned layer_iters = per_layer ? layers : 1;
767
768 for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
769
770 VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
771 VkFormat aspect_fmt = aspect_format(image->format, aspect);
772 VkFormat canonical = canonical_format(aspect_fmt);
773
774 uint32_t blocksize_B =
775 util_format_get_blocksize(vk_format_to_pipe_format(canonical));
776
777 enum pipe_format p_format = vk_format_to_pipe_format(image->format);
778
779 unsigned row_extent = util_format_get_nblocksx(
780 p_format, MAX2(region->bufferRowLength,
781 region->imageExtent.width)) *
782 blocksize_B;
783 unsigned slice_extent =
784 util_format_get_nblocksy(
785 p_format,
786 MAX2(region->bufferImageHeight, region->imageExtent.height)) *
787 row_extent;
788 unsigned layer_extent =
789 util_format_get_nblocksz(p_format, region->imageExtent.depth) *
790 slice_extent;
791
792 bool is_3d = region->imageExtent.depth > 1;
793
794 struct vk_meta_image_copy_key key = {
795 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
796 .type = IMG2BUF,
797 .block_size = blocksize_B,
798 .nr_samples = image->samples,
799 .src_format = vk_format_to_pipe_format(canonical),
800 .dst_format = vk_format_to_pipe_format(canonical),
801 };
802
803 VkPipelineLayout pipeline_layout;
804 result = get_image_copy_pipeline_layout(device, meta, &key, set_layout,
                                              &pipeline_layout, IMG2BUF);
806 if (unlikely(result != VK_SUCCESS)) {
807 vk_command_buffer_set_error(cmd, result);
808 return;
809 }
810
811 VkImageView src_view;
812 const VkImageViewUsageCreateInfo src_view_usage = {
813 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
814 .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
815 };
816 const VkImageViewCreateInfo src_view_info = {
817 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
818 .flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
819 .pNext = &src_view_usage,
820 .image = pCopyBufferInfo->srcImage,
821 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
822 .format = canonical,
823 .subresourceRange =
824 {
825 .aspectMask = region->imageSubresource.aspectMask,
826 .baseMipLevel = region->imageSubresource.mipLevel,
827 .baseArrayLayer =
828 MAX2(region->imageOffset.z,
829 region->imageSubresource.baseArrayLayer) +
830 layer_offs,
831 .layerCount = per_layer ? 1 : layers,
832 .levelCount = 1,
833 },
834 };
835
836 result =
837 vk_meta_create_image_view(cmd, meta, &src_view_info, &src_view);
838 if (unlikely(result != VK_SUCCESS)) {
839 vk_command_buffer_set_error(cmd, result);
840 return;
841 }
842
843 VkDescriptorImageInfo src_info = {
844 .imageLayout = pCopyBufferInfo->srcImageLayout,
845 .imageView = src_view,
846 };
847
848 VkWriteDescriptorSet desc_write = {
849 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
850 .dstSet = 0,
851 .dstBinding = BINDING_INPUT,
852 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
853 .descriptorCount = 1,
854 .pImageInfo = &src_info,
855 };
856
857 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
858 VK_PIPELINE_BIND_POINT_COMPUTE,
859 pipeline_layout, 0, 1, &desc_write);
860
861 VkPipeline pipeline;
862 result = get_image_copy_pipeline(device, meta, &key, pipeline_layout,
863 &pipeline);
864 if (unlikely(result != VK_SUCCESS)) {
865 vk_command_buffer_set_error(cmd, result);
866 return;
867 }
868
869 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
870 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
871
872 enum pipe_format p_src_fmt =
873 vk_format_to_pipe_format(src_image->format);
874
875 struct vk_meta_push_data push = {
876 .buffer = hk_buffer_address(buffer, region->bufferOffset),
877 .row_extent = row_extent,
878 .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,
879
880 .src_offset_el[0] =
881 util_format_get_nblocksx(p_src_fmt, region->imageOffset.x),
882 .src_offset_el[1] =
883 util_format_get_nblocksy(p_src_fmt, region->imageOffset.y),
884
885 .grid_el[0] =
886 util_format_get_nblocksx(p_format, region->imageExtent.width),
887 .grid_el[1] =
888 util_format_get_nblocksy(p_format, region->imageExtent.height),
889 .grid_el[2] = per_layer ? 1 : layers,
890 };
891
892 push.buffer += push.slice_or_layer_extent * layer_offs;
893
894 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd),
895 pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
896 sizeof(push), &push);
897
898 disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
899 DIV_ROUND_UP(push.grid_el[0], 32),
900 DIV_ROUND_UP(push.grid_el[1], 32), push.grid_el[2]);
901 }
902 }
903 }
904
905 static void
hk_meta_dispatch_to_image(struct vk_command_buffer *cmd,
907 const struct vk_device_dispatch_table *disp,
908 VkPipelineLayout pipeline_layout,
909 struct vk_meta_push_data *push, VkOffset3D offset,
910 VkExtent3D extent, bool per_layer, unsigned layers,
911 enum pipe_format p_dst_fmt, enum pipe_format p_format)
912 {
913 push->dst_offset_el[0] = util_format_get_nblocksx(p_dst_fmt, offset.x);
914 push->dst_offset_el[1] = util_format_get_nblocksy(p_dst_fmt, offset.y);
915 push->dst_offset_el[2] = 0;
916
917 push->grid_el[0] = util_format_get_nblocksx(p_format, extent.width);
918 push->grid_el[1] = util_format_get_nblocksy(p_format, extent.height);
919 push->grid_el[2] = per_layer ? 1 : layers;
920
921 unsigned w_el = util_format_get_nblocksx(p_format, extent.width);
922 unsigned h_el = util_format_get_nblocksy(p_format, extent.height);
923
924 /* Expand the grid so destinations are in tiles */
925 unsigned expanded_x0 = push->dst_offset_el[0] & ~(TILE_WIDTH - 1);
926 unsigned expanded_y0 = push->dst_offset_el[1] & ~(TILE_HEIGHT - 1);
927 unsigned expanded_x1 = align(push->dst_offset_el[0] + w_el, TILE_WIDTH);
928 unsigned expanded_y1 = align(push->dst_offset_el[1] + h_el, TILE_HEIGHT);
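   /* For example, dst_offset_el[0] = 40 with w_el = 10 expands to
    * expanded_x0 = 32 and expanded_x1 = 64: one full 32-wide tile column.
    */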
929
930 /* TODO: clamp to the destination size to save some redundant threads? */
931
932 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
933 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(*push), push);
934
935 disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
936 (expanded_x1 - expanded_x0) / TILE_WIDTH,
937 (expanded_y1 - expanded_y0) / TILE_HEIGHT,
938 push->grid_el[2]);
939 }
940
941 static void
hk_meta_copy_buffer_to_image2(struct vk_command_buffer *cmd,
943 struct vk_meta_device *meta,
944 const struct VkCopyBufferToImageInfo2 *info)
945 {
946 VK_FROM_HANDLE(vk_image, image, info->dstImage);
947 VK_FROM_HANDLE(hk_buffer, buffer, info->srcBuffer);
948
949 struct vk_device *device = cmd->base.device;
950 const struct vk_device_dispatch_table *disp = &device->dispatch_table;
951
952 VkDescriptorSetLayout set_layout;
953 VkResult result =
954 get_image_copy_descriptor_set_layout(device, meta, &set_layout, BUF2IMG);
955 if (unlikely(result != VK_SUCCESS)) {
956 vk_command_buffer_set_error(cmd, result);
957 return;
958 }
959
960 bool per_layer =
961 util_format_is_compressed(vk_format_to_pipe_format(image->format));
962
963 for (unsigned r = 0; r < info->regionCount; ++r) {
964 const VkBufferImageCopy2 *region = &info->pRegions[r];
965
966 unsigned layers = MAX2(
967 region->imageExtent.depth,
         vk_image_subresource_layer_count(image, &region->imageSubresource));
969 unsigned layer_iters = per_layer ? layers : 1;
970
971 for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
972 VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
973 VkFormat aspect_fmt = aspect_format(image->format, aspect);
974 VkFormat canonical = canonical_format(aspect_fmt);
975 enum pipe_format p_format = vk_format_to_pipe_format(aspect_fmt);
976 uint32_t blocksize_B = util_format_get_blocksize(p_format);
977 bool is_3d = region->imageExtent.depth > 1;
978
979 struct vk_meta_image_copy_key key = {
980 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
981 .type = BUF2IMG,
982 .block_size = blocksize_B,
983 .nr_samples = image->samples,
984 .src_format = vk_format_to_pipe_format(canonical),
985 .dst_format = canonical_format_pipe(
986 vk_format_to_pipe_format(aspect_format(image->format, aspect)),
987 false),
988
989 /* TODO: MSAA path */
990 .block_based =
991 (image->image_type != VK_IMAGE_TYPE_1D) && image->samples == 1,
992 };
993
994 VkPipelineLayout pipeline_layout;
995 result = get_image_copy_pipeline_layout(device, meta, &key, set_layout,
                                               &pipeline_layout, BUF2IMG);
997 if (unlikely(result != VK_SUCCESS)) {
998 vk_command_buffer_set_error(cmd, result);
999 return;
1000 }
1001
1002 unsigned row_extent = util_format_get_nblocksx(
1003 p_format, MAX2(region->bufferRowLength,
1004 region->imageExtent.width)) *
1005 blocksize_B;
1006 unsigned slice_extent =
1007 util_format_get_nblocksy(
1008 p_format,
1009 MAX2(region->bufferImageHeight, region->imageExtent.height)) *
1010 row_extent;
1011 unsigned layer_extent =
1012 util_format_get_nblocksz(p_format, region->imageExtent.depth) *
1013 slice_extent;
1014
1015 VkImageView dst_view;
1016 const VkImageViewUsageCreateInfo dst_view_usage = {
1017 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1018 .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1019 };
1020 const VkImageViewCreateInfo dst_view_info = {
1021 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1022 .flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
1023 .pNext = &dst_view_usage,
1024 .image = info->dstImage,
1025 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1026 .format = canonical,
1027 .subresourceRange =
1028 {
1029 .aspectMask = region->imageSubresource.aspectMask,
1030 .baseMipLevel = region->imageSubresource.mipLevel,
1031 .baseArrayLayer =
1032 MAX2(region->imageOffset.z,
1033 region->imageSubresource.baseArrayLayer) +
1034 layer_offs,
1035 .layerCount = per_layer ? 1 : layers,
1036 .levelCount = 1,
1037 },
1038 };
1039
1040 result =
1041 vk_meta_create_image_view(cmd, meta, &dst_view_info, &dst_view);
1042 if (unlikely(result != VK_SUCCESS)) {
1043 vk_command_buffer_set_error(cmd, result);
1044 return;
1045 }
1046
1047 const VkDescriptorImageInfo dst_info = {
1048 .imageView = dst_view,
1049 .imageLayout = info->dstImageLayout,
1050 };
1051
1052 VkWriteDescriptorSet desc_write = {
1053 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1054 .dstSet = 0,
1055 .dstBinding = BINDING_OUTPUT,
1056 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1057 .descriptorCount = 1,
1058 .pImageInfo = &dst_info,
1059 };
1060
1061 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1062 VK_PIPELINE_BIND_POINT_COMPUTE,
1063 pipeline_layout, 0, 1, &desc_write);
1064
1065 VkPipeline pipeline;
1066 result = get_image_copy_pipeline(device, meta, &key, pipeline_layout,
1067 &pipeline);
1068 if (unlikely(result != VK_SUCCESS)) {
1069 vk_command_buffer_set_error(cmd, result);
1070 return;
1071 }
1072
1073 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1074 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1075
1076 struct vk_meta_push_data push = {
1077 .buffer = hk_buffer_address(buffer, region->bufferOffset),
1078 .row_extent = row_extent,
1079 .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,
1080 };
1081
1082 push.buffer += push.slice_or_layer_extent * layer_offs;
1083
1084 hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
1085 region->imageOffset, region->imageExtent,
1086 per_layer, layers, p_format, p_format);
1087 }
1088 }
1089 }
1090
1091 static void
hk_meta_copy_image2(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1093 const struct VkCopyImageInfo2 *info)
1094 {
1095 VK_FROM_HANDLE(vk_image, src_image, info->srcImage);
1096 VK_FROM_HANDLE(vk_image, dst_image, info->dstImage);
1097
1098 struct vk_device *device = cmd->base.device;
1099 const struct vk_device_dispatch_table *disp = &device->dispatch_table;
1100
1101 VkDescriptorSetLayout set_layout;
1102 VkResult result =
1103 get_image_copy_descriptor_set_layout(device, meta, &set_layout, BUF2IMG);
1104 if (unlikely(result != VK_SUCCESS)) {
1105 vk_command_buffer_set_error(cmd, result);
1106 return;
1107 }
1108
1109 bool per_layer =
1110 util_format_is_compressed(vk_format_to_pipe_format(src_image->format)) ||
1111 util_format_is_compressed(vk_format_to_pipe_format(dst_image->format));
1112
1113 for (unsigned r = 0; r < info->regionCount; ++r) {
1114 const VkImageCopy2 *region = &info->pRegions[r];
1115
1116 unsigned layers = MAX2(
         vk_image_subresource_layer_count(src_image, &region->srcSubresource),
1118 region->extent.depth);
1119 unsigned layer_iters = per_layer ? layers : 1;
1120
1121 for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
1122 u_foreach_bit(aspect, region->srcSubresource.aspectMask) {
1123 /* We use the source format throughout for consistent scaling with
1124 * compressed<-->uncompressed copies, where the extents are defined
1125 * to follow the source.
1126 */
1127 VkFormat aspect_fmt = aspect_format(src_image->format, 1 << aspect);
1128 VkFormat canonical = canonical_format(aspect_fmt);
1129 uint32_t blocksize_B =
1130 util_format_get_blocksize(vk_format_to_pipe_format(canonical));
1131
1132 VkImageAspectFlagBits dst_aspect_mask =
1133 vk_format_get_ycbcr_info(dst_image->format) ||
1134 vk_format_get_ycbcr_info(src_image->format)
1135 ? region->dstSubresource.aspectMask
1136 : (1 << aspect);
1137
1138 struct vk_meta_image_copy_key key = {
1139 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
1140 .type = IMG2IMG,
1141 .block_size = blocksize_B,
1142 .nr_samples = dst_image->samples,
1143 .src_format = vk_format_to_pipe_format(canonical),
1144 .dst_format =
1145 canonical_format_pipe(vk_format_to_pipe_format(aspect_format(
1146 dst_image->format, dst_aspect_mask)),
1147 false),
1148
1149 /* TODO: MSAA path */
1150 .block_based = (dst_image->image_type != VK_IMAGE_TYPE_1D) &&
1151 dst_image->samples == 1,
1152 };
1153
1154 assert(key.nr_samples == src_image->samples);
1155
1156 VkPipelineLayout pipeline_layout;
1157 result = get_image_copy_pipeline_layout(
            device, meta, &key, set_layout, &pipeline_layout, IMG2IMG);
1159 if (unlikely(result != VK_SUCCESS)) {
1160 vk_command_buffer_set_error(cmd, result);
1161 return;
1162 }
1163
1164 VkWriteDescriptorSet desc_writes[2];
1165
1166 VkImageView src_view;
1167 const VkImageViewUsageCreateInfo src_view_usage = {
1168 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1169 .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
1170 };
1171 const VkImageViewCreateInfo src_view_info = {
1172 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1173 .flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
1174 .pNext = &src_view_usage,
1175 .image = info->srcImage,
1176 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1177 .format = canonical,
1178 .subresourceRange =
1179 {
1180 .aspectMask =
1181 region->srcSubresource.aspectMask & (1 << aspect),
1182 .baseMipLevel = region->srcSubresource.mipLevel,
1183 .baseArrayLayer =
1184 MAX2(region->srcOffset.z,
1185 region->srcSubresource.baseArrayLayer) +
1186 layer_offs,
1187 .layerCount = per_layer ? 1 : layers,
1188 .levelCount = 1,
1189 },
1190 };
1191
1192 result =
1193 vk_meta_create_image_view(cmd, meta, &src_view_info, &src_view);
1194 if (unlikely(result != VK_SUCCESS)) {
1195 vk_command_buffer_set_error(cmd, result);
1196 return;
1197 }
1198
1199 VkDescriptorImageInfo src_info = {
1200 .imageLayout = info->srcImageLayout,
1201 .imageView = src_view,
1202 };
1203
1204 VkImageView dst_view;
1205 const VkImageViewUsageCreateInfo dst_view_usage = {
1206 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1207 .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1208 };
1209 const VkImageViewCreateInfo dst_view_info = {
1210 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1211 .flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
1212 .pNext = &dst_view_usage,
1213 .image = info->dstImage,
1214 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1215 .format = vk_format_from_pipe_format(key.dst_format),
1216 .subresourceRange =
1217 {
1218 .aspectMask = dst_aspect_mask,
1219 .baseMipLevel = region->dstSubresource.mipLevel,
1220 .baseArrayLayer =
1221 MAX2(region->dstOffset.z,
1222 region->dstSubresource.baseArrayLayer) +
1223 layer_offs,
1224 .layerCount = per_layer ? 1 : layers,
1225 .levelCount = 1,
1226 },
1227 };
1228
1229 result =
1230 vk_meta_create_image_view(cmd, meta, &dst_view_info, &dst_view);
1231 if (unlikely(result != VK_SUCCESS)) {
1232 vk_command_buffer_set_error(cmd, result);
1233 return;
1234 }
1235
1236 const VkDescriptorImageInfo dst_info = {
1237 .imageView = dst_view,
1238 .imageLayout = info->dstImageLayout,
1239 };
1240
1241 desc_writes[0] = (VkWriteDescriptorSet){
1242 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1243 .dstSet = 0,
1244 .dstBinding = BINDING_OUTPUT,
1245 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1246 .descriptorCount = 1,
1247 .pImageInfo = &dst_info,
1248 };
1249
1250 desc_writes[1] = (VkWriteDescriptorSet){
1251 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1252 .dstSet = 0,
1253 .dstBinding = BINDING_INPUT,
1254 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1255 .descriptorCount = 1,
1256 .pImageInfo = &src_info,
1257 };
1258
1259 disp->CmdPushDescriptorSetKHR(
1260 vk_command_buffer_to_handle(cmd), VK_PIPELINE_BIND_POINT_COMPUTE,
1261 pipeline_layout, 0, ARRAY_SIZE(desc_writes), desc_writes);
1262
1263 VkPipeline pipeline;
1264 result = get_image_copy_pipeline(device, meta, &key,
1265 pipeline_layout, &pipeline);
1266 if (unlikely(result != VK_SUCCESS)) {
1267 vk_command_buffer_set_error(cmd, result);
1268 return;
1269 }
1270
1271 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1272 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1273
1274 enum pipe_format p_src_fmt =
1275 vk_format_to_pipe_format(src_image->format);
1276 enum pipe_format p_dst_fmt =
1277 vk_format_to_pipe_format(dst_image->format);
1278 enum pipe_format p_format = vk_format_to_pipe_format(aspect_fmt);
1279
1280 struct vk_meta_push_data push = {
1281 .src_offset_el[0] =
1282 util_format_get_nblocksx(p_src_fmt, region->srcOffset.x),
1283 .src_offset_el[1] =
1284 util_format_get_nblocksy(p_src_fmt, region->srcOffset.y),
1285 };
1286
1287 hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
1288 region->dstOffset, region->extent,
1289 per_layer, layers, p_dst_fmt, p_format);
1290 }
1291 }
1292 }
1293 }
1294
1295 struct vk_meta_image_to_buffer_push_data {
1296 uint32_t dest_offset_el;
1297 };
1298
1299 #define get_image_push(b, name) \
1300 nir_load_push_constant( \
1301 b, 1, sizeof(((struct vk_meta_image_to_buffer_push_data *)0)->name) * 8, \
1302 nir_imm_int(b, \
1303 offsetof(struct vk_meta_image_to_buffer_push_data, name)))
1304
1305 enum copy_source {
1306 COPY_SOURCE_PATTERN,
1307 COPY_SOURCE_BUFFER,
1308 };
1309
1310 struct vk_meta_buffer_copy_key {
1311 enum vk_meta_object_key_type key_type;
1312 enum copy_source source;
1313
1314 /* Power-of-two block size for the transfer, range [1, 16] */
1315 uint8_t blocksize;
1316 uint8_t pad[3];
1317 };
1318 static_assert(sizeof(struct vk_meta_buffer_copy_key) == 12, "packed");
1319
1320 /* XXX: TODO: move to common */
1321 /* Copyright © Microsoft Corporation */
1322 static nir_def *
dzn_nir_create_bo_desc(nir_builder *b, nir_variable_mode mode,
1324 uint32_t desc_set, uint32_t binding, const char *name,
1325 unsigned access, const struct glsl_type *dummy_type)
1326 {
1327 nir_variable *var = nir_variable_create(b->shader, mode, dummy_type, name);
1328 var->data.descriptor_set = desc_set;
1329 var->data.binding = binding;
1330 var->data.access = access;
1331
1332 assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
1333 if (mode == nir_var_mem_ubo)
1334 b->shader->info.num_ubos++;
1335 else
1336 b->shader->info.num_ssbos++;
1337
1338 VkDescriptorType desc_type = var->data.mode == nir_var_mem_ubo
1339 ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
1340 : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1341 nir_address_format addr_format =
1342 nir_address_format_64bit_global_32bit_offset; /* XXX */
1343 nir_def *index = nir_vulkan_resource_index(
1344 b, nir_address_format_num_components(addr_format),
1345 nir_address_format_bit_size(addr_format), nir_imm_int(b, 0),
1346 .desc_set = desc_set, .binding = binding, .desc_type = desc_type);
1347
1348 nir_def *desc = nir_load_vulkan_descriptor(
1349 b, nir_address_format_num_components(addr_format),
1350 nir_address_format_bit_size(addr_format), index, .desc_type = desc_type);
1351
1352 return desc;
1353 }
1354
1355 static const struct glsl_type *
type_for_blocksize(uint8_t blocksize)
1357 {
1358 assert(util_is_power_of_two_nonzero(blocksize) && blocksize <= 16);
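   /* 8- and 16-byte blocks become uvec2/uvec4; 1-, 2- and 4-byte blocks use
    * uint8_t/uint16_t/uint32_t scalars.
    */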
1359
1360 if (blocksize > 4)
1361 return glsl_vector_type(GLSL_TYPE_UINT, blocksize / 4);
1362 else
1363 return glsl_uintN_t_type(8 * blocksize);
1364 }
1365
1366 static nir_shader *
build_buffer_copy_shader(const struct vk_meta_buffer_copy_key *key)
1368 {
1369 nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
1370 "vk-meta-copy-to-buffer");
1371 nir_builder *b = &build;
1372
1373 const struct glsl_type *type =
1374 glsl_array_type(type_for_blocksize(key->blocksize), 0, key->blocksize);
1375
1376 nir_def *index = nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
1377 nir_def *value;
1378
1379 if (key->source == COPY_SOURCE_BUFFER) {
1380 nir_def *ubo =
1381 dzn_nir_create_bo_desc(b, nir_var_mem_ubo, 0, BINDING_INPUT, "source",
1382 ACCESS_NON_WRITEABLE, type);
1383 nir_deref_instr *ubo_deref =
1384 nir_build_deref_cast(b, ubo, nir_var_mem_ubo, type, key->blocksize);
1385
1386 nir_deref_instr *element_deref = nir_build_deref_array(
1387 b, ubo_deref, nir_u2uN(b, index, ubo_deref->def.bit_size));
1388
1389 value = nir_load_deref(b, element_deref);
1390 } else {
1391 nir_def *pattern = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0));
1392
1393 assert(key->blocksize >= 4 && "fills at least 32-bit");
1394 value = nir_replicate(b, pattern, key->blocksize / 4);
1395 }
1396
1397 /* Write out raw bytes to SSBO */
1398 nir_def *ssbo =
1399 dzn_nir_create_bo_desc(b, nir_var_mem_ssbo, 0, BINDING_OUTPUT,
1400 "destination", ACCESS_NON_READABLE, type);
1401
1402 nir_deref_instr *ssbo_deref =
1403 nir_build_deref_cast(b, ssbo, nir_var_mem_ssbo, type, key->blocksize);
1404
1405 nir_deref_instr *element_deref = nir_build_deref_array(
1406 b, ssbo_deref, nir_u2uN(b, index, ssbo_deref->def.bit_size));
1407
1408 nir_store_deref(b, element_deref, value,
1409 nir_component_mask(value->num_components));
1410
1411 return b->shader;
1412 }
1413
1414 static VkResult
get_buffer_copy_descriptor_set_layout(struct vk_device *device,
1416 struct vk_meta_device *meta,
1417 VkDescriptorSetLayout *layout_out,
1418 enum copy_source source)
1419 {
1420 const char buffer_key[] = "vk-meta-buffer-copy-descriptor-set-layout";
1421 const char fill_key[] = "vk-meta-fill__-copy-descriptor-set-layout";
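   /* The odd "fill__" spelling pads fill_key to the same length as buffer_key,
    * so a single sizeof(buffer_key) works as the cache key size for both.
    */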
1422
1423 static_assert(sizeof(buffer_key) == sizeof(fill_key));
1424 const char *key = source == COPY_SOURCE_BUFFER ? buffer_key : fill_key;
1425
1426 VkDescriptorSetLayout from_cache =
1427 vk_meta_lookup_descriptor_set_layout(meta, key, sizeof(buffer_key));
1428 if (from_cache != VK_NULL_HANDLE) {
1429 *layout_out = from_cache;
1430 return VK_SUCCESS;
1431 }
1432
1433 const VkDescriptorSetLayoutBinding bindings[] = {
1434 {
1435 .binding = BINDING_OUTPUT,
1436 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1437 .descriptorCount = 1,
1438 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1439 },
1440 {
1441 .binding = BINDING_INPUT,
1442 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1443 .descriptorCount = 1,
1444 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1445 },
1446 };
1447
1448 const VkDescriptorSetLayoutCreateInfo info = {
1449 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1450 .bindingCount = ARRAY_SIZE(bindings),
1451 .pBindings = bindings,
1452 };
1453
1454 return vk_meta_create_descriptor_set_layout(device, meta, &info, key,
                                               sizeof(buffer_key), layout_out);
1456 }
1457
1458 static VkResult
get_buffer_copy_pipeline_layout(struct vk_device *device,
1460 struct vk_meta_device *meta,
1461 struct vk_meta_buffer_copy_key *key,
1462 VkDescriptorSetLayout set_layout,
1463 VkPipelineLayout *layout_out)
1464 {
1465 const char copy_key[] = "vk-meta-buffer-copy-pipeline-layout";
1466 const char fill_key[] = "vk-meta-buffer-fill-pipeline-layout";
1467 const char cimg_key[] = "vk-meta-buffer-cimg-pipeline-layout";
1468
1469 STATIC_ASSERT(sizeof(copy_key) == sizeof(fill_key));
1470 STATIC_ASSERT(sizeof(copy_key) == sizeof(cimg_key));
1471 const char *pipeline_key =
1472 key->source == COPY_SOURCE_BUFFER ? copy_key : fill_key;
1473
1474 VkPipelineLayout from_cache =
1475 vk_meta_lookup_pipeline_layout(meta, pipeline_key, sizeof(copy_key));
1476 if (from_cache != VK_NULL_HANDLE) {
1477 *layout_out = from_cache;
1478 return VK_SUCCESS;
1479 }
1480
1481 VkPipelineLayoutCreateInfo info = {
1482 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1483 .setLayoutCount = 1,
1484 .pSetLayouts = &set_layout,
1485 };
1486
1487 size_t push_size = 0;
1488 if (key->source == COPY_SOURCE_PATTERN)
1489 push_size = sizeof(uint32_t);
1490
1491 const VkPushConstantRange push_range = {
1492 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1493 .offset = 0,
1494 .size = push_size,
1495 };
1496
1497 if (push_size) {
1498 info.pushConstantRangeCount = 1;
1499 info.pPushConstantRanges = &push_range;
1500 }
1501
1502 return vk_meta_create_pipeline_layout(device, meta, &info, pipeline_key,
1503 sizeof(copy_key), layout_out);
1504 }
1505
1506 static VkResult
get_buffer_copy_pipeline(struct vk_device *device, struct vk_meta_device *meta,
1508 const struct vk_meta_buffer_copy_key *key,
1509 VkPipelineLayout layout, VkPipeline *pipeline_out)
1510 {
1511 VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key, sizeof(*key));
1512 if (from_cache != VK_NULL_HANDLE) {
1513 *pipeline_out = from_cache;
1514 return VK_SUCCESS;
1515 }
1516
1517 const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
1518 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
1519 .nir = build_buffer_copy_shader(key),
1520 };
1521 const VkPipelineShaderStageCreateInfo cs_info = {
1522 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1523 .pNext = &nir_info,
1524 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1525 .pName = "main",
1526 };
1527
1528 const VkComputePipelineCreateInfo info = {
1529 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1530 .stage = cs_info,
1531 .layout = layout,
1532 };
1533
1534 VkResult result = vk_meta_create_compute_pipeline(
1535 device, meta, &info, key, sizeof(*key), pipeline_out);
1536 ralloc_free(nir_info.nir);
1537
1538 return result;
1539 }
1540
1541 static unsigned
alignment_of(unsigned x)
1543 {
1544 return 1 << MIN2(__builtin_ctz(x), 31);
1545 }
1546
1547 struct copy_desc {
1548 enum copy_source source;
1549
1550 union {
1551 uint32_t pattern;
1552
1553 struct {
1554 struct vk_buffer *source;
1555 VkDeviceSize srcOffset;
1556 } buffer;
1557
1558 struct {
1559 struct vk_image *image;
1560 VkDescriptorImageInfo *info;
1561 VkFormat format;
1562 struct vk_meta_image_to_buffer_push_data push;
1563 } image;
1564 };
1565 };
1566
1567 static void
do_copy(struct vk_command_buffer *cmd, struct vk_meta_device *meta, size_t size,
1569 struct vk_buffer *dest, VkDeviceSize dstOffset, struct copy_desc *desc)
1570 {
1571 struct vk_device *device = cmd->base.device;
1572 const struct vk_device_dispatch_table *disp = &device->dispatch_table;
1573 VkResult result;
1574
1575 /* The "alignment" of the copy is the maximum alignment that all accesses
1576 * within the copy will satsify.
1577 */
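   /* For example, dstOffset = 12 with size = 8 gives alignment 4, so the copy
    * dispatches 32-bit blocks.
    */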
1578 unsigned alignment = MIN2(alignment_of(dstOffset), alignment_of(size));
1579
1580 if (desc->source == COPY_SOURCE_BUFFER)
1581 alignment = MIN2(alignment, alignment_of(desc->buffer.srcOffset));
1582
1583 struct vk_meta_buffer_copy_key key = {
1584 .key_type = VK_META_OBJECT_KEY_FILL_PIPELINE,
1585 .source = desc->source,
1586 .blocksize = MIN2(alignment, 16),
1587 };
1588
1589 VkDescriptorSetLayout set_layout;
1590 result = get_buffer_copy_descriptor_set_layout(device, meta, &set_layout,
1591 desc->source);
1592 if (unlikely(result != VK_SUCCESS)) {
1593 vk_command_buffer_set_error(cmd, result);
1594 return;
1595 }
1596
1597 VkPipelineLayout pipeline_layout;
1598 result = get_buffer_copy_pipeline_layout(device, meta, &key, set_layout,
1599 &pipeline_layout);
1600 if (unlikely(result != VK_SUCCESS)) {
1601 vk_command_buffer_set_error(cmd, result);
1602 return;
1603 }
1604
1605 VkDescriptorBufferInfo buffer_infos[2];
1606 VkWriteDescriptorSet desc_writes[2];
1607
1608 for (unsigned i = 0; i < 2; ++i) {
1609 bool is_dest = (i == BINDING_OUTPUT);
1610
1611 if (!is_dest && desc->source != COPY_SOURCE_BUFFER)
1612 continue;
1613
1614 buffer_infos[i] = (VkDescriptorBufferInfo){
1615 .buffer = vk_buffer_to_handle(is_dest ? dest : desc->buffer.source),
1616 .offset = is_dest ? dstOffset : desc->buffer.srcOffset,
1617 .range = size,
1618 };
1619
1620 desc_writes[i] = (VkWriteDescriptorSet){
1621 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1622 .dstSet = 0,
1623 .dstBinding = i,
1624 .descriptorType = is_dest ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
1625 : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1626 .descriptorCount = 1,
1627 .pBufferInfo = &buffer_infos[i],
1628 };
1629 }
1630
1631 unsigned desc_count = desc->source == COPY_SOURCE_PATTERN ? 1 : 2;
1632 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1633 VK_PIPELINE_BIND_POINT_COMPUTE,
1634 pipeline_layout, 0, desc_count, desc_writes);
1635
1636 VkPipeline pipeline;
1637 result =
1638 get_buffer_copy_pipeline(device, meta, &key, pipeline_layout, &pipeline);
1639 if (unlikely(result != VK_SUCCESS)) {
1640 vk_command_buffer_set_error(cmd, result);
1641 return;
1642 }
1643
1644 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1645 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1646
1647 if (desc->source == COPY_SOURCE_PATTERN) {
1648 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
1649 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t),
1650 &desc->pattern);
1651 }
1652
1653 disp->CmdDispatch(vk_command_buffer_to_handle(cmd), size / key.blocksize, 1,
1654 1);
1655 }
1656
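/* vkCmdFillBuffer implementation. Fills operate on 32-bit words, so the
 * resolved range (which may come from VK_WHOLE_SIZE) is rounded down to a
 * multiple of 4 before dispatching a pattern copy.
 */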
1657 static void
1658 hk_meta_fill_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1659 struct vk_buffer *dest, VkDeviceSize dstOffset,
1660 VkDeviceSize dstRange, uint32_t data)
1661 {
1662 size_t size = ROUND_DOWN_TO(vk_buffer_range(dest, dstOffset, dstRange), 4);
1663 dstOffset = ROUND_DOWN_TO(dstOffset, 4);
1664
1665 do_copy(cmd, meta, size, dest, dstOffset,
1666 &(struct copy_desc){
1667 .source = COPY_SOURCE_PATTERN,
1668 .pattern = data,
1669 });
1670 }
1671
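/* vkCmdUpdateBuffer implementation: stage the caller's data in a transient
 * buffer mapped on the CPU through the meta cmd_bind_map_buffer hook, then
 * copy buffer-to-buffer on the GPU.
 */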
1672 static void
1673 hk_meta_update_buffer(struct vk_command_buffer *cmd,
1674 struct vk_meta_device *meta, struct vk_buffer *dest,
1675 VkDeviceSize dstOffset, VkDeviceSize dstRange,
1676 const void *data)
1677 {
1678 /* Create a buffer to hold the data */
1679 const VkBufferCreateInfo info = {
1680 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1681 .size = vk_buffer_range(dest, dstOffset, dstRange),
1682 .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
1683 .queueFamilyIndexCount = 1,
1684 .pQueueFamilyIndices = &cmd->pool->queue_family_index,
1685 };
1686
1687 VkBuffer buffer;
1688 VkResult result = vk_meta_create_buffer(cmd, meta, &info, &buffer);
1689 if (unlikely(result != VK_SUCCESS))
1690 return;
1691
1692 /* Map the buffer for CPU access */
1693 void *map;
1694 result = meta->cmd_bind_map_buffer(cmd, meta, buffer, &map);
1695 if (unlikely(result != VK_SUCCESS))
1696 return;
1697
1698 /* Copy from the CPU input to the staging buffer */
1699 memcpy(map, data, info.size);
1700
1701 /* Copy between the buffers on the GPU */
1702 VK_FROM_HANDLE(vk_buffer, buffer_, buffer);
1703 size_t size = ROUND_DOWN_TO(vk_buffer_range(dest, dstOffset, dstRange), 4);
1704 dstOffset = ROUND_DOWN_TO(dstOffset, 4);
1705
1706 do_copy(cmd, meta, size, dest, dstOffset,
1707 &(struct copy_desc){
1708 .source = COPY_SOURCE_BUFFER,
1709 .buffer.source = buffer_,
1710 });
1711 }
1712
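/* vkCmdCopyBuffer2 implementation: one compute dispatch per copy region. */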
1713 static void
1714 hk_meta_copy_buffer2(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1715 const VkCopyBufferInfo2 *pCopyBufferInfo)
1716 {
1717 VK_FROM_HANDLE(vk_buffer, dst, pCopyBufferInfo->dstBuffer);
1718 VK_FROM_HANDLE(vk_buffer, src, pCopyBufferInfo->srcBuffer);
1719
1720 for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) {
1721 const VkBufferCopy2 *copy = &pCopyBufferInfo->pRegions[i];
1722
1723 do_copy(cmd, meta, copy->size, dst, copy->dstOffset,
1724 &(struct copy_desc){
1725 .source = COPY_SOURCE_BUFFER,
1726 .buffer.source = src,
1727 .buffer.srcOffset = copy->srcOffset,
1728 });
1729 }
1730 }
1731
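/* The Vulkan entry points below wrap the shared vk_meta/hk_meta
 * implementations, saving and restoring command buffer state for the bind
 * point (graphics or compute) that the meta operation uses.
 */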
1732 VKAPI_ATTR void VKAPI_CALL
1733 hk_CmdBlitImage2(VkCommandBuffer commandBuffer,
1734 const VkBlitImageInfo2 *pBlitImageInfo)
1735 {
1736 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1737 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1738
1739 struct hk_meta_save save;
1740 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1741 vk_meta_blit_image2(&cmd->vk, &dev->meta, pBlitImageInfo);
1742 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1743 }
1744
1745 VKAPI_ATTR void VKAPI_CALL
1746 hk_CmdResolveImage2(VkCommandBuffer commandBuffer,
1747 const VkResolveImageInfo2 *pResolveImageInfo)
1748 {
1749 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1750 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1751
1752 struct hk_meta_save save;
1753 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1754 vk_meta_resolve_image2(&cmd->vk, &dev->meta, pResolveImageInfo);
1755 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1756 }
1757
1758 void
1759 hk_meta_resolve_rendering(struct hk_cmd_buffer *cmd,
1760 const VkRenderingInfo *pRenderingInfo)
1761 {
1762 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1763
1764 struct hk_meta_save save;
1765 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1766 vk_meta_resolve_rendering(&cmd->vk, &dev->meta, pRenderingInfo);
1767 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1768 }
1769
1770 VKAPI_ATTR void VKAPI_CALL
1771 hk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
1772 const VkCopyBufferInfo2 *pCopyBufferInfo)
1773 {
1774 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1775 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1776
1777 struct hk_meta_save save;
1778 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1779 hk_meta_copy_buffer2(&cmd->vk, &dev->meta, pCopyBufferInfo);
1780 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1781 }
1782
1783 VKAPI_ATTR void VKAPI_CALL
1784 hk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
1785 const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
1786 {
1787 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1788 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1789
1790 struct hk_meta_save save;
1791 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1792 hk_meta_copy_buffer_to_image2(&cmd->vk, &dev->meta, pCopyBufferToImageInfo);
1793 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1794 }
1795
1796 VKAPI_ATTR void VKAPI_CALL
1797 hk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
1798 const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
1799 {
1800 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1801 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1802
1803 struct hk_meta_save save;
1804 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1805 hk_meta_copy_image_to_buffer2(&cmd->vk, &dev->meta, pCopyImageToBufferInfo);
1806 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1807 }
1808
1809 VKAPI_ATTR void VKAPI_CALL
1810 hk_CmdCopyImage2(VkCommandBuffer commandBuffer,
1811 const VkCopyImageInfo2 *pCopyImageInfo)
1812 {
1813 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1814 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1815
1816 struct hk_meta_save save;
1817 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1818 hk_meta_copy_image2(&cmd->vk, &dev->meta, pCopyImageInfo);
1819 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1820 }
1821
1822 VKAPI_ATTR void VKAPI_CALL
1823 hk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
1824 VkDeviceSize dstOffset, VkDeviceSize dstRange, uint32_t data)
1825 {
1826 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1827 VK_FROM_HANDLE(vk_buffer, buffer, dstBuffer);
1828 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1829
1830 struct hk_meta_save save;
1831 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1832 hk_meta_fill_buffer(&cmd->vk, &dev->meta, buffer, dstOffset, dstRange, data);
1833 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1834 }
1835
1836 VKAPI_ATTR void VKAPI_CALL
1837 hk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
1838 VkDeviceSize dstOffset, VkDeviceSize dstRange,
1839 const void *pData)
1840 {
1841 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1842 VK_FROM_HANDLE(vk_buffer, buffer, dstBuffer);
1843 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1844
1845 struct hk_meta_save save;
1846 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1847 hk_meta_update_buffer(&cmd->vk, &dev->meta, buffer, dstOffset, dstRange,
1848 pData);
1849 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1850 }
1851
1852 VKAPI_ATTR void VKAPI_CALL
1853 hk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
1854 const VkClearAttachment *pAttachments,
1855 uint32_t rectCount, const VkClearRect *pRects)
1856 {
1857 VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1858 struct hk_device *dev = hk_cmd_buffer_device(cmd);
1859
1860 struct vk_meta_rendering_info render_info;
1861 hk_meta_init_render(cmd, &render_info);
1862
1863 struct hk_meta_save save;
1864 hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1865 vk_meta_clear_attachments(&cmd->vk, &dev->meta, &render_info,
1866 attachmentCount, pAttachments, rectCount, pRects);
1867 hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1868 }
1869