xref: /aosp_15_r20/external/mesa3d/src/asahi/vulkan/hk_cmd_meta.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2024 Valve Corporation
3  * Copyright 2024 Alyssa Rosenzweig
4  * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5  * SPDX-License-Identifier: MIT
6  */
7 #include "util/format/u_format.h"
8 #include "util/format/u_formats.h"
9 #include "util/u_math.h"
10 #include "vulkan/vulkan_core.h"
11 #include "agx_pack.h"
12 #include "hk_buffer.h"
13 #include "hk_cmd_buffer.h"
14 #include "hk_device.h"
15 #include "hk_entrypoints.h"
16 #include "hk_image.h"
17 #include "hk_physical_device.h"
18 
19 #include "layout.h"
20 #include "nir_builder.h"
21 #include "nir_builder_opcodes.h"
22 #include "nir_format_convert.h"
23 #include "shader_enums.h"
24 #include "vk_format.h"
25 #include "vk_meta.h"
26 #include "vk_pipeline.h"
27 
28 /* For block-based blit kernels, we hardcode the maximum tile size, which we
29  * can always achieve. This simplifies our life.
30  */
31 #define TILE_WIDTH  32
32 #define TILE_HEIGHT 32
33 
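/* vk_meta callback for binding a host-mapped staging buffer: the storage is
 * carved out of the command buffer's pool and the CPU mapping is returned so
 * vk_meta can fill it directly; nothing is freed explicitly here since the
 * pool allocation is owned by the command buffer.
 */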
34 static VkResult
35 hk_cmd_bind_map_buffer(struct vk_command_buffer *vk_cmd,
36                        struct vk_meta_device *meta, VkBuffer _buffer,
37                        void **map_out)
38 {
39    struct hk_cmd_buffer *cmd = container_of(vk_cmd, struct hk_cmd_buffer, vk);
40    VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
41 
42    assert(buffer->vk.size < UINT_MAX);
43    struct agx_ptr T = hk_pool_alloc(cmd, buffer->vk.size, 16);
44    if (unlikely(T.cpu == NULL))
45       return VK_ERROR_OUT_OF_POOL_MEMORY;
46 
47    buffer->addr = T.gpu;
48    *map_out = T.cpu;
49    return VK_SUCCESS;
50 }
51 
52 VkResult
53 hk_device_init_meta(struct hk_device *dev)
54 {
55    VkResult result = vk_meta_device_init(&dev->vk, &dev->meta);
56    if (result != VK_SUCCESS)
57       return result;
58 
59    dev->meta.use_gs_for_layer = false;
60    dev->meta.use_stencil_export = true;
61    dev->meta.cmd_bind_map_buffer = hk_cmd_bind_map_buffer;
62    dev->meta.max_bind_map_buffer_size_B = 64 * 1024;
63 
64    return VK_SUCCESS;
65 }
66 
67 void
68 hk_device_finish_meta(struct hk_device *dev)
69 {
70    vk_meta_device_finish(&dev->vk, &dev->meta);
71 }
72 
73 struct hk_meta_save {
74    struct vk_vertex_input_state _dynamic_vi;
75    struct vk_sample_locations_state _dynamic_sl;
76    struct vk_dynamic_graphics_state dynamic;
77    struct hk_api_shader *shaders[MESA_SHADER_MESH + 1];
78    struct hk_addr_range vb0;
79    struct hk_descriptor_set *desc0;
80    bool has_push_desc0;
81    enum agx_visibility_mode occlusion;
82    struct hk_push_descriptor_set push_desc0;
83    VkQueryPipelineStatisticFlags pipeline_stats_flags;
84    uint8_t push[128];
85 };
86 
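/* Save the state that a meta operation is about to clobber: bound shaders,
 * dynamic state, vertex buffer 0, descriptor set 0 (including any push
 * descriptors), and the root push constants. Occlusion queries and pipeline
 * statistics are also paused so internal draws/dispatches are not counted.
 */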
87 static void
88 hk_meta_begin(struct hk_cmd_buffer *cmd, struct hk_meta_save *save,
89               VkPipelineBindPoint bind_point)
90 {
91    struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
92 
93    if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
94       save->dynamic = cmd->vk.dynamic_graphics_state;
95       save->_dynamic_vi = cmd->state.gfx._dynamic_vi;
96       save->_dynamic_sl = cmd->state.gfx._dynamic_sl;
97 
98       static_assert(sizeof(cmd->state.gfx.shaders) == sizeof(save->shaders));
99       memcpy(save->shaders, cmd->state.gfx.shaders, sizeof(save->shaders));
100 
101       /* Pause queries */
102       save->occlusion = cmd->state.gfx.occlusion.mode;
103       cmd->state.gfx.occlusion.mode = AGX_VISIBILITY_MODE_NONE;
104       cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;
105 
106       save->pipeline_stats_flags = desc->root.draw.pipeline_stats_flags;
107       desc->root.draw.pipeline_stats_flags = 0;
108       desc->root_dirty = true;
109    } else {
110       save->shaders[MESA_SHADER_COMPUTE] = cmd->state.cs.shader;
111    }
112 
113    save->vb0 = cmd->state.gfx.vb[0];
114 
115    save->desc0 = desc->sets[0];
116    save->has_push_desc0 = desc->push[0];
117    if (save->has_push_desc0)
118       save->push_desc0 = *desc->push[0];
119 
120    static_assert(sizeof(save->push) == sizeof(desc->root.push));
121    memcpy(save->push, desc->root.push, sizeof(save->push));
122 
123    cmd->in_meta = true;
124 }
125 
126 static void
127 hk_meta_init_render(struct hk_cmd_buffer *cmd,
128                     struct vk_meta_rendering_info *info)
129 {
130    const struct hk_rendering_state *render = &cmd->state.gfx.render;
131 
132    *info = (struct vk_meta_rendering_info){
133       .samples = MAX2(render->tilebuffer.nr_samples, 1),
134       .view_mask = render->view_mask,
135       .color_attachment_count = render->color_att_count,
136       .depth_attachment_format = render->depth_att.vk_format,
137       .stencil_attachment_format = render->stencil_att.vk_format,
138    };
139    for (uint32_t a = 0; a < render->color_att_count; a++) {
140       info->color_attachment_formats[a] = render->color_att[a].vk_format;
141       info->color_attachment_write_masks[a] =
142          VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
143          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
144    }
145 }
146 
147 static void
148 hk_meta_end(struct hk_cmd_buffer *cmd, struct hk_meta_save *save,
149             VkPipelineBindPoint bind_point)
150 {
151    struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
152    desc->root_dirty = true;
153 
154    if (save->desc0) {
155       desc->sets[0] = save->desc0;
156       desc->root.sets[0] = hk_descriptor_set_addr(save->desc0);
157       desc->sets_dirty |= BITFIELD_BIT(0);
158       desc->push_dirty &= ~BITFIELD_BIT(0);
159    } else if (save->has_push_desc0) {
160       *desc->push[0] = save->push_desc0;
161       desc->push_dirty |= BITFIELD_BIT(0);
162    }
163 
164    if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
165       /* Restore the dynamic state */
166       assert(save->dynamic.vi == &cmd->state.gfx._dynamic_vi);
167       assert(save->dynamic.ms.sample_locations == &cmd->state.gfx._dynamic_sl);
168       cmd->vk.dynamic_graphics_state = save->dynamic;
169       cmd->state.gfx._dynamic_vi = save->_dynamic_vi;
170       cmd->state.gfx._dynamic_sl = save->_dynamic_sl;
171       memcpy(cmd->vk.dynamic_graphics_state.dirty,
172              cmd->vk.dynamic_graphics_state.set,
173              sizeof(cmd->vk.dynamic_graphics_state.set));
174 
175       for (uint32_t stage = 0; stage < ARRAY_SIZE(save->shaders); stage++) {
176          hk_cmd_bind_graphics_shader(cmd, stage, save->shaders[stage]);
177       }
178 
179       hk_cmd_bind_vertex_buffer(cmd, 0, save->vb0);
180 
181       /* Restore queries */
182       cmd->state.gfx.occlusion.mode = save->occlusion;
183       cmd->state.gfx.dirty |= HK_DIRTY_OCCLUSION;
184 
185       desc->root.draw.pipeline_stats_flags = save->pipeline_stats_flags;
186       desc->root_dirty = true;
187    } else {
188       hk_cmd_bind_compute_shader(cmd, save->shaders[MESA_SHADER_COMPUTE]);
189    }
190 
191    memcpy(desc->root.push, save->push, sizeof(save->push));
192    cmd->in_meta = false;
193 }
194 
195 #define VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE (0xcafe0000)
196 #define VK_META_OBJECT_KEY_FILL_PIPELINE                 (0xcafe0001)
197 
198 #define BINDING_OUTPUT 0
199 #define BINDING_INPUT  1
200 
201 static VkFormat
202 aspect_format(VkFormat fmt, VkImageAspectFlags aspect)
203 {
204    bool depth = (aspect & VK_IMAGE_ASPECT_DEPTH_BIT);
205    bool stencil = (aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
206 
207    enum pipe_format p_format = vk_format_to_pipe_format(fmt);
208 
209    if (util_format_is_depth_or_stencil(p_format)) {
210       assert(depth ^ stencil);
211       if (depth) {
212          switch (fmt) {
213          case VK_FORMAT_D32_SFLOAT:
214          case VK_FORMAT_D32_SFLOAT_S8_UINT:
215             return VK_FORMAT_D32_SFLOAT;
216          case VK_FORMAT_D16_UNORM:
217          case VK_FORMAT_D16_UNORM_S8_UINT:
218             return VK_FORMAT_D16_UNORM;
219          default:
220             unreachable("invalid depth");
221          }
222       } else {
223          switch (fmt) {
224          case VK_FORMAT_S8_UINT:
225          case VK_FORMAT_D32_SFLOAT_S8_UINT:
226          case VK_FORMAT_D16_UNORM_S8_UINT:
227             return VK_FORMAT_S8_UINT;
228          default:
229             unreachable("invalid stencil");
230          }
231       }
232    }
233 
234    assert(!depth && !stencil);
235 
236    const struct vk_format_ycbcr_info *ycbcr_info =
237       vk_format_get_ycbcr_info(fmt);
238 
239    if (ycbcr_info) {
240       switch (aspect) {
241       case VK_IMAGE_ASPECT_PLANE_0_BIT:
242          return ycbcr_info->planes[0].format;
243       case VK_IMAGE_ASPECT_PLANE_1_BIT:
244          return ycbcr_info->planes[1].format;
245       case VK_IMAGE_ASPECT_PLANE_2_BIT:
246          return ycbcr_info->planes[2].format;
247       default:
248          unreachable("invalid ycbcr aspect");
249       }
250    }
251 
252    return fmt;
253 }
254 
255 /*
256  * Canonicalize formats to simplify the copies. The returned format must be in
257  * the same compression class, and should round-trip losslessly (minifloat
258  * formats are the unfortunate exception).
259  */
260 static enum pipe_format
261 canonical_format_pipe(enum pipe_format fmt, bool canonicalize_zs)
262 {
263    if (!canonicalize_zs && util_format_is_depth_or_stencil(fmt))
264       return fmt;
265 
266    assert(ail_is_valid_pixel_format(fmt));
267 
268    if (util_format_is_compressed(fmt)) {
269       unsigned size_B = util_format_get_blocksize(fmt);
270       assert(size_B == 8 || size_B == 16);
271 
272       return size_B == 16 ? PIPE_FORMAT_R32G32B32A32_UINT
273                           : PIPE_FORMAT_R32G32_UINT;
274    }
275 
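   /* Each entry keeps the channel layout of its class so copies stay bit-exact;
    * only the interpretation (UINT vs UNORM/FLOAT) changes. R16 maps to
    * R16_UNORM rather than R16_UINT as a workaround for Z16 copies (see the
    * note below).
    */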
276 #define CASE(x, y) [AGX_CHANNELS_##x] = PIPE_FORMAT_##y
277    /* clang-format off */
278    static enum pipe_format map[] = {
279       CASE(R8,           R8_UINT),
280       CASE(R16,          R16_UNORM /* XXX: Hack for Z16 copies */),
281       CASE(R8G8,         R8G8_UINT),
282       CASE(R5G6B5,       R5G6B5_UNORM),
283       CASE(R4G4B4A4,     R4G4B4A4_UNORM),
284       CASE(A1R5G5B5,     A1R5G5B5_UNORM),
285       CASE(R5G5B5A1,     B5G5R5A1_UNORM),
286       CASE(R32,          R32_UINT),
287       CASE(R16G16,       R16G16_UINT),
288       CASE(R11G11B10,    R11G11B10_FLOAT),
289       CASE(R10G10B10A2,  R10G10B10A2_UNORM),
290       CASE(R9G9B9E5,     R9G9B9E5_FLOAT),
291       CASE(R8G8B8A8,     R8G8B8A8_UINT),
292       CASE(R32G32,       R32G32_UINT),
293       CASE(R16G16B16A16, R16G16B16A16_UINT),
294       CASE(R32G32B32A32, R32G32B32A32_UINT),
295    };
296    /* clang-format on */
297 #undef CASE
298 
299    enum agx_channels channels = ail_pixel_format[fmt].channels;
300    assert(channels < ARRAY_SIZE(map) && "all valid channels handled");
301    assert(map[channels] != PIPE_FORMAT_NONE && "all valid channels handled");
302    return map[channels];
303 }
304 
305 static VkFormat
306 canonical_format(VkFormat fmt)
307 {
308    return vk_format_from_pipe_format(
309       canonical_format_pipe(vk_format_to_pipe_format(fmt), false));
310 }
311 
312 enum copy_type {
313    BUF2IMG,
314    IMG2BUF,
315    IMG2IMG,
316 };
317 
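/* Push constants shared by the copy shaders. `buffer` is the raw GPU address
 * of the linear-buffer side of the copy, `row_extent` and
 * `slice_or_layer_extent` are byte strides between rows of blocks and between
 * 3D slices (or array layers), and the offsets/grid are expressed in blocks.
 */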
318 struct vk_meta_push_data {
319    uint64_t buffer;
320 
321    uint32_t row_extent;
322    uint32_t slice_or_layer_extent;
323 
324    int32_t src_offset_el[4];
325    int32_t dst_offset_el[4];
326    uint32_t grid_el[3];
327 } PACKED;
328 
329 #define get_push(b, name)                                                      \
330    nir_load_push_constant(                                                     \
331       b, 1, sizeof(((struct vk_meta_push_data *)0)->name) * 8,                 \
332       nir_imm_int(b, offsetof(struct vk_meta_push_data, name)))
333 
334 struct vk_meta_image_copy_key {
335    enum vk_meta_object_key_type key_type;
336    enum copy_type type;
337    enum pipe_format src_format, dst_format;
338    unsigned block_size;
339    unsigned nr_samples;
340    bool block_based;
341 };
342 
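/* Convert an (x, y, z-or-layer) block coordinate into a linear block index
 * into the buffer: bytes = x * block_size + y * row_extent +
 * z * slice_or_layer_extent, then divided back by the block size so the
 * result indexes whole blocks for the formatted load/store helpers.
 */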
343 static nir_def *
344 linearize_coords(nir_builder *b, nir_def *coord,
345                  const struct vk_meta_image_copy_key *key)
346 {
347    assert(key->nr_samples == 1 && "buffer<-->image copies not multisampled");
348 
349    nir_def *row_extent = get_push(b, row_extent);
350    nir_def *slice_or_layer_extent = get_push(b, slice_or_layer_extent);
351    nir_def *x = nir_channel(b, coord, 0);
352    nir_def *y = nir_channel(b, coord, 1);
353    nir_def *z_or_layer = nir_channel(b, coord, 2);
354 
355    nir_def *v = nir_imul_imm(b, x, key->block_size);
356 
357    v = nir_iadd(b, v, nir_imul(b, y, row_extent));
358    v = nir_iadd(b, v, nir_imul(b, z_or_layer, slice_or_layer_extent));
359 
360    return nir_udiv_imm(b, v, key->block_size);
361 }
362 
363 static bool
364 is_format_native(enum pipe_format format)
365 {
366    switch (format) {
367    case PIPE_FORMAT_R8_UINT:
368    case PIPE_FORMAT_R8G8_UINT:
369    case PIPE_FORMAT_R32_UINT:
370    case PIPE_FORMAT_R32G32_UINT:
371    case PIPE_FORMAT_R16G16_UINT:
372    case PIPE_FORMAT_R16_UNORM:
373       /* TODO: debug me .. why do these fail */
374       return false;
375    case PIPE_FORMAT_R11G11B10_FLOAT:
376    case PIPE_FORMAT_R9G9B9E5_FLOAT:
377    case PIPE_FORMAT_R16G16B16A16_UINT:
378    case PIPE_FORMAT_R32G32B32A32_UINT:
379    case PIPE_FORMAT_R8G8B8A8_UINT:
380    case PIPE_FORMAT_R10G10B10A2_UNORM:
381       return true;
382    case PIPE_FORMAT_R5G6B5_UNORM:
383    case PIPE_FORMAT_R4G4B4A4_UNORM:
384    case PIPE_FORMAT_A1R5G5B5_UNORM:
385    case PIPE_FORMAT_B5G5R5A1_UNORM:
386       return false;
387    default:
388       unreachable("expected canonical");
389    }
390 }
391 
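/* Load from or store to the buffer at a block index. If `value` is NULL this
 * is a load and the texel is returned; otherwise `value` is stored and NULL is
 * returned. Formats the hardware can access directly go through the formatted
 * nir_load_agx/nir_store_agx paths; everything else is packed or unpacked
 * manually around a raw global memory access.
 */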
392 static nir_def *
393 load_store_formatted(nir_builder *b, nir_def *base, nir_def *index,
394                      nir_def *value, enum pipe_format format)
395 {
396    if (util_format_is_depth_or_stencil(format))
397       format = canonical_format_pipe(format, true);
398 
399    if (is_format_native(format)) {
400       enum pipe_format isa = ail_pixel_format[format].renderable;
401       unsigned isa_size = util_format_get_blocksize(isa);
402       unsigned isa_components = util_format_get_blocksize(format) / isa_size;
403       unsigned shift = util_logbase2(isa_components);
404 
405       if (value) {
406          nir_store_agx(b, value, base, index, .format = isa, .base = shift);
407       } else {
408          return nir_load_agx(b, 4, 32, base, index, .format = isa,
409                              .base = shift);
410       }
411    } else {
412       unsigned blocksize_B = util_format_get_blocksize(format);
413       nir_def *addr =
414          nir_iadd(b, base, nir_imul_imm(b, nir_u2u64(b, index), blocksize_B));
415 
416       if (value) {
417          nir_def *raw = nir_format_pack_rgba(b, format, value);
418 
419          if (blocksize_B <= 4) {
420             assert(raw->num_components == 1);
421             raw = nir_u2uN(b, raw, blocksize_B * 8);
422          } else {
423             assert(raw->bit_size == 32);
424             raw = nir_trim_vector(b, raw, blocksize_B / 4);
425          }
426 
427          nir_store_global(b, addr, blocksize_B, raw,
428                           nir_component_mask(raw->num_components));
429       } else {
430          nir_def *raw =
431             nir_load_global(b, addr, blocksize_B, DIV_ROUND_UP(blocksize_B, 4),
432                             MIN2(32, blocksize_B * 8));
433 
434          return nir_format_unpack_rgba(b, raw, format);
435       }
436    }
437 
438    return NULL;
439 }
440 
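/* Build the compute shader for one copy variant. Each thread handles one block
 * of the destination: the source is either a formatted buffer read (BUF2IMG)
 * or a texel fetch, and the destination is either a formatted buffer write
 * (IMG2BUF), a storage image store, or, on the block-based path, a 32x32 tile
 * staged in local memory and written back with a single block store per
 * workgroup.
 */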
441 static nir_shader *
442 build_image_copy_shader(const struct vk_meta_image_copy_key *key)
443 {
444    nir_builder build =
445       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "vk-meta-copy");
446 
447    nir_builder *b = &build;
448    b->shader->info.workgroup_size[0] = TILE_WIDTH;
449    b->shader->info.workgroup_size[1] = TILE_HEIGHT;
450 
451    bool src_is_buf = key->type == BUF2IMG;
452    bool dst_is_buf = key->type == IMG2BUF;
453 
454    bool msaa = key->nr_samples > 1;
455    enum glsl_sampler_dim dim_2d =
456       msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
457    enum glsl_sampler_dim dim_src = src_is_buf ? GLSL_SAMPLER_DIM_BUF : dim_2d;
458    enum glsl_sampler_dim dim_dst = dst_is_buf ? GLSL_SAMPLER_DIM_BUF : dim_2d;
459 
460    const struct glsl_type *texture_type =
461       glsl_sampler_type(dim_src, false, !src_is_buf, GLSL_TYPE_UINT);
462 
463    const struct glsl_type *image_type =
464       glsl_image_type(dim_dst, !dst_is_buf, GLSL_TYPE_UINT);
465 
466    nir_variable *texture =
467       nir_variable_create(b->shader, nir_var_uniform, texture_type, "source");
468    nir_variable *image =
469       nir_variable_create(b->shader, nir_var_image, image_type, "dest");
470 
471    image->data.descriptor_set = 0;
472    image->data.binding = BINDING_OUTPUT;
473    image->data.access = ACCESS_NON_READABLE;
474 
475    texture->data.descriptor_set = 0;
476    texture->data.binding = BINDING_INPUT;
477 
478    /* Grab the offset vectors */
479    nir_def *src_offset_el = nir_load_push_constant(
480       b, 3, 32,
481       nir_imm_int(b, offsetof(struct vk_meta_push_data, src_offset_el)));
482 
483    nir_def *dst_offset_el = nir_load_push_constant(
484       b, 3, 32,
485       nir_imm_int(b, offsetof(struct vk_meta_push_data, dst_offset_el)));
486 
487    nir_def *grid_2d_el = nir_load_push_constant(
488       b, 2, 32, nir_imm_int(b, offsetof(struct vk_meta_push_data, grid_el)));
489 
490    /* We're done setting up variables, do the copy */
491    nir_def *coord = nir_load_global_invocation_id(b, 32);
492 
493    /* The destination format is already canonical; convert it to an ISA format */
494    enum pipe_format isa_format;
495    if (key->block_based) {
496       isa_format =
497          ail_pixel_format[canonical_format_pipe(key->dst_format, true)]
498             .renderable;
499       assert(isa_format != PIPE_FORMAT_NONE);
500    }
501 
502    nir_def *local_offset = nir_imm_intN_t(b, 0, 16);
503    nir_def *lid = nir_trim_vector(b, nir_load_local_invocation_id(b), 2);
504    lid = nir_u2u16(b, lid);
505 
506    nir_def *src_coord = src_is_buf ? coord : nir_iadd(b, coord, src_offset_el);
507    nir_def *dst_coord = dst_is_buf ? coord : nir_iadd(b, coord, dst_offset_el);
508 
509    nir_def *image_deref = &nir_build_deref_var(b, image)->def;
510 
511    nir_def *coord_2d_el = nir_trim_vector(b, coord, 2);
512    nir_def *in_bounds;
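   /* For block-based copies the workgroup covers one whole 32x32 destination
    * tile. Pull both coordinates back to the tile origin and mark threads that
    * fall outside the copy region as out of bounds; those threads instead
    * re-load the existing destination texels below so the full tile can be
    * written back with a block store.
    */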
513    if (key->block_based) {
514       nir_def *offset_in_block_el =
515          nir_umod_imm(b, nir_trim_vector(b, dst_offset_el, 2), TILE_WIDTH);
516 
517       dst_coord =
518          nir_vector_insert_imm(b, nir_isub(b, dst_coord, offset_in_block_el),
519                                nir_channel(b, dst_coord, 2), 2);
520 
521       src_coord =
522          nir_vector_insert_imm(b, nir_isub(b, src_coord, offset_in_block_el),
523                                nir_channel(b, src_coord, 2), 2);
524 
525       in_bounds = nir_uge(b, coord_2d_el, offset_in_block_el);
526       in_bounds = nir_iand(
527          b, in_bounds,
528          nir_ult(b, coord_2d_el, nir_iadd(b, offset_in_block_el, grid_2d_el)));
529    } else {
530       in_bounds = nir_ult(b, coord_2d_el, grid_2d_el);
531    }
532 
533    /* Special case handle buffer indexing */
534    if (dst_is_buf) {
535       assert(!key->block_based);
536       dst_coord = linearize_coords(b, dst_coord, key);
537    } else if (src_is_buf) {
538       src_coord = linearize_coords(b, src_coord, key);
539    }
540 
541    for (unsigned s = 0; s < key->nr_samples; ++s) {
542       nir_def *ms_index = nir_imm_int(b, s);
543       nir_def *value1, *value2;
544 
545       nir_push_if(b, nir_ball(b, in_bounds));
546       {
547          /* Copy formatted texel from texture to storage image */
548          nir_deref_instr *deref = nir_build_deref_var(b, texture);
549 
550          if (src_is_buf) {
551             value1 = load_store_formatted(b, get_push(b, buffer), src_coord,
552                                           NULL, key->dst_format);
553          } else {
554             if (msaa) {
555                value1 = nir_txf_ms_deref(b, deref, src_coord, ms_index);
556             } else {
557                value1 = nir_txf_deref(b, deref, src_coord, NULL);
558             }
559 
560             /* Munge according to the implicit conversions so we get a bit copy */
561             if (key->src_format != key->dst_format) {
562                nir_def *packed =
563                   nir_format_pack_rgba(b, key->src_format, value1);
564 
565                value1 = nir_format_unpack_rgba(b, packed, key->dst_format);
566             }
567          }
568 
569          if (dst_is_buf) {
570             load_store_formatted(b, get_push(b, buffer), dst_coord, value1,
571                                  key->dst_format);
572          } else if (!key->block_based) {
573             nir_image_deref_store(b, image_deref, nir_pad_vec4(b, dst_coord),
574                                   ms_index, value1, nir_imm_int(b, 0),
575                                   .image_dim = dim_dst,
576                                   .image_array = !dst_is_buf);
577          }
578       }
579       nir_push_else(b, NULL);
580       if (key->block_based) {
581          /* Copy back the existing destination content */
582          value2 = nir_image_deref_load(b, 4, 32, image_deref,
583                                        nir_pad_vec4(b, dst_coord), ms_index,
584                                        nir_imm_int(b, 0), .image_dim = dim_dst,
585                                        .image_array = !dst_is_buf);
586       }
587       nir_pop_if(b, NULL);
588 
589       if (key->block_based) {
590          nir_store_local_pixel_agx(b, nir_if_phi(b, value1, value2),
591                                    nir_imm_int(b, 1 << s), lid, .base = 0,
592                                    .write_mask = 0xf, .format = isa_format,
593                                    .explicit_coord = true);
594       }
595    }
596 
597    if (key->block_based) {
598       assert(!dst_is_buf);
599 
600       nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);
601 
602       nir_push_if(b, nir_ball(b, nir_ieq_imm(b, lid, 0)));
603       {
604          nir_image_deref_store_block_agx(
605             b, image_deref, local_offset, dst_coord, .format = isa_format,
606             .image_dim = dim_2d, .image_array = true, .explicit_coord = true);
607       }
608       nir_pop_if(b, NULL);
609       b->shader->info.cs.image_block_size_per_thread_agx =
610          util_format_get_blocksize(key->dst_format);
611    }
612 
613    return b->shader;
614 }
615 
616 static VkResult
617 get_image_copy_descriptor_set_layout(struct vk_device *device,
618                                      struct vk_meta_device *meta,
619                                      VkDescriptorSetLayout *layout_out,
620                                      enum copy_type type)
621 {
622    const char *keys[] = {
623       [IMG2BUF] = "vk-meta-copy-image-to-buffer-descriptor-set-layout",
624       [BUF2IMG] = "vk-meta-copy-buffer-to-image-descriptor-set-layout",
625       [IMG2IMG] = "vk-meta-copy-image-to-image-descriptor-set-layout",
626    };
627 
628    VkDescriptorSetLayout from_cache = vk_meta_lookup_descriptor_set_layout(
629       meta, keys[type], strlen(keys[type]));
630    if (from_cache != VK_NULL_HANDLE) {
631       *layout_out = from_cache;
632       return VK_SUCCESS;
633    }
634 
635    const VkDescriptorSetLayoutBinding bindings[] = {
636       {
637          .binding = BINDING_OUTPUT,
638          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
639          .descriptorCount = 1,
640          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
641       },
642       {
643          .binding = BINDING_INPUT,
644          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
645          .descriptorCount = 1,
646          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
647       },
648    };
649 
650    const VkDescriptorSetLayoutCreateInfo info = {
651       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
652       .bindingCount = ARRAY_SIZE(bindings),
653       .pBindings = bindings,
654    };
655 
656    return vk_meta_create_descriptor_set_layout(device, meta, &info, keys[type],
657                                                strlen(keys[type]), layout_out);
658 }
659 
660 static VkResult
661 get_image_copy_pipeline_layout(struct vk_device *device,
662                                struct vk_meta_device *meta,
663                                struct vk_meta_image_copy_key *key,
664                                VkDescriptorSetLayout set_layout,
665                                VkPipelineLayout *layout_out,
666                                enum copy_type type)
667 {
668    const char *keys[] = {
669       [IMG2BUF] = "vk-meta-copy-image-to-buffer-pipeline-layout",
670       [BUF2IMG] = "vk-meta-copy-buffer-to-image-pipeline-layout",
671       [IMG2IMG] = "vk-meta-copy-image-to-image-pipeline-layout",
672    };
673 
674    VkPipelineLayout from_cache =
675       vk_meta_lookup_pipeline_layout(meta, keys[type], strlen(keys[type]));
676    if (from_cache != VK_NULL_HANDLE) {
677       *layout_out = from_cache;
678       return VK_SUCCESS;
679    }
680 
681    VkPipelineLayoutCreateInfo info = {
682       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
683       .setLayoutCount = 1,
684       .pSetLayouts = &set_layout,
685    };
686 
687    const VkPushConstantRange push_range = {
688       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
689       .offset = 0,
690       .size = sizeof(struct vk_meta_push_data),
691    };
692 
693    info.pushConstantRangeCount = 1;
694    info.pPushConstantRanges = &push_range;
695 
696    return vk_meta_create_pipeline_layout(device, meta, &info, keys[type],
697                                          strlen(keys[type]), layout_out);
698 }
699 
700 static VkResult
701 get_image_copy_pipeline(struct vk_device *device, struct vk_meta_device *meta,
702                         const struct vk_meta_image_copy_key *key,
703                         VkPipelineLayout layout, VkPipeline *pipeline_out)
704 {
705    VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key, sizeof(*key));
706    if (from_cache != VK_NULL_HANDLE) {
707       *pipeline_out = from_cache;
708       return VK_SUCCESS;
709    }
710 
711    const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
712       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
713       .nir = build_image_copy_shader(key),
714    };
715    const VkPipelineShaderStageCreateInfo cs_info = {
716       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
717       .pNext = &nir_info,
718       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
719       .pName = "main",
720    };
721 
722    const VkComputePipelineCreateInfo info = {
723       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
724       .stage = cs_info,
725       .layout = layout,
726    };
727 
728    VkResult result = vk_meta_create_compute_pipeline(
729       device, meta, &info, key, sizeof(*key), pipeline_out);
730    ralloc_free(nir_info.nir);
731 
732    return result;
733 }
734 
735 static void
736 hk_meta_copy_image_to_buffer2(struct vk_command_buffer *cmd,
737                               struct vk_meta_device *meta,
738                               const VkCopyImageToBufferInfo2 *pCopyBufferInfo)
739 {
740    VK_FROM_HANDLE(vk_image, image, pCopyBufferInfo->srcImage);
741    VK_FROM_HANDLE(vk_image, src_image, pCopyBufferInfo->srcImage);
742    VK_FROM_HANDLE(hk_buffer, buffer, pCopyBufferInfo->dstBuffer);
743 
744    struct vk_device *device = cmd->base.device;
745    const struct vk_device_dispatch_table *disp = &device->dispatch_table;
746 
747    VkResult result;
748 
749    VkDescriptorSetLayout set_layout;
750    result =
751       get_image_copy_descriptor_set_layout(device, meta, &set_layout, IMG2BUF);
752    if (unlikely(result != VK_SUCCESS)) {
753       vk_command_buffer_set_error(cmd, result);
754       return;
755    }
756 
757    bool per_layer =
758       util_format_is_compressed(vk_format_to_pipe_format(image->format));
759 
760    for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) {
761       const VkBufferImageCopy2 *region = &pCopyBufferInfo->pRegions[i];
762 
763       unsigned layers = MAX2(region->imageExtent.depth,
764                              vk_image_subresource_layer_count(
765                                 src_image, &region->imageSubresource));
766       unsigned layer_iters = per_layer ? layers : 1;
767 
768       for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
769 
770          VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
771          VkFormat aspect_fmt = aspect_format(image->format, aspect);
772          VkFormat canonical = canonical_format(aspect_fmt);
773 
774          uint32_t blocksize_B =
775             util_format_get_blocksize(vk_format_to_pipe_format(canonical));
776 
777          enum pipe_format p_format = vk_format_to_pipe_format(image->format);
778 
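         /* bufferRowLength/bufferImageHeight are either 0 (tightly packed) or
          * at least the copy extent, so MAX2 with the image extent yields the
          * buffer strides. The strides are converted to bytes of (compressed)
          * blocks.
          */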
779          unsigned row_extent = util_format_get_nblocksx(
780                                   p_format, MAX2(region->bufferRowLength,
781                                                  region->imageExtent.width)) *
782                                blocksize_B;
783          unsigned slice_extent =
784             util_format_get_nblocksy(
785                p_format,
786                MAX2(region->bufferImageHeight, region->imageExtent.height)) *
787             row_extent;
788          unsigned layer_extent =
789             util_format_get_nblocksz(p_format, region->imageExtent.depth) *
790             slice_extent;
791 
792          bool is_3d = region->imageExtent.depth > 1;
793 
794          struct vk_meta_image_copy_key key = {
795             .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
796             .type = IMG2BUF,
797             .block_size = blocksize_B,
798             .nr_samples = image->samples,
799             .src_format = vk_format_to_pipe_format(canonical),
800             .dst_format = vk_format_to_pipe_format(canonical),
801          };
802 
803          VkPipelineLayout pipeline_layout;
804          result = get_image_copy_pipeline_layout(device, meta, &key, set_layout,
805                                                  &pipeline_layout, IMG2BUF);
806          if (unlikely(result != VK_SUCCESS)) {
807             vk_command_buffer_set_error(cmd, result);
808             return;
809          }
810 
811          VkImageView src_view;
812          const VkImageViewUsageCreateInfo src_view_usage = {
813             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
814             .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
815          };
816          const VkImageViewCreateInfo src_view_info = {
817             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
818             .flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
819             .pNext = &src_view_usage,
820             .image = pCopyBufferInfo->srcImage,
821             .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
822             .format = canonical,
823             .subresourceRange =
824                {
825                   .aspectMask = region->imageSubresource.aspectMask,
826                   .baseMipLevel = region->imageSubresource.mipLevel,
827                   .baseArrayLayer =
828                      MAX2(region->imageOffset.z,
829                           region->imageSubresource.baseArrayLayer) +
830                      layer_offs,
831                   .layerCount = per_layer ? 1 : layers,
832                   .levelCount = 1,
833                },
834          };
835 
836          result =
837             vk_meta_create_image_view(cmd, meta, &src_view_info, &src_view);
838          if (unlikely(result != VK_SUCCESS)) {
839             vk_command_buffer_set_error(cmd, result);
840             return;
841          }
842 
843          VkDescriptorImageInfo src_info = {
844             .imageLayout = pCopyBufferInfo->srcImageLayout,
845             .imageView = src_view,
846          };
847 
848          VkWriteDescriptorSet desc_write = {
849             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
850             .dstSet = 0,
851             .dstBinding = BINDING_INPUT,
852             .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
853             .descriptorCount = 1,
854             .pImageInfo = &src_info,
855          };
856 
857          disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
858                                        VK_PIPELINE_BIND_POINT_COMPUTE,
859                                        pipeline_layout, 0, 1, &desc_write);
860 
861          VkPipeline pipeline;
862          result = get_image_copy_pipeline(device, meta, &key, pipeline_layout,
863                                           &pipeline);
864          if (unlikely(result != VK_SUCCESS)) {
865             vk_command_buffer_set_error(cmd, result);
866             return;
867          }
868 
869          disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
870                                VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
871 
872          enum pipe_format p_src_fmt =
873             vk_format_to_pipe_format(src_image->format);
874 
875          struct vk_meta_push_data push = {
876             .buffer = hk_buffer_address(buffer, region->bufferOffset),
877             .row_extent = row_extent,
878             .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,
879 
880             .src_offset_el[0] =
881                util_format_get_nblocksx(p_src_fmt, region->imageOffset.x),
882             .src_offset_el[1] =
883                util_format_get_nblocksy(p_src_fmt, region->imageOffset.y),
884 
885             .grid_el[0] =
886                util_format_get_nblocksx(p_format, region->imageExtent.width),
887             .grid_el[1] =
888                util_format_get_nblocksy(p_format, region->imageExtent.height),
889             .grid_el[2] = per_layer ? 1 : layers,
890          };
891 
892          push.buffer += push.slice_or_layer_extent * layer_offs;
893 
894          disp->CmdPushConstants(vk_command_buffer_to_handle(cmd),
895                                 pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
896                                 sizeof(push), &push);
897 
898          disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
899                            DIV_ROUND_UP(push.grid_el[0], 32),
900                            DIV_ROUND_UP(push.grid_el[1], 32), push.grid_el[2]);
901       }
902    }
903 }
904 
905 static void
906 hk_meta_dispatch_to_image(struct vk_command_buffer *cmd,
907                           const struct vk_device_dispatch_table *disp,
908                           VkPipelineLayout pipeline_layout,
909                           struct vk_meta_push_data *push, VkOffset3D offset,
910                           VkExtent3D extent, bool per_layer, unsigned layers,
911                           enum pipe_format p_dst_fmt, enum pipe_format p_format)
912 {
913    push->dst_offset_el[0] = util_format_get_nblocksx(p_dst_fmt, offset.x);
914    push->dst_offset_el[1] = util_format_get_nblocksy(p_dst_fmt, offset.y);
915    push->dst_offset_el[2] = 0;
916 
917    push->grid_el[0] = util_format_get_nblocksx(p_format, extent.width);
918    push->grid_el[1] = util_format_get_nblocksy(p_format, extent.height);
919    push->grid_el[2] = per_layer ? 1 : layers;
920 
921    unsigned w_el = util_format_get_nblocksx(p_format, extent.width);
922    unsigned h_el = util_format_get_nblocksy(p_format, extent.height);
923 
924    /* Expand the grid so destinations are in tiles */
925    unsigned expanded_x0 = push->dst_offset_el[0] & ~(TILE_WIDTH - 1);
926    unsigned expanded_y0 = push->dst_offset_el[1] & ~(TILE_HEIGHT - 1);
927    unsigned expanded_x1 = align(push->dst_offset_el[0] + w_el, TILE_WIDTH);
928    unsigned expanded_y1 = align(push->dst_offset_el[1] + h_el, TILE_HEIGHT);
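   /* e.g. copying 10x10 blocks to destination offset (30, 0) expands to
    * x in [0, 64) and y in [0, 32), i.e. a 2x1 grid of 32x32 workgroups.
    */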
929 
930    /* TODO: clamp to the destination size to save some redundant threads? */
931 
932    disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
933                           VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(*push), push);
934 
935    disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
936                      (expanded_x1 - expanded_x0) / TILE_WIDTH,
937                      (expanded_y1 - expanded_y0) / TILE_HEIGHT,
938                      push->grid_el[2]);
939 }
940 
941 static void
942 hk_meta_copy_buffer_to_image2(struct vk_command_buffer *cmd,
943                               struct vk_meta_device *meta,
944                               const struct VkCopyBufferToImageInfo2 *info)
945 {
946    VK_FROM_HANDLE(vk_image, image, info->dstImage);
947    VK_FROM_HANDLE(hk_buffer, buffer, info->srcBuffer);
948 
949    struct vk_device *device = cmd->base.device;
950    const struct vk_device_dispatch_table *disp = &device->dispatch_table;
951 
952    VkDescriptorSetLayout set_layout;
953    VkResult result =
954       get_image_copy_descriptor_set_layout(device, meta, &set_layout, BUF2IMG);
955    if (unlikely(result != VK_SUCCESS)) {
956       vk_command_buffer_set_error(cmd, result);
957       return;
958    }
959 
960    bool per_layer =
961       util_format_is_compressed(vk_format_to_pipe_format(image->format));
962 
963    for (unsigned r = 0; r < info->regionCount; ++r) {
964       const VkBufferImageCopy2 *region = &info->pRegions[r];
965 
966       unsigned layers = MAX2(
967          region->imageExtent.depth,
968          vk_image_subresource_layer_count(image, &region->imageSubresource));
969       unsigned layer_iters = per_layer ? layers : 1;
970 
971       for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
972          VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
973          VkFormat aspect_fmt = aspect_format(image->format, aspect);
974          VkFormat canonical = canonical_format(aspect_fmt);
975          enum pipe_format p_format = vk_format_to_pipe_format(aspect_fmt);
976          uint32_t blocksize_B = util_format_get_blocksize(p_format);
977          bool is_3d = region->imageExtent.depth > 1;
978 
979          struct vk_meta_image_copy_key key = {
980             .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
981             .type = BUF2IMG,
982             .block_size = blocksize_B,
983             .nr_samples = image->samples,
984             .src_format = vk_format_to_pipe_format(canonical),
985             .dst_format = canonical_format_pipe(
986                vk_format_to_pipe_format(aspect_format(image->format, aspect)),
987                false),
988 
989             /* TODO: MSAA path */
990             .block_based =
991                (image->image_type != VK_IMAGE_TYPE_1D) && image->samples == 1,
992          };
993 
994          VkPipelineLayout pipeline_layout;
995          result = get_image_copy_pipeline_layout(device, meta, &key, set_layout,
996                                                  &pipeline_layout, BUF2IMG);
997          if (unlikely(result != VK_SUCCESS)) {
998             vk_command_buffer_set_error(cmd, result);
999             return;
1000          }
1001 
1002          unsigned row_extent = util_format_get_nblocksx(
1003                                   p_format, MAX2(region->bufferRowLength,
1004                                                  region->imageExtent.width)) *
1005                                blocksize_B;
1006          unsigned slice_extent =
1007             util_format_get_nblocksy(
1008                p_format,
1009                MAX2(region->bufferImageHeight, region->imageExtent.height)) *
1010             row_extent;
1011          unsigned layer_extent =
1012             util_format_get_nblocksz(p_format, region->imageExtent.depth) *
1013             slice_extent;
1014 
1015          VkImageView dst_view;
1016          const VkImageViewUsageCreateInfo dst_view_usage = {
1017             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1018             .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1019          };
1020          const VkImageViewCreateInfo dst_view_info = {
1021             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1022             .flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
1023             .pNext = &dst_view_usage,
1024             .image = info->dstImage,
1025             .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1026             .format = canonical,
1027             .subresourceRange =
1028                {
1029                   .aspectMask = region->imageSubresource.aspectMask,
1030                   .baseMipLevel = region->imageSubresource.mipLevel,
1031                   .baseArrayLayer =
1032                      MAX2(region->imageOffset.z,
1033                           region->imageSubresource.baseArrayLayer) +
1034                      layer_offs,
1035                   .layerCount = per_layer ? 1 : layers,
1036                   .levelCount = 1,
1037                },
1038          };
1039 
1040          result =
1041             vk_meta_create_image_view(cmd, meta, &dst_view_info, &dst_view);
1042          if (unlikely(result != VK_SUCCESS)) {
1043             vk_command_buffer_set_error(cmd, result);
1044             return;
1045          }
1046 
1047          const VkDescriptorImageInfo dst_info = {
1048             .imageView = dst_view,
1049             .imageLayout = info->dstImageLayout,
1050          };
1051 
1052          VkWriteDescriptorSet desc_write = {
1053             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1054             .dstSet = 0,
1055             .dstBinding = BINDING_OUTPUT,
1056             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1057             .descriptorCount = 1,
1058             .pImageInfo = &dst_info,
1059          };
1060 
1061          disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1062                                        VK_PIPELINE_BIND_POINT_COMPUTE,
1063                                        pipeline_layout, 0, 1, &desc_write);
1064 
1065          VkPipeline pipeline;
1066          result = get_image_copy_pipeline(device, meta, &key, pipeline_layout,
1067                                           &pipeline);
1068          if (unlikely(result != VK_SUCCESS)) {
1069             vk_command_buffer_set_error(cmd, result);
1070             return;
1071          }
1072 
1073          disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1074                                VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1075 
1076          struct vk_meta_push_data push = {
1077             .buffer = hk_buffer_address(buffer, region->bufferOffset),
1078             .row_extent = row_extent,
1079             .slice_or_layer_extent = is_3d ? slice_extent : layer_extent,
1080          };
1081 
1082          push.buffer += push.slice_or_layer_extent * layer_offs;
1083 
1084          hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
1085                                    region->imageOffset, region->imageExtent,
1086                                    per_layer, layers, p_format, p_format);
1087       }
1088    }
1089 }
1090 
1091 static void
1092 hk_meta_copy_image2(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1093                     const struct VkCopyImageInfo2 *info)
1094 {
1095    VK_FROM_HANDLE(vk_image, src_image, info->srcImage);
1096    VK_FROM_HANDLE(vk_image, dst_image, info->dstImage);
1097 
1098    struct vk_device *device = cmd->base.device;
1099    const struct vk_device_dispatch_table *disp = &device->dispatch_table;
1100 
1101    VkDescriptorSetLayout set_layout;
1102    VkResult result =
1103       get_image_copy_descriptor_set_layout(device, meta, &set_layout, BUF2IMG);
1104    if (unlikely(result != VK_SUCCESS)) {
1105       vk_command_buffer_set_error(cmd, result);
1106       return;
1107    }
1108 
1109    bool per_layer =
1110       util_format_is_compressed(vk_format_to_pipe_format(src_image->format)) ||
1111       util_format_is_compressed(vk_format_to_pipe_format(dst_image->format));
1112 
1113    for (unsigned r = 0; r < info->regionCount; ++r) {
1114       const VkImageCopy2 *region = &info->pRegions[r];
1115 
1116       unsigned layers = MAX2(
1117          vk_image_subresource_layer_count(src_image, &region->srcSubresource),
1118          region->extent.depth);
1119       unsigned layer_iters = per_layer ? layers : 1;
1120 
1121       for (unsigned layer_offs = 0; layer_offs < layer_iters; ++layer_offs) {
1122          u_foreach_bit(aspect, region->srcSubresource.aspectMask) {
1123             /* We use the source format throughout for consistent scaling with
1124              * compressed<-->uncompressed copies, where the extents are defined
1125              * to follow the source.
1126              */
1127             VkFormat aspect_fmt = aspect_format(src_image->format, 1 << aspect);
1128             VkFormat canonical = canonical_format(aspect_fmt);
1129             uint32_t blocksize_B =
1130                util_format_get_blocksize(vk_format_to_pipe_format(canonical));
1131 
1132             VkImageAspectFlagBits dst_aspect_mask =
1133                vk_format_get_ycbcr_info(dst_image->format) ||
1134                      vk_format_get_ycbcr_info(src_image->format)
1135                   ? region->dstSubresource.aspectMask
1136                   : (1 << aspect);
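            /* For multi-planar (ycbcr) images the destination aspect comes
             * from dstSubresource since plane aspects need not match;
             * otherwise copy aspect-for-aspect.
             */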
1137 
1138             struct vk_meta_image_copy_key key = {
1139                .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
1140                .type = IMG2IMG,
1141                .block_size = blocksize_B,
1142                .nr_samples = dst_image->samples,
1143                .src_format = vk_format_to_pipe_format(canonical),
1144                .dst_format =
1145                   canonical_format_pipe(vk_format_to_pipe_format(aspect_format(
1146                                            dst_image->format, dst_aspect_mask)),
1147                                         false),
1148 
1149                /* TODO: MSAA path */
1150                .block_based = (dst_image->image_type != VK_IMAGE_TYPE_1D) &&
1151                               dst_image->samples == 1,
1152             };
1153 
1154             assert(key.nr_samples == src_image->samples);
1155 
1156             VkPipelineLayout pipeline_layout;
1157             result = get_image_copy_pipeline_layout(
1158                device, meta, &key, set_layout, &pipeline_layout, IMG2IMG);
1159             if (unlikely(result != VK_SUCCESS)) {
1160                vk_command_buffer_set_error(cmd, result);
1161                return;
1162             }
1163 
1164             VkWriteDescriptorSet desc_writes[2];
1165 
1166             VkImageView src_view;
1167             const VkImageViewUsageCreateInfo src_view_usage = {
1168                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1169                .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
1170             };
1171             const VkImageViewCreateInfo src_view_info = {
1172                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1173                .flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
1174                .pNext = &src_view_usage,
1175                .image = info->srcImage,
1176                .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1177                .format = canonical,
1178                .subresourceRange =
1179                   {
1180                      .aspectMask =
1181                         region->srcSubresource.aspectMask & (1 << aspect),
1182                      .baseMipLevel = region->srcSubresource.mipLevel,
1183                      .baseArrayLayer =
1184                         MAX2(region->srcOffset.z,
1185                              region->srcSubresource.baseArrayLayer) +
1186                         layer_offs,
1187                      .layerCount = per_layer ? 1 : layers,
1188                      .levelCount = 1,
1189                   },
1190             };
1191 
1192             result =
1193                vk_meta_create_image_view(cmd, meta, &src_view_info, &src_view);
1194             if (unlikely(result != VK_SUCCESS)) {
1195                vk_command_buffer_set_error(cmd, result);
1196                return;
1197             }
1198 
1199             VkDescriptorImageInfo src_info = {
1200                .imageLayout = info->srcImageLayout,
1201                .imageView = src_view,
1202             };
1203 
1204             VkImageView dst_view;
1205             const VkImageViewUsageCreateInfo dst_view_usage = {
1206                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
1207                .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1208             };
1209             const VkImageViewCreateInfo dst_view_info = {
1210                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1211                .flags = VK_IMAGE_VIEW_CREATE_INTERNAL_MESA,
1212                .pNext = &dst_view_usage,
1213                .image = info->dstImage,
1214                .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
1215                .format = vk_format_from_pipe_format(key.dst_format),
1216                .subresourceRange =
1217                   {
1218                      .aspectMask = dst_aspect_mask,
1219                      .baseMipLevel = region->dstSubresource.mipLevel,
1220                      .baseArrayLayer =
1221                         MAX2(region->dstOffset.z,
1222                              region->dstSubresource.baseArrayLayer) +
1223                         layer_offs,
1224                      .layerCount = per_layer ? 1 : layers,
1225                      .levelCount = 1,
1226                   },
1227             };
1228 
1229             result =
1230                vk_meta_create_image_view(cmd, meta, &dst_view_info, &dst_view);
1231             if (unlikely(result != VK_SUCCESS)) {
1232                vk_command_buffer_set_error(cmd, result);
1233                return;
1234             }
1235 
1236             const VkDescriptorImageInfo dst_info = {
1237                .imageView = dst_view,
1238                .imageLayout = info->dstImageLayout,
1239             };
1240 
1241             desc_writes[0] = (VkWriteDescriptorSet){
1242                .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1243                .dstSet = 0,
1244                .dstBinding = BINDING_OUTPUT,
1245                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1246                .descriptorCount = 1,
1247                .pImageInfo = &dst_info,
1248             };
1249 
1250             desc_writes[1] = (VkWriteDescriptorSet){
1251                .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1252                .dstSet = 0,
1253                .dstBinding = BINDING_INPUT,
1254                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1255                .descriptorCount = 1,
1256                .pImageInfo = &src_info,
1257             };
1258 
1259             disp->CmdPushDescriptorSetKHR(
1260                vk_command_buffer_to_handle(cmd), VK_PIPELINE_BIND_POINT_COMPUTE,
1261                pipeline_layout, 0, ARRAY_SIZE(desc_writes), desc_writes);
1262 
1263             VkPipeline pipeline;
1264             result = get_image_copy_pipeline(device, meta, &key,
1265                                              pipeline_layout, &pipeline);
1266             if (unlikely(result != VK_SUCCESS)) {
1267                vk_command_buffer_set_error(cmd, result);
1268                return;
1269             }
1270 
1271             disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1272                                   VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1273 
1274             enum pipe_format p_src_fmt =
1275                vk_format_to_pipe_format(src_image->format);
1276             enum pipe_format p_dst_fmt =
1277                vk_format_to_pipe_format(dst_image->format);
1278             enum pipe_format p_format = vk_format_to_pipe_format(aspect_fmt);
1279 
1280             struct vk_meta_push_data push = {
1281                .src_offset_el[0] =
1282                   util_format_get_nblocksx(p_src_fmt, region->srcOffset.x),
1283                .src_offset_el[1] =
1284                   util_format_get_nblocksy(p_src_fmt, region->srcOffset.y),
1285             };
1286 
1287             hk_meta_dispatch_to_image(cmd, disp, pipeline_layout, &push,
1288                                       region->dstOffset, region->extent,
1289                                       per_layer, layers, p_dst_fmt, p_format);
1290          }
1291       }
1292    }
1293 }
1294 
1295 struct vk_meta_image_to_buffer_push_data {
1296    uint32_t dest_offset_el;
1297 };
1298 
1299 #define get_image_push(b, name)                                                \
1300    nir_load_push_constant(                                                     \
1301       b, 1, sizeof(((struct vk_meta_image_to_buffer_push_data *)0)->name) * 8, \
1302       nir_imm_int(b,                                                           \
1303                   offsetof(struct vk_meta_image_to_buffer_push_data, name)))
1304 
1305 enum copy_source {
1306    COPY_SOURCE_PATTERN,
1307    COPY_SOURCE_BUFFER,
1308 };
1309 
1310 struct vk_meta_buffer_copy_key {
1311    enum vk_meta_object_key_type key_type;
1312    enum copy_source source;
1313 
1314    /* Power-of-two block size for the transfer, range [1, 16] */
1315    uint8_t blocksize;
1316    uint8_t pad[3];
1317 };
1318 static_assert(sizeof(struct vk_meta_buffer_copy_key) == 12, "packed");
1319 
1320 /* XXX: TODO: move to common */
1321 /* Copyright © Microsoft Corporation */
1322 static nir_def *
1323 dzn_nir_create_bo_desc(nir_builder *b, nir_variable_mode mode,
1324                        uint32_t desc_set, uint32_t binding, const char *name,
1325                        unsigned access, const struct glsl_type *dummy_type)
1326 {
1327    nir_variable *var = nir_variable_create(b->shader, mode, dummy_type, name);
1328    var->data.descriptor_set = desc_set;
1329    var->data.binding = binding;
1330    var->data.access = access;
1331 
1332    assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
1333    if (mode == nir_var_mem_ubo)
1334       b->shader->info.num_ubos++;
1335    else
1336       b->shader->info.num_ssbos++;
1337 
1338    VkDescriptorType desc_type = var->data.mode == nir_var_mem_ubo
1339                                    ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
1340                                    : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1341    nir_address_format addr_format =
1342       nir_address_format_64bit_global_32bit_offset; /* XXX */
1343    nir_def *index = nir_vulkan_resource_index(
1344       b, nir_address_format_num_components(addr_format),
1345       nir_address_format_bit_size(addr_format), nir_imm_int(b, 0),
1346       .desc_set = desc_set, .binding = binding, .desc_type = desc_type);
1347 
1348    nir_def *desc = nir_load_vulkan_descriptor(
1349       b, nir_address_format_num_components(addr_format),
1350       nir_address_format_bit_size(addr_format), index, .desc_type = desc_type);
1351 
1352    return desc;
1353 }
1354 
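/* GLSL type with the same size as a power-of-two block: 1 -> uint8_t,
 * 2 -> uint16_t, 4 -> a 32-bit uint, 8 -> uvec2, 16 -> uvec4.
 */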
1355 static const struct glsl_type *
1356 type_for_blocksize(uint8_t blocksize)
1357 {
1358    assert(util_is_power_of_two_nonzero(blocksize) && blocksize <= 16);
1359 
1360    if (blocksize > 4)
1361       return glsl_vector_type(GLSL_TYPE_UINT, blocksize / 4);
1362    else
1363       return glsl_uintN_t_type(8 * blocksize);
1364 }
1365 
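/* Compute shader for fills and buffer copies. Each invocation handles one
 * block of key->blocksize bytes, indexed by the x component of the global
 * invocation ID: copies load the block from the source UBO, fills replicate
 * the 32-bit pattern supplied as a push constant.
 */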
1366 static nir_shader *
1367 build_buffer_copy_shader(const struct vk_meta_buffer_copy_key *key)
1368 {
1369    nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
1370                                                       "vk-meta-copy-to-buffer");
1371    nir_builder *b = &build;
1372 
1373    const struct glsl_type *type =
1374       glsl_array_type(type_for_blocksize(key->blocksize), 0, key->blocksize);
1375 
1376    nir_def *index = nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
1377    nir_def *value;
1378 
1379    if (key->source == COPY_SOURCE_BUFFER) {
1380       nir_def *ubo =
1381          dzn_nir_create_bo_desc(b, nir_var_mem_ubo, 0, BINDING_INPUT, "source",
1382                                 ACCESS_NON_WRITEABLE, type);
1383       nir_deref_instr *ubo_deref =
1384          nir_build_deref_cast(b, ubo, nir_var_mem_ubo, type, key->blocksize);
1385 
1386       nir_deref_instr *element_deref = nir_build_deref_array(
1387          b, ubo_deref, nir_u2uN(b, index, ubo_deref->def.bit_size));
1388 
1389       value = nir_load_deref(b, element_deref);
1390    } else {
1391       nir_def *pattern = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0));
1392 
1393       assert(key->blocksize >= 4 && "fills are at least 32-bit");
1394       value = nir_replicate(b, pattern, key->blocksize / 4);
1395    }
1396 
1397    /* Write out raw bytes to SSBO */
1398    nir_def *ssbo =
1399       dzn_nir_create_bo_desc(b, nir_var_mem_ssbo, 0, BINDING_OUTPUT,
1400                              "destination", ACCESS_NON_READABLE, type);
1401 
1402    nir_deref_instr *ssbo_deref =
1403       nir_build_deref_cast(b, ssbo, nir_var_mem_ssbo, type, key->blocksize);
1404 
1405    nir_deref_instr *element_deref = nir_build_deref_array(
1406       b, ssbo_deref, nir_u2uN(b, index, ssbo_deref->def.bit_size));
1407 
1408    nir_store_deref(b, element_deref, value,
1409                    nir_component_mask(value->num_components));
1410 
1411    return b->shader;
1412 }
1413 
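/* The set layout always declares both the output SSBO and the input UBO; for
 * pattern fills the input binding is simply never written. The buffer and
 * fill cache keys are padded to the same length so a single sizeof works for
 * both lookup and creation.
 */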
1414 static VkResult
1415 get_buffer_copy_descriptor_set_layout(struct vk_device *device,
1416                                       struct vk_meta_device *meta,
1417                                       VkDescriptorSetLayout *layout_out,
1418                                       enum copy_source source)
1419 {
1420    const char buffer_key[] = "vk-meta-buffer-copy-descriptor-set-layout";
1421    const char fill_key[] = "vk-meta-fill__-copy-descriptor-set-layout";
1422 
1423    static_assert(sizeof(buffer_key) == sizeof(fill_key));
1424    const char *key = source == COPY_SOURCE_BUFFER ? buffer_key : fill_key;
1425 
1426    VkDescriptorSetLayout from_cache =
1427       vk_meta_lookup_descriptor_set_layout(meta, key, sizeof(buffer_key));
1428    if (from_cache != VK_NULL_HANDLE) {
1429       *layout_out = from_cache;
1430       return VK_SUCCESS;
1431    }
1432 
1433    const VkDescriptorSetLayoutBinding bindings[] = {
1434       {
1435          .binding = BINDING_OUTPUT,
1436          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1437          .descriptorCount = 1,
1438          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1439       },
1440       {
1441          .binding = BINDING_INPUT,
1442          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1443          .descriptorCount = 1,
1444          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1445       },
1446    };
1447 
1448    const VkDescriptorSetLayoutCreateInfo info = {
1449       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1450       .bindingCount = ARRAY_SIZE(bindings),
1451       .pBindings = bindings,
1452    };
1453 
1454    return vk_meta_create_descriptor_set_layout(device, meta, &info, key,
1455                                                sizeof(buffer_key), layout_out);
1456 }
1457 
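/* Same descriptor set layout in every variant; fills additionally get a
 * 4-byte push-constant range holding the fill pattern.
 */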
1458 static VkResult
1459 get_buffer_copy_pipeline_layout(struct vk_device *device,
1460                                 struct vk_meta_device *meta,
1461                                 struct vk_meta_buffer_copy_key *key,
1462                                 VkDescriptorSetLayout set_layout,
1463                                 VkPipelineLayout *layout_out)
1464 {
1465    const char copy_key[] = "vk-meta-buffer-copy-pipeline-layout";
1466    const char fill_key[] = "vk-meta-buffer-fill-pipeline-layout";
1467    const char cimg_key[] = "vk-meta-buffer-cimg-pipeline-layout";
1468 
1469    STATIC_ASSERT(sizeof(copy_key) == sizeof(fill_key));
1470    STATIC_ASSERT(sizeof(copy_key) == sizeof(cimg_key));
1471    const char *pipeline_key =
1472       key->source == COPY_SOURCE_BUFFER ? copy_key : fill_key;
1473 
1474    VkPipelineLayout from_cache =
1475       vk_meta_lookup_pipeline_layout(meta, pipeline_key, sizeof(copy_key));
1476    if (from_cache != VK_NULL_HANDLE) {
1477       *layout_out = from_cache;
1478       return VK_SUCCESS;
1479    }
1480 
1481    VkPipelineLayoutCreateInfo info = {
1482       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1483       .setLayoutCount = 1,
1484       .pSetLayouts = &set_layout,
1485    };
1486 
1487    size_t push_size = 0;
1488    if (key->source == COPY_SOURCE_PATTERN)
1489       push_size = sizeof(uint32_t);
1490 
1491    const VkPushConstantRange push_range = {
1492       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1493       .offset = 0,
1494       .size = push_size,
1495    };
1496 
1497    if (push_size) {
1498       info.pushConstantRangeCount = 1;
1499       info.pPushConstantRanges = &push_range;
1500    }
1501 
1502    return vk_meta_create_pipeline_layout(device, meta, &info, pipeline_key,
1503                                          sizeof(copy_key), layout_out);
1504 }
1505 
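/* Looks up (or builds and caches) the compute pipeline keyed on the whole
 * vk_meta_buffer_copy_key. The shader is handed to the driver as NIR through
 * VkPipelineShaderStageNirCreateInfoMESA and freed once the pipeline exists.
 */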
1506 static VkResult
1507 get_buffer_copy_pipeline(struct vk_device *device, struct vk_meta_device *meta,
1508                          const struct vk_meta_buffer_copy_key *key,
1509                          VkPipelineLayout layout, VkPipeline *pipeline_out)
1510 {
1511    VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key, sizeof(*key));
1512    if (from_cache != VK_NULL_HANDLE) {
1513       *pipeline_out = from_cache;
1514       return VK_SUCCESS;
1515    }
1516 
1517    const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
1518       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
1519       .nir = build_buffer_copy_shader(key),
1520    };
1521    const VkPipelineShaderStageCreateInfo cs_info = {
1522       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1523       .pNext = &nir_info,
1524       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1525       .pName = "main",
1526    };
1527 
1528    const VkComputePipelineCreateInfo info = {
1529       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1530       .stage = cs_info,
1531       .layout = layout,
1532    };
1533 
1534    VkResult result = vk_meta_create_compute_pipeline(
1535       device, meta, &info, key, sizeof(*key), pipeline_out);
1536    ralloc_free(nir_info.nir);
1537 
1538    return result;
1539 }
1540 
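/* Largest power of two dividing x, e.g. alignment_of(12) == 4 and
 * alignment_of(8) == 8. The MIN2(..., 31) clamp keeps the shift in range even
 * if __builtin_ctz returns >= 32 for x == 0 (formally undefined, but common
 * in practice).
 */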
1541 static unsigned
1542 alignment_of(unsigned x)
1543 {
1544    return 1 << MIN2(__builtin_ctz(x), 31);
1545 }
1546 
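/* Tagged union describing the data source handed to do_copy(); which member
 * is valid depends on the source field.
 */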
1547 struct copy_desc {
1548    enum copy_source source;
1549 
1550    union {
1551       uint32_t pattern;
1552 
1553       struct {
1554          struct vk_buffer *source;
1555          VkDeviceSize srcOffset;
1556       } buffer;
1557 
1558       struct {
1559          struct vk_image *image;
1560          VkDescriptorImageInfo *info;
1561          VkFormat format;
1562          struct vk_meta_image_to_buffer_push_data push;
1563       } image;
1564    };
1565 };
1566 
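/* Common path for fills, updates and buffer copies: pick the largest block
 * size (at most 16 bytes) that every access is aligned to, fetch the cached
 * layouts and pipeline for that configuration, push the buffer descriptors
 * (plus the fill pattern, if any) and dispatch one workgroup per block.
 */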
1567 static void
1568 do_copy(struct vk_command_buffer *cmd, struct vk_meta_device *meta, size_t size,
1569         struct vk_buffer *dest, VkDeviceSize dstOffset, struct copy_desc *desc)
1570 {
1571    struct vk_device *device = cmd->base.device;
1572    const struct vk_device_dispatch_table *disp = &device->dispatch_table;
1573    VkResult result;
1574 
1575    /* The "alignment" of the copy is the maximum alignment that all accesses
1576     * within the copy will satisfy.
1577     */
1578    unsigned alignment = MIN2(alignment_of(dstOffset), alignment_of(size));
1579 
1580    if (desc->source == COPY_SOURCE_BUFFER)
1581       alignment = MIN2(alignment, alignment_of(desc->buffer.srcOffset));
1582 
1583    struct vk_meta_buffer_copy_key key = {
1584       .key_type = VK_META_OBJECT_KEY_FILL_PIPELINE,
1585       .source = desc->source,
1586       .blocksize = MIN2(alignment, 16),
1587    };
1588 
1589    VkDescriptorSetLayout set_layout;
1590    result = get_buffer_copy_descriptor_set_layout(device, meta, &set_layout,
1591                                                   desc->source);
1592    if (unlikely(result != VK_SUCCESS)) {
1593       vk_command_buffer_set_error(cmd, result);
1594       return;
1595    }
1596 
1597    VkPipelineLayout pipeline_layout;
1598    result = get_buffer_copy_pipeline_layout(device, meta, &key, set_layout,
1599                                             &pipeline_layout);
1600    if (unlikely(result != VK_SUCCESS)) {
1601       vk_command_buffer_set_error(cmd, result);
1602       return;
1603    }
1604 
1605    VkDescriptorBufferInfo buffer_infos[2];
1606    VkWriteDescriptorSet desc_writes[2];
1607 
1608    for (unsigned i = 0; i < 2; ++i) {
1609       bool is_dest = (i == BINDING_OUTPUT);
1610 
1611       if (!is_dest && desc->source != COPY_SOURCE_BUFFER)
1612          continue;
1613 
1614       buffer_infos[i] = (VkDescriptorBufferInfo){
1615          .buffer = vk_buffer_to_handle(is_dest ? dest : desc->buffer.source),
1616          .offset = is_dest ? dstOffset : desc->buffer.srcOffset,
1617          .range = size,
1618       };
1619 
1620       desc_writes[i] = (VkWriteDescriptorSet){
1621          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1622          .dstSet = 0,
1623          .dstBinding = i,
1624          .descriptorType = is_dest ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
1625                                    : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1626          .descriptorCount = 1,
1627          .pBufferInfo = &buffer_infos[i],
1628       };
1629    }
1630 
1631    unsigned desc_count = desc->source == COPY_SOURCE_PATTERN ? 1 : 2;
1632    disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1633                                  VK_PIPELINE_BIND_POINT_COMPUTE,
1634                                  pipeline_layout, 0, desc_count, desc_writes);
1635 
1636    VkPipeline pipeline;
1637    result =
1638       get_buffer_copy_pipeline(device, meta, &key, pipeline_layout, &pipeline);
1639    if (unlikely(result != VK_SUCCESS)) {
1640       vk_command_buffer_set_error(cmd, result);
1641       return;
1642    }
1643 
1644    disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1645                          VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1646 
1647    if (desc->source == COPY_SOURCE_PATTERN) {
1648       disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
1649                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t),
1650                              &desc->pattern);
1651    }
1652 
1653    disp->CmdDispatch(vk_command_buffer_to_handle(cmd), size / key.blocksize, 1,
1654                      1);
1655 }
1656 
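/* Fills operate on whole 32-bit words: the range is rounded down to a
 * multiple of 4, matching vkCmdFillBuffer's semantics (size must be a
 * multiple of 4 or VK_WHOLE_SIZE) and the blocksize >= 4 assumption in the
 * fill shader.
 */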
1657 static void
1658 hk_meta_fill_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1659                     struct vk_buffer *dest, VkDeviceSize dstOffset,
1660                     VkDeviceSize dstRange, uint32_t data)
1661 {
1662    size_t size = ROUND_DOWN_TO(vk_buffer_range(dest, dstOffset, dstRange), 4);
1663    dstOffset = ROUND_DOWN_TO(dstOffset, 4);
1664 
1665    do_copy(cmd, meta, size, dest, dstOffset,
1666            &(struct copy_desc){
1667               .source = COPY_SOURCE_PATTERN,
1668               .pattern = data,
1669            });
1670 }
1671 
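/* vkCmdUpdateBuffer is implemented by staging: create a transient uniform
 * buffer, map it through the meta cmd_bind_map_buffer hook, memcpy the inline
 * data into it, then reuse the generic buffer-copy path with that buffer as
 * the source.
 */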
1672 static void
1673 hk_meta_update_buffer(struct vk_command_buffer *cmd,
1674                       struct vk_meta_device *meta, struct vk_buffer *dest,
1675                       VkDeviceSize dstOffset, VkDeviceSize dstRange,
1676                       const void *data)
1677 {
1678    /* Create a buffer to hold the data */
1679    const VkBufferCreateInfo info = {
1680       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1681       .size = vk_buffer_range(dest, dstOffset, dstRange),
1682       .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
1683       .queueFamilyIndexCount = 1,
1684       .pQueueFamilyIndices = &cmd->pool->queue_family_index,
1685    };
1686 
1687    VkBuffer buffer;
1688    VkResult result = vk_meta_create_buffer(cmd, meta, &info, &buffer);
1689    if (unlikely(result != VK_SUCCESS))
1690       return;
1691 
1692    /* Map the buffer for CPU access */
1693    void *map;
1694    result = meta->cmd_bind_map_buffer(cmd, meta, buffer, &map);
1695    if (unlikely(result != VK_SUCCESS))
1696       return;
1697 
1698    /* Copy from the CPU input to the staging buffer */
1699    memcpy(map, data, info.size);
1700 
1701    /* Copy between the buffers on the GPU */
1702    VK_FROM_HANDLE(vk_buffer, buffer_, buffer);
1703    size_t size = ROUND_DOWN_TO(vk_buffer_range(dest, dstOffset, dstRange), 4);
1704    dstOffset = ROUND_DOWN_TO(dstOffset, 4);
1705 
1706    do_copy(cmd, meta, size, dest, dstOffset,
1707            &(struct copy_desc){
1708               .source = COPY_SOURCE_BUFFER,
1709               .buffer.source = buffer_,
1710            });
1711 }
1712 
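/* One do_copy() per region; the block size is derived per region from its
 * offsets and size.
 */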
1713 static void
1714 hk_meta_copy_buffer2(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1715                      const VkCopyBufferInfo2 *pCopyBufferInfo)
1716 {
1717    VK_FROM_HANDLE(vk_buffer, dst, pCopyBufferInfo->dstBuffer);
1718    VK_FROM_HANDLE(vk_buffer, src, pCopyBufferInfo->srcBuffer);
1719 
1720    for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) {
1721       const VkBufferCopy2 *copy = &pCopyBufferInfo->pRegions[i];
1722 
1723       do_copy(cmd, meta, copy->size, dst, copy->dstOffset,
1724               &(struct copy_desc){
1725                  .source = COPY_SOURCE_BUFFER,
1726                  .buffer.source = src,
1727                  .buffer.srcOffset = copy->srcOffset,
1728               });
1729    }
1730 }
1731 
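/* The entry points below all follow the same pattern: save the state of the
 * relevant bind point with hk_meta_begin(), run the vk_meta/hk_meta
 * implementation, then restore it with hk_meta_end().
 */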
1732 VKAPI_ATTR void VKAPI_CALL
1733 hk_CmdBlitImage2(VkCommandBuffer commandBuffer,
1734                  const VkBlitImageInfo2 *pBlitImageInfo)
1735 {
1736    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1737    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1738 
1739    struct hk_meta_save save;
1740    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1741    vk_meta_blit_image2(&cmd->vk, &dev->meta, pBlitImageInfo);
1742    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1743 }
1744 
1745 VKAPI_ATTR void VKAPI_CALL
1746 hk_CmdResolveImage2(VkCommandBuffer commandBuffer,
1747                     const VkResolveImageInfo2 *pResolveImageInfo)
1748 {
1749    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1750    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1751 
1752    struct hk_meta_save save;
1753    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1754    vk_meta_resolve_image2(&cmd->vk, &dev->meta, pResolveImageInfo);
1755    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1756 }
1757 
1758 void
1759 hk_meta_resolve_rendering(struct hk_cmd_buffer *cmd,
1760                           const VkRenderingInfo *pRenderingInfo)
1761 {
1762    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1763 
1764    struct hk_meta_save save;
1765    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1766    vk_meta_resolve_rendering(&cmd->vk, &dev->meta, pRenderingInfo);
1767    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1768 }
1769 
1770 VKAPI_ATTR void VKAPI_CALL
1771 hk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
1772                   const VkCopyBufferInfo2 *pCopyBufferInfo)
1773 {
1774    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1775    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1776 
1777    struct hk_meta_save save;
1778    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1779    hk_meta_copy_buffer2(&cmd->vk, &dev->meta, pCopyBufferInfo);
1780    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1781 }
1782 
1783 VKAPI_ATTR void VKAPI_CALL
1784 hk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
1785                          const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
1786 {
1787    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1788    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1789 
1790    struct hk_meta_save save;
1791    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1792    hk_meta_copy_buffer_to_image2(&cmd->vk, &dev->meta, pCopyBufferToImageInfo);
1793    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1794 }
1795 
1796 VKAPI_ATTR void VKAPI_CALL
1797 hk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
1798                          const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
1799 {
1800    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1801    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1802 
1803    struct hk_meta_save save;
1804    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1805    hk_meta_copy_image_to_buffer2(&cmd->vk, &dev->meta, pCopyImageToBufferInfo);
1806    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1807 }
1808 
1809 VKAPI_ATTR void VKAPI_CALL
1810 hk_CmdCopyImage2(VkCommandBuffer commandBuffer,
1811                  const VkCopyImageInfo2 *pCopyImageInfo)
1812 {
1813    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1814    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1815 
1816    struct hk_meta_save save;
1817    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1818    hk_meta_copy_image2(&cmd->vk, &dev->meta, pCopyImageInfo);
1819    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1820 }
1821 
1822 VKAPI_ATTR void VKAPI_CALL
1823 hk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
1824                  VkDeviceSize dstOffset, VkDeviceSize dstRange, uint32_t data)
1825 {
1826    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1827    VK_FROM_HANDLE(vk_buffer, buffer, dstBuffer);
1828    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1829 
1830    struct hk_meta_save save;
1831    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1832    hk_meta_fill_buffer(&cmd->vk, &dev->meta, buffer, dstOffset, dstRange, data);
1833    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1834 }
1835 
1836 VKAPI_ATTR void VKAPI_CALL
1837 hk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
1838                    VkDeviceSize dstOffset, VkDeviceSize dstRange,
1839                    const void *pData)
1840 {
1841    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1842    VK_FROM_HANDLE(vk_buffer, buffer, dstBuffer);
1843    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1844 
1845    struct hk_meta_save save;
1846    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1847    hk_meta_update_buffer(&cmd->vk, &dev->meta, buffer, dstOffset, dstRange,
1848                          pData);
1849    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
1850 }
1851 
1852 VKAPI_ATTR void VKAPI_CALL
1853 hk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
1854                        const VkClearAttachment *pAttachments,
1855                        uint32_t rectCount, const VkClearRect *pRects)
1856 {
1857    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
1858    struct hk_device *dev = hk_cmd_buffer_device(cmd);
1859 
1860    struct vk_meta_rendering_info render_info;
1861    hk_meta_init_render(cmd, &render_info);
1862 
1863    struct hk_meta_save save;
1864    hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1865    vk_meta_clear_attachments(&cmd->vk, &dev->meta, &render_info,
1866                              attachmentCount, pAttachments, rectCount, pRects);
1867    hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
1868 }
1869