/*
 * Copyright © 2023 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nir/nir_builder.h"
#include "nir/nir_format_convert.h"

#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_device.h"
#include "vk_format.h"
#include "vk_meta.h"
#include "vk_meta_private.h"
#include "vk_physical_device.h"
#include "vk_pipeline.h"

#include "util/format/u_format.h"

struct vk_meta_fill_buffer_key {
   enum vk_meta_object_key_type key_type;
};

struct vk_meta_copy_buffer_key {
   enum vk_meta_object_key_type key_type;

   uint32_t chunk_size;
};

struct vk_meta_copy_image_view {
   VkImageViewType type;

   union {
      struct {
         VkFormat format;
      } color;
      struct {
         struct {
            VkFormat format;
            nir_component_mask_t component_mask;
         } depth, stencil;
      };
   };
};

struct vk_meta_copy_buffer_image_key {
   enum vk_meta_object_key_type key_type;

   VkPipelineBindPoint bind_point;

   struct {
      struct vk_meta_copy_image_view view;

      VkImageAspectFlagBits aspect;
   } img;

   uint32_t wg_size[3];
};

struct vk_meta_copy_image_key {
   enum vk_meta_object_key_type key_type;

   VkPipelineBindPoint bind_point;

   /* One source view per aspect being copied. */
   struct {
      struct vk_meta_copy_image_view view;
   } src, dst;

   VkImageAspectFlagBits aspects;
   VkSampleCountFlagBits samples;

   uint32_t wg_size[3];
};

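/* Load a single field of the push-constant struct __type. The load width
 * is derived from the field size and the offset from offsetof(), so e.g.
 * load_info(b, struct vk_meta_copy_buffer_info, size) emits a 32-bit
 * nir_load_push_constant at the offset of the size field.
 */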
#define load_info(__b, __type, __field_name)                                   \
   nir_load_push_constant((__b), 1,                                            \
                          sizeof(((__type *)NULL)->__field_name) * 8,          \
                          nir_imm_int((__b), offsetof(__type, __field_name)))

struct vk_meta_fill_buffer_info {
   uint64_t buf_addr;
   uint32_t data;
   uint32_t size;
};

struct vk_meta_copy_buffer_info {
   uint64_t src_addr;
   uint64_t dst_addr;
   uint32_t size;
};

struct vk_meta_copy_buffer_image_info {
   struct {
      uint64_t addr;
      uint32_t row_stride;
      uint32_t image_stride;
   } buf;

   struct {
      struct {
         uint32_t x, y, z;
      } offset;
   } img;

   /* The workgroup size is selected based on the image tile size, which
    * means threads may be issued outside the image area we want to copy
    * from/to. This field encodes the copy IDs that should be skipped, and
    * also serves as an adjustment for the buffer/image coordinates. */
   struct {
      struct {
         uint32_t x, y, z;
      } start, end;
   } copy_id_range;
};

struct vk_meta_copy_image_fs_info {
   struct {
      int32_t x, y, z;
   } dst_to_src_offs;
};

struct vk_meta_copy_image_cs_info {
   struct {
      struct {
         uint32_t x, y, z;
      } offset;
   } src_img, dst_img;

   /* The workgroup size is selected based on the image tile size, which
    * means threads may be issued outside the image area we want to copy
    * from/to. This field encodes the copy IDs that should be skipped, and
    * also serves as an adjustment for the buffer/image coordinates. */
   struct {
      struct {
         uint32_t x, y, z;
      } start, end;
   } copy_id_range;
};

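/* For array views, the base layer and layer count are folded into the
 * missing offset/extent dimensions (y for 1D arrays, z for 2D arrays and
 * cubes) so the copy shaders can treat every image as a dense 3D volume.
 */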
static VkOffset3D
base_layer_as_offset(VkImageViewType view_type, VkOffset3D offset,
                     uint32_t base_layer)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return (VkOffset3D){
         .x = offset.x,
      };

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
      return (VkOffset3D){
         .x = offset.x,
         .y = base_layer,
      };

   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_CUBE:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return (VkOffset3D){
         .x = offset.x,
         .y = offset.y,
         .z = base_layer,
      };

   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_3D:
      return offset;

   default:
      assert(!"Invalid view type");
      return (VkOffset3D){0};
   }
}

static VkExtent3D
layer_count_as_extent(VkImageViewType view_type, VkExtent3D extent,
                      uint32_t layer_count)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return (VkExtent3D){
         .width = extent.width,
         .height = 1,
         .depth = 1,
      };

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
      return (VkExtent3D){
         .width = extent.width,
         .height = layer_count,
         .depth = 1,
      };

   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_CUBE:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return (VkExtent3D){
         .width = extent.width,
         .height = extent.height,
         .depth = layer_count,
      };

   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_3D:
      return extent;

   default:
      assert(!"Invalid view type");
      return (VkExtent3D){0};
   }
}

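/* Shorthand for a single-descriptor set layout binding, e.g.
 * COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE) describes binding 0 as a
 * VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE visible to the compute stage.
 */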
#define COPY_SHADER_BINDING(__binding, __type, __stage)                        \
   {                                                                           \
      .binding = __binding,                                                    \
      .descriptorCount = 1,                                                    \
      .descriptorType = VK_DESCRIPTOR_TYPE_##__type,                           \
      .stageFlags = VK_SHADER_STAGE_##__stage##_BIT,                           \
   }

static VkResult
get_copy_pipeline_layout(struct vk_device *device, struct vk_meta_device *meta,
                         const char *key, VkShaderStageFlagBits shader_stage,
                         size_t push_const_size,
                         const struct VkDescriptorSetLayoutBinding *bindings,
                         uint32_t binding_count, VkPipelineLayout *layout_out)
{
   const VkDescriptorSetLayoutCreateInfo set_layout = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = binding_count,
      .pBindings = bindings,
   };

   const VkPushConstantRange push_range = {
      .stageFlags = shader_stage,
      .offset = 0,
      .size = push_const_size,
   };

   return vk_meta_get_pipeline_layout(device, meta, &set_layout, &push_range,
                                      key, strlen(key) + 1, layout_out);
}

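/* Shorthand for a push-descriptor image write, e.g.
 * COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iview, layout) fills a
 * VkWriteDescriptorSet for a VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE at binding 0.
 */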
#define COPY_PUSH_SET_IMG_DESC(__binding, __type, __iview, __layout)           \
   {                                                                           \
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,                         \
      .dstBinding = __binding,                                                 \
      .descriptorType = VK_DESCRIPTOR_TYPE_##__type##_IMAGE,                   \
      .descriptorCount = 1,                                                    \
      .pImageInfo = &(VkDescriptorImageInfo){                                  \
         .imageView = __iview,                                                 \
         .imageLayout = __layout,                                              \
      },                                                                       \
   }

static VkFormat
copy_img_view_format_for_aspect(const struct vk_meta_copy_image_view *info,
                                VkImageAspectFlagBits aspect)
{
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      return info->color.format;

   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return info->depth.format;

   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return info->stencil.format;

   default:
      assert(!"Unsupported aspect");
      return VK_FORMAT_UNDEFINED;
   }
}

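/* Depth and stencil are considered interleaved when both aspects are
 * accessed through the same view format with disjoint, non-empty component
 * masks. A plausible example (the actual mapping comes from the driver
 * through vk_meta_copy_image_properties): D24S8 exposed as an RGBA8_UINT
 * view with depth in components 0-2 and stencil in component 3.
 */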
static bool
depth_stencil_interleaved(const struct vk_meta_copy_image_view *view)
{
   return view->stencil.format != VK_FORMAT_UNDEFINED &&
          view->depth.format != VK_FORMAT_UNDEFINED &&
          view->stencil.format == view->depth.format &&
          view->stencil.component_mask != 0 &&
          view->depth.component_mask != 0 &&
          (view->stencil.component_mask & view->depth.component_mask) == 0;
}

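/* Graphics copy pipelines render with a fragment shader only. Depth and
 * stencil data are written through *color* attachments: each aspect gets
 * its own attachment and color write mask, unless both aspects are
 * interleaved in one view, in which case they share attachment 0 and the
 * write masks are combined.
 */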
static VkResult
get_gfx_copy_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   VkPipelineLayout layout, VkSampleCountFlagBits samples,
   nir_shader *(*build_nir)(const struct vk_meta_device *, const void *),
   VkImageAspectFlagBits aspects, const struct vk_meta_copy_image_view *view,
   const void *key_data, size_t key_size, VkPipeline *pipeline_out)
{
   VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size);
   if (from_cache != VK_NULL_HANDLE) {
      *pipeline_out = from_cache;
      return VK_SUCCESS;
   }

   const VkPipelineShaderStageNirCreateInfoMESA fs_nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir = build_nir(meta, key_data),
   };
   const VkPipelineShaderStageCreateInfo fs_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &fs_nir_info,
      .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
      .pName = "main",
   };

   VkPipelineDepthStencilStateCreateInfo ds_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
   };
   VkPipelineDynamicStateCreateInfo dyn_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
   };
   struct vk_meta_rendering_info render = {
      .samples = samples,
   };

   const VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .stageCount = 1,
      .pStages = &fs_info,
      .pDepthStencilState = &ds_info,
      .pDynamicState = &dyn_info,
      .layout = layout,
   };

   if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_COLOR_BIT);

      render.color_attachment_formats[render.color_attachment_count] = fmt;
      render.color_attachment_write_masks[render.color_attachment_count] =
         VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
         VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
      render.color_attachment_count++;
   }

   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_DEPTH_BIT);

      render.color_attachment_formats[render.color_attachment_count] = fmt;
      render.color_attachment_write_masks[render.color_attachment_count] =
         (VkColorComponentFlags)view->depth.component_mask;
      render.color_attachment_count++;
   }

   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_STENCIL_BIT);

      if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
          depth_stencil_interleaved(view)) {
         render.color_attachment_write_masks[0] |= view->stencil.component_mask;
      } else {
         render.color_attachment_formats[render.color_attachment_count] = fmt;
         render.color_attachment_write_masks[render.color_attachment_count] =
            (VkColorComponentFlags)view->stencil.component_mask;
         render.color_attachment_count++;
      }
   }

   VkResult result = vk_meta_create_graphics_pipeline(
      device, meta, &info, &render, key_data, key_size, pipeline_out);

   ralloc_free(fs_nir_info.nir);

   return result;
}

static VkResult
get_compute_copy_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   VkPipelineLayout layout,
   nir_shader *(*build_nir)(const struct vk_meta_device *, const void *),
   const void *key_data, size_t key_size, VkPipeline *pipeline_out)
{
   VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size);
   if (from_cache != VK_NULL_HANDLE) {
      *pipeline_out = from_cache;
      return VK_SUCCESS;
   }

   const VkPipelineShaderStageNirCreateInfoMESA cs_nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir = build_nir(meta, key_data),
   };

   const VkComputePipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .pNext = &cs_nir_info,
         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
         .pName = "main",
      },
      .layout = layout,
   };

   VkResult result = vk_meta_create_compute_pipeline(
      device, meta, &info, key_data, key_size, pipeline_out);

   ralloc_free(cs_nir_info.nir);

   return result;
}

static VkResult
copy_create_src_image_view(struct vk_command_buffer *cmd,
                           struct vk_meta_device *meta, struct vk_image *img,
                           const struct vk_meta_copy_image_view *view_info,
                           VkImageAspectFlags aspect,
                           const VkImageSubresourceLayers *subres,
                           VkImageView *view_out)
{
   const VkImageViewUsageCreateInfo usage = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
      .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
   };

   VkFormat format = copy_img_view_format_for_aspect(view_info, aspect);

   VkImageViewCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .pNext = &usage,
      .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
      .image = vk_image_to_handle(img),
      .viewType = view_info->type,
      .format = format,
      .subresourceRange = {
         .aspectMask = vk_format_aspects(format),
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = 0,
         .layerCount = img->array_layers,
      },
   };

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_STENCIL_BIT
                                          ? view_info->stencil.component_mask
                                          : view_info->depth.component_mask;
      assert(comp_mask != 0);

      VkComponentSwizzle *swizzle = &info.components.r;
      unsigned num_comps = util_bitcount(comp_mask);
      unsigned first_comp = ffs(comp_mask) - 1;

      assert(first_comp + num_comps <= 4);

      for (unsigned i = 0; i < num_comps; i++)
         swizzle[i] = first_comp + i + VK_COMPONENT_SWIZZLE_R;
   }

   return vk_meta_create_image_view(cmd, meta, &info, view_out);
}

static VkResult
copy_create_dst_image_view(struct vk_command_buffer *cmd,
                           struct vk_meta_device *meta, struct vk_image *img,
                           const struct vk_meta_copy_image_view *view_info,
                           VkImageAspectFlags aspect, const VkOffset3D *offset,
                           const VkExtent3D *extent,
                           const VkImageSubresourceLayers *subres,
                           VkPipelineBindPoint bind_point,
                           VkImageView *view_out)
{
   uint32_t layer_count, base_layer;
   VkFormat format = copy_img_view_format_for_aspect(view_info, aspect);
   VkImageAspectFlags fmt_aspects = vk_format_aspects(format);
   const VkImageViewUsageCreateInfo usage = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
      .usage = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE
                  ? VK_IMAGE_USAGE_STORAGE_BIT
                  : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
   };

   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      layer_count =
         MAX2(extent->depth, vk_image_subresource_layer_count(img, subres));
      base_layer = img->image_type == VK_IMAGE_TYPE_3D ? offset->z
                                                       : subres->baseArrayLayer;
   } else {
      /* Always create a view covering the whole image in case of compute. */
      layer_count = img->image_type == VK_IMAGE_TYPE_3D ? 1 : img->array_layers;
      base_layer = 0;
   }

   const VkImageViewCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .pNext = &usage,
      .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
      .image = vk_image_to_handle(img),
      .viewType = bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS
                     ? vk_image_render_view_type(img, layer_count)
                     : vk_image_storage_view_type(img),
      .format = format,
      .subresourceRange = {
         .aspectMask = fmt_aspects,
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = base_layer,
         .layerCount = layer_count,
      },
   };

   return vk_meta_create_image_view(cmd, meta, &info, view_out);
}

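/* Drop the coordinate components a view type doesn't use: 1D views keep
 * only x, 1D-array and 2D views keep (x, y), everything else keeps
 * (x, y, z/layer).
 */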
static nir_def *
trim_img_coords(nir_builder *b, VkImageViewType view_type, nir_def *coords)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return nir_channel(b, coords, 0);

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_2D:
      return nir_trim_vector(b, coords, 2);

   default:
      return nir_trim_vector(b, coords, 3);
   }
}

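/* Compute the global address of the buffer block matching the given image
 * element coordinates:
 *
 *    addr = buf.addr + z * buf.image_stride + y * buf.row_stride +
 *           x * block_size
 */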
static nir_def *
copy_img_buf_addr(nir_builder *b, enum pipe_format pfmt, nir_def *coords)
{
   nir_def *buf_row_stride =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.row_stride);
   nir_def *buf_img_stride =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.image_stride);
   nir_def *buf_addr =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.addr);
   nir_def *offset = nir_imul(b, nir_channel(b, coords, 2), buf_img_stride);
   unsigned blk_sz = util_format_get_blocksize(pfmt);

   offset = nir_iadd(b, offset,
                     nir_imul(b, nir_channel(b, coords, 1), buf_row_stride));
   offset = nir_iadd(b, offset,
                     nir_imul_imm(b, nir_channel(b, coords, 0), blk_sz));

   return nir_iadd(b, buf_addr, nir_u2u64(b, offset));
}

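/* Pick an uncompressed UINT format matching the block size of the data as
 * it sits in the buffer. Depth payloads map to R16_UINT or R32_UINT
 * depending on how many bits the depth component mask covers, stencil is
 * always R8_UINT, and color formats are chosen purely by block size.
 */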
static VkFormat
copy_img_buf_format_for_aspect(const struct vk_meta_copy_image_view *info,
                               VkImageAspectFlagBits aspect)
{
   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
      enum pipe_format pfmt = vk_format_to_pipe_format(info->depth.format);
      unsigned num_comps = util_format_get_nr_components(pfmt);
      unsigned depth_comp_bits = 0;

      for (unsigned i = 0; i < num_comps; i++) {
         if (info->depth.component_mask & BITFIELD_BIT(i))
            depth_comp_bits += util_format_get_component_bits(
               pfmt, UTIL_FORMAT_COLORSPACE_RGB, i);
      }

      switch (depth_comp_bits) {
      case 16:
         return VK_FORMAT_R16_UINT;
      case 24:
      case 32:
         return VK_FORMAT_R32_UINT;
      default:
         assert(!"Unsupported format");
         return VK_FORMAT_UNDEFINED;
      }
   } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
      return VK_FORMAT_R8_UINT;
   }

   enum pipe_format pfmt = vk_format_to_pipe_format(info->color.format);

   switch (util_format_get_blocksize(pfmt)) {
   case 1:
      return VK_FORMAT_R8_UINT;
   case 2:
      return VK_FORMAT_R16_UINT;
   case 3:
      return VK_FORMAT_R8G8B8_UINT;
   case 4:
      return VK_FORMAT_R32_UINT;
   case 6:
      return VK_FORMAT_R16G16B16_UINT;
   case 8:
      return VK_FORMAT_R32G32_UINT;
   case 12:
      return VK_FORMAT_R32G32B32_UINT;
   case 16:
      return VK_FORMAT_R32G32B32A32_UINT;
   default:
      assert(!"Unsupported format");
      return VK_FORMAT_UNDEFINED;
   }
}

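/* Convert a texel between two formats by packing it with the source format
 * and unpacking it with the destination format, i.e. a raw bit-cast at the
 * block level rather than a value conversion.
 */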
static nir_def *
convert_texel(nir_builder *b, VkFormat src_fmt, VkFormat dst_fmt,
              nir_def *texel)
{
   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_fmt);
   enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_fmt);

   if (src_pfmt == dst_pfmt)
      return texel;

   unsigned src_blksz = util_format_get_blocksize(src_pfmt);
   unsigned dst_blksz = util_format_get_blocksize(dst_pfmt);

   nir_def *packed = nir_format_pack_rgba(b, src_pfmt, texel);

   /* Needed for depth/stencil copies, where the source/destination formats
    * might have different sizes. */
   if (src_blksz < dst_blksz)
      packed = nir_pad_vector_imm_int(b, packed, 0, 4);

   nir_def *unpacked = nir_format_unpack_rgba(b, packed, dst_pfmt);

   return unpacked;
}

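/* Scatter the components of a depth or stencil texel to the positions
 * selected by comp_mask, zero-filling the rest. For example, with a
 * component mask of 0x8, a single-component stencil value lands in
 * component 3 of the output vector.
 */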
static nir_def *
place_ds_texel(nir_builder *b, VkFormat fmt, nir_component_mask_t comp_mask,
               nir_def *texel)
{
   assert(comp_mask != 0);

   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   unsigned num_comps = util_format_get_nr_components(pfmt);

   if (comp_mask == nir_component_mask(num_comps))
      return texel;

   assert(num_comps <= 4);

   nir_def *comps[4];
   unsigned c = 0;

   for (unsigned i = 0; i < num_comps; i++) {
      if (comp_mask & BITFIELD_BIT(i))
         comps[i] = nir_channel(b, texel, c++);
      else
         comps[i] = nir_imm_intN_t(b, 0, texel->bit_size);
   }

   return nir_vec(b, comps, num_comps);
}

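/* Build a uniform variable + deref for the source texture. The sampler
 * dimension follows the view type (or MS for multisampled sources) and the
 * GLSL base type is derived from the view format's numeric class
 * (int/uint/float). img_deref() below follows the same pattern for storage
 * images.
 */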
static nir_deref_instr *
tex_deref(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          unsigned binding)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   bool is_array = vk_image_view_type_is_array(view->type);
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const char *tex_name;
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      tex_name = "color_tex";
      break;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      tex_name = "depth_tex";
      break;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      tex_name = "stencil_tex";
      break;
   default:
      assert(!"Unsupported aspect");
      return NULL;
   }

   const struct glsl_type *texture_type =
      glsl_sampler_type(sampler_dim, false, is_array, base_type);
   nir_variable *texture =
      nir_variable_create(b->shader, nir_var_uniform, texture_type, tex_name);
   texture->data.descriptor_set = 0;
   texture->data.binding = binding;

   return nir_build_deref_var(b, texture);
}

static nir_deref_instr *
img_deref(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          unsigned binding)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   bool is_array = vk_image_view_type_is_array(view->type);
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const char *img_name;
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      img_name = "color_img";
      break;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      img_name = "depth_img";
      break;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      img_name = "stencil_img";
      break;
   default:
      assert(!"Unsupported aspect");
      return NULL;
   }
   const struct glsl_type *image_type =
      glsl_image_type(sampler_dim, is_array, base_type);
   nir_variable *image_var =
      nir_variable_create(b->shader, nir_var_uniform, image_type, img_name);
   image_var->data.descriptor_set = 0;
   image_var->data.binding = binding;

   return nir_build_deref_var(b, image_var);
}

static nir_def *
read_texel(nir_builder *b, nir_deref_instr *tex_deref, nir_def *coords,
           nir_def *sample_id)
{
   return sample_id ? nir_txf_ms_deref(b, tex_deref, coords, sample_id)
                    : nir_txf_deref(b, tex_deref, coords, NULL);
}

static nir_variable *
frag_var(nir_builder *b, const struct vk_meta_copy_image_view *view,
         VkImageAspectFlags aspect, uint32_t rt)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const struct glsl_type *var_type = glsl_vector_type(base_type, 4);
   static const char *var_names[] = {
      "gl_FragData[0]",
      "gl_FragData[1]",
   };

   assert(rt < ARRAY_SIZE(var_names));

   nir_variable *var = nir_variable_create(b->shader, nir_var_shader_out,
                                           var_type, var_names[rt]);
   var->data.location = FRAG_RESULT_DATA0 + rt;

   return var;
}

static void
write_frag(nir_builder *b, const struct vk_meta_copy_image_view *view,
           VkImageAspectFlags aspect, nir_variable *frag_var, nir_def *frag_val)
{
   nir_component_mask_t comp_mask;

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);

      comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT
                     ? view->depth.component_mask
                     : view->stencil.component_mask;
      frag_val = place_ds_texel(b, fmt, comp_mask, frag_val);
   } else {
      comp_mask = nir_component_mask(4);
   }

   if (frag_val->bit_size != 32) {
      switch (glsl_get_base_type(frag_var->type)) {
      case GLSL_TYPE_INT:
         frag_val = nir_i2i32(b, frag_val);
         break;
      case GLSL_TYPE_UINT:
         frag_val = nir_u2u32(b, frag_val);
         break;
      case GLSL_TYPE_FLOAT:
         frag_val = nir_f2f32(b, frag_val);
         break;
      default:
         assert(!"Invalid type");
         frag_val = NULL;
         break;
      }
   }

   frag_val = nir_pad_vector_imm_int(b, frag_val, 0, 4);

   nir_store_var(b, frag_var, frag_val, comp_mask);
}

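/* Store a texel to a storage image. When only a subset of the components
 * is owned by the aspect being written (interleaved depth/stencil), the
 * destination texel is loaded first and the untouched components are
 * carried over, turning the store into a read-modify-write.
 */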
static void
write_img(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          nir_deref_instr *img_deref, nir_def *coords, nir_def *sample_id,
          nir_def *val)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   bool is_array = vk_image_view_type_is_array(view->type);

   if (!sample_id) {
      assert(samples == VK_SAMPLE_COUNT_1_BIT);
      sample_id = nir_imm_int(b, 0);
   }

   unsigned access_flags = ACCESS_NON_READABLE;
   nir_def *zero_lod = nir_imm_int(b, 0);

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT
                                          ? view->depth.component_mask
                                          : view->stencil.component_mask;
      unsigned num_comps = util_format_get_nr_components(pfmt);

      val = place_ds_texel(b, fmt, comp_mask, val);

      if (comp_mask != nir_component_mask(num_comps)) {
         nir_def *comps[4];
         access_flags = 0;

         nir_def *old_val = nir_image_deref_load(b,
            val->num_components, val->bit_size, &img_deref->def, coords,
            sample_id, zero_lod, .image_dim = sampler_dim,
            .image_array = is_array, .format = pfmt, .access = access_flags,
            .dest_type = nir_get_nir_type_for_glsl_base_type(base_type));

         for (unsigned i = 0; i < val->num_components; i++) {
            if (comp_mask & BITFIELD_BIT(i))
               comps[i] = nir_channel(b, val, i);
            else
               comps[i] = nir_channel(b, old_val, i);
         }

         val = nir_vec(b, comps, val->num_components);
      }
   }

   nir_image_deref_store(b,
      &img_deref->def, coords, sample_id, val, zero_lod,
      .image_dim = sampler_dim, .image_array = is_array, .format = pfmt,
      .access = access_flags,
      .src_type = nir_get_nir_type_for_glsl_base_type(base_type));
}

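/* Compute shader copying image texels to a buffer: each invocation handles
 * one texel, bounds-checks its copy ID against copy_id_range, fetches from
 * the source view, bit-casts to the buffer block format and stores the
 * result with nir_store_global.
 */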
static nir_shader *
build_image_to_buffer_shader(const struct vk_meta_device *meta,
                             const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-to-buffer");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] = key->wg_size[0];
   b->shader->info.workgroup_size[1] = key->wg_size[1];
   b->shader->info.workgroup_size[2] = key->wg_size[2];

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);

   nir_def *copy_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id_start =
      nir_vec3(b,
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.x),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.y),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.z));
   nir_def *copy_id_end = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y),
      load_info(b, struct vk_meta_copy_buffer_image_info,
                copy_id_range.end.z));

   nir_def *in_bounds =
      nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
               nir_ball(b, nir_ult(b, copy_id, copy_id_end)));

   nir_push_if(b, in_bounds);

   copy_id = nir_isub(b, copy_id, copy_id_start);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   nir_def *img_coords =
      trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs));

   VkFormat iview_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   nir_deref_instr *tex =
      tex_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0);
   nir_def *texel = read_texel(b, tex, img_coords, NULL);

   texel = convert_texel(b, iview_fmt, buf_fmt, texel);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
   unsigned comp_count = util_format_get_nr_components(buf_pfmt);
   assert(blk_sz % comp_count == 0);
   unsigned comp_sz = (blk_sz / comp_count) * 8;

   /* nir_format_unpack_rgba() (which is called in convert_texel()) always
    * returns a 32-bit result, which we might have to downsize to match
    * the component size we want, hence the u2uN().
    */
   texel = nir_u2uN(b, texel, comp_sz);

   /* nir_format_unpack_rgba() (which is called from convert_texel()) returns
    * a vec4, which means we might have more components than we need, but
    * that's fine because we pass a write mask to nir_store_global.
    */
   assert(texel->num_components >= comp_count);
   nir_store_global(b, copy_img_buf_addr(b, buf_pfmt, copy_id),
                    comp_sz / 8, texel, nir_component_mask(comp_count));

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_copy_image_to_buffer_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   const VkDescriptorSetLayoutBinding bindings[] = {
      COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE),
   };

   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-image-to-buffer-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), bindings,
      ARRAY_SIZE(bindings), layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_image_to_buffer_shader, key,
                                    sizeof(*key), pipeline_out);
}

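/* Fragment shader copying buffer data to an image: the destination texel
 * is addressed by gl_FragCoord and the layer ID, translated back to a
 * buffer offset through the image offset push constants, loaded with
 * nir_build_load_global and emitted as the fragment output.
 */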
static nir_shader *
build_buffer_to_image_fs(const struct vk_meta_device *meta,
                         const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-buffer-to-image-frag");
   nir_builder *b = &builder;

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);

   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
   nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b));
   nir_def *out_layer = nir_load_layer_id(b);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   /* Move the layer ID to the second coordinate if we're dealing with a 1D
    * array, as this is where the texture instruction expects it. */
   nir_def *coords = key->img.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY
                        ? nir_vec3(b, nir_channel(b, out_coord_xy, 0),
                                   out_layer, nir_imm_int(b, 0))
                        : nir_vec3(b, nir_channel(b, out_coord_xy, 0),
                                   nir_channel(b, out_coord_xy, 1), out_layer);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
   unsigned comp_count = util_format_get_nr_components(buf_pfmt);
   assert(blk_sz % comp_count == 0);
   unsigned comp_sz = (blk_sz / comp_count) * 8;

   coords = nir_isub(b, coords, img_offs);

   nir_def *texel = nir_build_load_global(b,
      comp_count, comp_sz, copy_img_buf_addr(b, buf_pfmt, coords),
      .align_mul = 1 << (ffs(blk_sz) - 1));

   /* We don't handle compressed formats. The driver should select a
    * non-compressed format with the same block size. */
   assert(!util_format_is_compressed(buf_pfmt));

   VkFormat iview_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   nir_variable *out_var = frag_var(b, &key->img.view, key->img.aspect, 0);

   texel = convert_texel(b, buf_fmt, iview_fmt, texel);
   write_frag(b, &key->img.view, key->img.aspect, out_var, texel);
   return b->shader;
}

static VkResult
get_copy_buffer_to_image_gfx_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-buffer-to-image-gfx-pipeline-layout",
      VK_SHADER_STAGE_FRAGMENT_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), NULL, 0, layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_gfx_copy_pipeline(device, meta, *layout_out,
                                VK_SAMPLE_COUNT_1_BIT, build_buffer_to_image_fs,
                                key->img.aspect, &key->img.view, key,
                                sizeof(*key), pipeline_out);
}

static nir_shader *
build_buffer_to_image_cs(const struct vk_meta_device *meta,
                         const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer-to-image-compute");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] = key->wg_size[0];
   b->shader->info.workgroup_size[1] = key->wg_size[1];
   b->shader->info.workgroup_size[2] = key->wg_size[2];

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
   VkFormat img_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
   nir_deref_instr *image_deref =
      img_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0);

   nir_def *copy_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id_start =
      nir_vec3(b,
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.x),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.y),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.z));
   nir_def *copy_id_end = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y),
      load_info(b, struct vk_meta_copy_buffer_image_info,
                copy_id_range.end.z));

   nir_def *in_bounds =
      nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
               nir_ball(b, nir_ult(b, copy_id, copy_id_end)));

   nir_push_if(b, in_bounds);

   /* Adjust the copy ID such that we can directly deduce the image coords
    * and buffer offset from it. */
   copy_id = nir_isub(b, copy_id, copy_id_start);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   nir_def *img_coords =
      trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs));

   img_coords = nir_pad_vector_imm_int(b, img_coords, 0, 4);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
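   /* Use the widest power-of-two component size that divides the block
    * size: odd block sizes load as 8-bit components, block sizes that are
    * an odd multiple of 2 as 16-bit, everything else as 32-bit.
    */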
   unsigned bit_sz = blk_sz & 1 ? 8 : blk_sz & 2 ? 16 : 32;
   unsigned comp_count = blk_sz * 8 / bit_sz;

   nir_def *texel = nir_build_load_global(b,
         comp_count, bit_sz, copy_img_buf_addr(b, buf_pfmt, copy_id),
         .align_mul = 1 << (ffs(blk_sz) - 1));

   texel = convert_texel(b, buf_fmt, img_fmt, texel);

   write_img(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT,
             image_deref, img_coords, NULL, texel);

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_copy_buffer_to_image_compute_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   const VkDescriptorSetLayoutBinding bindings[] = {
      COPY_SHADER_BINDING(0, STORAGE_IMAGE, COMPUTE),
   };

   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-buffer-to-image-compute-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), bindings,
      ARRAY_SIZE(bindings), layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_buffer_to_image_cs, key, sizeof(*key),
                                    pipeline_out);
}

static VkResult
copy_buffer_image_prepare_gfx_push_const(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout pipeline_layout, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout, struct vk_image *img,
   const VkBufferImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   uint32_t depth_or_layer_count =
      MAX2(region->imageExtent.depth,
           vk_image_subresource_layer_count(img, &region->imageSubresource));
   VkImageViewType img_view_type =
      vk_image_render_view_type(img, depth_or_layer_count);
   VkOffset3D img_offs =
      base_layer_as_offset(img_view_type, region->imageOffset,
                           region->imageSubresource.baseArrayLayer);

   /* vk_meta_copy_buffer_image_info::image_stride is 32-bit for now.
    * We might want to make it a 64-bit integer (and patch the shader code
    * accordingly) if that becomes a limiting factor for vk_meta_copy users.
    */
   assert(buf_layout->image_stride_B <= UINT32_MAX);

   struct vk_meta_copy_buffer_image_info info = {
      .buf = {
         .row_stride = buf_layout->row_stride_B,
         .image_stride = buf_layout->image_stride_B,
         .addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset,
                                        VK_WHOLE_SIZE),
      },
      .img.offset = {
         .x = img_offs.x,
         .y = img_offs.y,
         .z = img_offs.z,
      },
   };

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info);
   return VK_SUCCESS;
}

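/* Fill and push the vk_meta_copy_buffer_image_info push constants for the
 * compute path, and derive the dispatch size. The dispatch is aligned on
 * the tile (workgroup) grid: copy_id_range.start is the image offset modulo
 * the workgroup size, copy_id_range.end is start plus the copy extent, and
 * whole workgroups are dispatched to cover that range, with out-of-range
 * invocations discarded by the shader's bounds check.
 */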
static VkResult
copy_buffer_image_prepare_compute_push_const(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout pipeline_layout, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout, struct vk_image *img,
   const VkBufferImageCopy2 *region, uint32_t *wg_count)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkImageViewType img_view_type = key->img.view.type;
   VkOffset3D img_offs =
      base_layer_as_offset(img_view_type, region->imageOffset,
                           region->imageSubresource.baseArrayLayer);
   uint32_t layer_count =
      vk_image_subresource_layer_count(img, &region->imageSubresource);
   VkExtent3D img_extent =
      layer_count_as_extent(img_view_type, region->imageExtent, layer_count);

   struct vk_meta_copy_buffer_image_info info = {
      .buf = {
         .row_stride = buf_layout->row_stride_B,
         .image_stride = buf_layout->image_stride_B,
         .addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset,
                                        VK_WHOLE_SIZE),
      },
      .img.offset = {
         .x = img_offs.x,
         .y = img_offs.y,
         .z = img_offs.z,
      },
   };

   info.copy_id_range.start.x = img_offs.x % key->wg_size[0];
   info.copy_id_range.start.y = img_offs.y % key->wg_size[1];
   info.copy_id_range.start.z = img_offs.z % key->wg_size[2];
   info.copy_id_range.end.x = info.copy_id_range.start.x + img_extent.width;
   info.copy_id_range.end.y = info.copy_id_range.start.y + img_extent.height;
   info.copy_id_range.end.z = info.copy_id_range.start.z + img_extent.depth;
   wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]);
   wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]);
   wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]);

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info);
   return VK_SUCCESS;
}

static bool
format_is_supported(VkFormat fmt)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   const struct util_format_description *fdesc = util_format_description(pfmt);

   /* We only support RGB formats in the copy path to keep things simple. */
   return fdesc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
          fdesc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
}

static struct vk_meta_copy_image_view
img_copy_view_info(VkImageViewType view_type, VkImageAspectFlags aspects,
                   const struct vk_image *img,
                   const struct vk_meta_copy_image_properties *img_props)
{
   struct vk_meta_copy_image_view view = {
      .type = view_type,
   };

   /* We only support color/depth/stencil aspects. */
   assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT |
                     VK_IMAGE_ASPECT_STENCIL_BIT));

   if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      /* Color aspect can't be combined with other aspects. */
      assert(!(aspects & ~VK_IMAGE_ASPECT_COLOR_BIT));
      view.color.format = img_props->color.view_format;
      assert(format_is_supported(view.color.format));
      return view;
   }

   view.depth.format = img_props->depth.view_format;
   view.depth.component_mask = img_props->depth.component_mask;
   view.stencil.format = img_props->stencil.view_format;
   view.stencil.component_mask = img_props->stencil.component_mask;

   assert(view.depth.format == VK_FORMAT_UNDEFINED ||
          format_is_supported(view.depth.format));
   assert(view.stencil.format == VK_FORMAT_UNDEFINED ||
          format_is_supported(view.stencil.format));
   return view;
}

static void
copy_image_to_buffer_region(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   struct vk_image *img, VkImageLayout img_layout,
   const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout,
   const VkBufferImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   struct vk_meta_copy_buffer_image_key key = {
      .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
      .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
      .img = {
         .view = img_copy_view_info(vk_image_sampled_view_type(img),
                                    region->imageSubresource.aspectMask, img,
                                    img_props),
         .aspect = region->imageSubresource.aspectMask,
      },
      .wg_size = {
         img_props->tile_size.width,
         img_props->tile_size.height,
         img_props->tile_size.depth,
      },
   };

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;
   VkResult result = get_copy_image_to_buffer_pipeline(
      dev, meta, &key, &pipeline_layout, &pipeline);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
                         VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   VkImageView iview;
   result = copy_create_src_image_view(cmd, meta, img, &key.img.view,
                                       region->imageSubresource.aspectMask,
                                       &region->imageSubresource, &iview);

   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   const VkWriteDescriptorSet descs[] = {
      COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iview, img_layout),
   };

   disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
                                 VK_PIPELINE_BIND_POINT_COMPUTE,
                                 pipeline_layout, 0, ARRAY_SIZE(descs), descs);

   uint32_t wg_count[3] = {0};

   result = copy_buffer_image_prepare_compute_push_const(
      cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region,
      wg_count);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(cmd, result);
      return;
   }

   disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1],
                     wg_count[2]);
}

1386 void
1387 vk_meta_copy_image_to_buffer(
1388    struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1389    const VkCopyImageToBufferInfo2 *info,
1390    const struct vk_meta_copy_image_properties *img_props)
1391 {
1392    VK_FROM_HANDLE(vk_image, img, info->srcImage);
1393 
1394    for (uint32_t i = 0; i < info->regionCount; i++) {
1395       VkBufferImageCopy2 region = info->pRegions[i];
1396       struct vk_image_buffer_layout buf_layout =
1397          vk_image_buffer_copy_layout(img, &region);
1398 
1399       region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent);
1400       region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset);
1401 
1402       copy_image_to_buffer_region(cmd, meta, img, info->srcImageLayout,
1403                                   img_props, info->dstBuffer, &buf_layout,
1404                                   &region);
1405    }
1406 }
1407 
1408 static void
1409 copy_draw(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1410           struct vk_image *dst_img, VkImageLayout dst_img_layout,
1411           const VkImageSubresourceLayers *dst_img_subres,
1412           const VkOffset3D *dst_img_offset, const VkExtent3D *copy_extent,
1413           const struct vk_meta_copy_image_view *view_info)
1414 {
1415    struct vk_device *dev = cmd->base.device;
1416    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1417    uint32_t depth_or_layer_count =
1418       MAX2(copy_extent->depth,
1419            vk_image_subresource_layer_count(dst_img, dst_img_subres));
1420    struct vk_meta_rect rect = {
1421       .x0 = dst_img_offset->x,
1422       .x1 = dst_img_offset->x + copy_extent->width,
1423       .y0 = dst_img_offset->y,
1424       .y1 = dst_img_offset->y + copy_extent->height,
1425    };
1426    VkRenderingAttachmentInfo vk_atts[2];
1427    VkRenderingInfo vk_render = {
1428       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
1429       .renderArea = {
1430          .offset = {
1431             dst_img_offset->x,
1432             dst_img_offset->y,
1433          },
1434          .extent = {
1435             copy_extent->width,
1436             copy_extent->height,
1437          },
1438       },
1439       .layerCount = depth_or_layer_count,
1440       .pColorAttachments = vk_atts,
1441    };
1442    VkImageView iview = VK_NULL_HANDLE;
1443 
1444    u_foreach_bit(a, dst_img_subres->aspectMask) {
1445       VkImageAspectFlagBits aspect = 1 << a;
1446 
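      /* With interleaved depth/stencil, both aspects are written through a
       * single color attachment, so the view created for the depth aspect is
       * re-used for stencil instead of adding a second attachment. */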
1447       if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && iview != VK_NULL_HANDLE &&
1448           depth_stencil_interleaved(view_info))
1449          continue;
1450 
1451       VkResult result = copy_create_dst_image_view(
1452          cmd, meta, dst_img, view_info, aspect, dst_img_offset, copy_extent,
1453          dst_img_subres, VK_PIPELINE_BIND_POINT_GRAPHICS, &iview);
1454       if (unlikely(result != VK_SUCCESS)) {
1455          vk_command_buffer_set_error(cmd, result);
1456          return;
1457       }
1458 
1459       vk_atts[vk_render.colorAttachmentCount] = (VkRenderingAttachmentInfo){
1460          .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1461          .imageView = iview,
1462          .imageLayout = dst_img_layout,
1463          .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1464          .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
1465       };
1466 
1467       /* If we have interleaved depth/stencil and only one aspect is copied, we
1468        * need to load the attachment to preserve the other component. */
1469       if (vk_format_has_depth(dst_img->format) &&
1470           vk_format_has_stencil(dst_img->format) &&
1471           depth_stencil_interleaved(view_info) &&
1472           (dst_img_subres->aspectMask !=
1473            (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
1474          vk_atts[vk_render.colorAttachmentCount].loadOp =
1475             VK_ATTACHMENT_LOAD_OP_LOAD;
1476       }
1477 
1478       vk_render.colorAttachmentCount++;
1479    }
1480 
1481    disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd), &vk_render);
1482    meta->cmd_draw_volume(cmd, meta, &rect, vk_render.layerCount);
1483    disp->CmdEndRendering(vk_command_buffer_to_handle(cmd));
1484 }
1485 
1486 static void
1487 copy_buffer_to_image_region_gfx(
1488    struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1489    struct vk_image *img, VkImageLayout img_layout,
1490    const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
1491    const struct vk_image_buffer_layout *buf_layout,
1492    const VkBufferImageCopy2 *region)
1493 {
1494    struct vk_device *dev = cmd->base.device;
1495    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1496 
1497    /* We only special-case 1D_ARRAY to move the layer ID to the second
1498     * component instead of the third. For all other view types, let's pick an
1499     * invalid VkImageViewType value so we don't end up creating the same
1500     * pipeline multiple times. */
1501    VkImageViewType view_type =
1502       img->image_type == VK_IMAGE_TYPE_1D && img->array_layers > 1
1503          ? VK_IMAGE_VIEW_TYPE_1D_ARRAY
1504          : (VkImageViewType)-1;
1505 
1506    struct vk_meta_copy_buffer_image_key key = {
1507       .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE,
1508       .bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS,
1509       .img = {
1510          .view = img_copy_view_info(view_type,
1511                                     region->imageSubresource.aspectMask, img,
1512                                     img_props),
1513          .aspect = region->imageSubresource.aspectMask,
1514       },
1515    };
1516 
1517    VkPipelineLayout pipeline_layout;
1518    VkPipeline pipeline;
1519    VkResult result = get_copy_buffer_to_image_gfx_pipeline(
1520       dev, meta, &key, &pipeline_layout, &pipeline);
1521    if (unlikely(result != VK_SUCCESS)) {
1522       vk_command_buffer_set_error(cmd, result);
1523       return;
1524    }
1525 
1526    disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1527                          VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
1528 
1529    result = copy_buffer_image_prepare_gfx_push_const(
1530       cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region);
1531    if (unlikely(result != VK_SUCCESS)) {
1532       vk_command_buffer_set_error(cmd, result);
1533       return;
1534    }
1535 
1536    copy_draw(cmd, meta, img, img_layout, &region->imageSubresource,
1537              &region->imageOffset, &region->imageExtent, &key.img.view);
1538 }
1539 
1540 static void
1541 copy_buffer_to_image_region_compute(
1542    struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1543    struct vk_image *img, VkImageLayout img_layout,
1544    const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
1545    const struct vk_image_buffer_layout *buf_layout,
1546    const VkBufferImageCopy2 *region)
1547 {
1548    struct vk_device *dev = cmd->base.device;
1549    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1550    VkImageViewType view_type = vk_image_storage_view_type(img);
1551    struct vk_meta_copy_buffer_image_key key = {
1552       .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE,
1553       .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
1554       .img = {
1555          .view = img_copy_view_info(view_type,
1556                                     region->imageSubresource.aspectMask, img,
1557                                     img_props),
1558          .aspect = region->imageSubresource.aspectMask,
1559       },
1560       .wg_size = {
1561          img_props->tile_size.width,
1562          img_props->tile_size.height,
1563          img_props->tile_size.depth,
1564       },
1565    };
1566 
1567    VkPipelineLayout pipeline_layout;
1568    VkPipeline pipeline;
1569    VkResult result = get_copy_buffer_to_image_compute_pipeline(
1570       dev, meta, &key, &pipeline_layout, &pipeline);
1571    if (unlikely(result != VK_SUCCESS)) {
1572       vk_command_buffer_set_error(cmd, result);
1573       return;
1574    }
1575 
1576    disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1577                          VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1578 
1579    VkImageView iview;
1580    result = copy_create_dst_image_view(
1581       cmd, meta, img, &key.img.view, region->imageSubresource.aspectMask,
1582       &region->imageOffset, &region->imageExtent, &region->imageSubresource,
1583       VK_PIPELINE_BIND_POINT_COMPUTE, &iview);
1584 
1585    if (unlikely(result != VK_SUCCESS)) {
1586       vk_command_buffer_set_error(cmd, result);
1587       return;
1588    }
1589 
1590    const VkWriteDescriptorSet descs[] = {
1591       COPY_PUSH_SET_IMG_DESC(0, STORAGE, iview, img_layout),
1592    };
1593 
1594    disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1595                                  VK_PIPELINE_BIND_POINT_COMPUTE,
1596                                  pipeline_layout, 0, ARRAY_SIZE(descs), descs);
1597 
1598    uint32_t wg_count[3] = {0};
1599 
1600    result = copy_buffer_image_prepare_compute_push_const(
1601       cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region,
1602       wg_count);
1603    if (unlikely(result != VK_SUCCESS)) {
1604       vk_command_buffer_set_error(cmd, result);
1605       return;
1606    }
1607 
1608    disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
1609                      wg_count[0], wg_count[1], wg_count[2]);
1610 }
1611 
1612 void
1613 vk_meta_copy_buffer_to_image(
1614    struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1615    const VkCopyBufferToImageInfo2 *info,
1616    const struct vk_meta_copy_image_properties *img_props,
1617    VkPipelineBindPoint bind_point)
1618 {
1619    VK_FROM_HANDLE(vk_image, img, info->dstImage);
1620 
1621    for (uint32_t i = 0; i < info->regionCount; i++) {
1622       VkBufferImageCopy2 region = info->pRegions[i];
1623       struct vk_image_buffer_layout buf_layout =
1624          vk_image_buffer_copy_layout(img, &region);
1625 
1626       region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent);
1627       region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset);
1628 
1629       if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
1630          copy_buffer_to_image_region_gfx(cmd, meta, img, info->dstImageLayout,
1631                                          img_props, info->srcBuffer,
1632                                          &buf_layout, &region);
1633       } else {
1634          copy_buffer_to_image_region_compute(cmd, meta, img,
1635                                              info->dstImageLayout, img_props,
1636                                              info->srcBuffer, &buf_layout,
1637                                              &region);
1638       }
1639    }
1640 }
1641 
1642 static nir_shader *
1643 build_copy_image_fs(const struct vk_meta_device *meta, const void *key_data)
1644 {
1645    const struct vk_meta_copy_image_key *key = key_data;
1646 
1647    assert(key->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
1648 
1649    nir_builder builder = nir_builder_init_simple_shader(
1650       MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-image-frag");
1651    nir_builder *b = &builder;
1652 
1653    b->shader->info.fs.uses_sample_shading =
1654       key->samples != VK_SAMPLE_COUNT_1_BIT;
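
   /* For multisampled copies the fragment shader runs once per sample and
    * fetches the matching sample from the source via nir_load_sample_id(). */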
1655 
1656    nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b));
1657    nir_def *out_layer = nir_load_layer_id(b);
1658 
1659    nir_def *src_offset = nir_vec3(b,
1660       load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.x),
1661       load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.y),
1662       load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.z));
1663 
1664    /* Move the layer ID to the second coordinate if we're dealing with a 1D
1665     * array, as this is where the texture instruction expects it. */
1666    nir_def *src_coords =
1667       key->dst.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY
1668          ? nir_vec3(b, nir_channel(b, out_coord_xy, 0), out_layer,
1669                     nir_imm_int(b, 0))
1670          : nir_vec3(b, nir_channel(b, out_coord_xy, 0),
1671                     nir_channel(b, out_coord_xy, 1), out_layer);
1672 
1673    src_coords = trim_img_coords(b, key->src.view.type,
1674                                 nir_iadd(b, src_coords, src_offset));
1675 
1676    nir_def *sample_id =
1677       key->samples != VK_SAMPLE_COUNT_1_BIT ? nir_load_sample_id(b) : NULL;
1678    nir_variable *color_var = NULL;
1679    uint32_t tex_binding = 0;
1680 
1681    u_foreach_bit(a, key->aspects) {
1682       VkImageAspectFlagBits aspect = 1 << a;
1683       VkFormat src_fmt =
1684          copy_img_view_format_for_aspect(&key->src.view, aspect);
1685       VkFormat dst_fmt =
1686          copy_img_view_format_for_aspect(&key->dst.view, aspect);
1687       nir_deref_instr *tex =
1688          tex_deref(b, &key->src.view, aspect, key->samples, tex_binding++);
1689       nir_def *texel = read_texel(b, tex, src_coords, sample_id);
1690 
1691       if (!color_var || !depth_stencil_interleaved(&key->dst.view)) {
1692          color_var =
1693             frag_var(b, &key->dst.view, aspect, color_var != NULL ? 1 : 0);
1694       }
1695 
1696       texel = convert_texel(b, src_fmt, dst_fmt, texel);
1697       write_frag(b, &key->dst.view, aspect, color_var, texel);
1698    }
1699 
1700    return b->shader;
1701 }
1702 
1703 static VkResult
1704 get_copy_image_gfx_pipeline(struct vk_device *device,
1705                             struct vk_meta_device *meta,
1706                             const struct vk_meta_copy_image_key *key,
1707                             VkPipelineLayout *layout_out,
1708                             VkPipeline *pipeline_out)
1709 {
1710    const struct VkDescriptorSetLayoutBinding bindings[] = {
1711       COPY_SHADER_BINDING(0, SAMPLED_IMAGE, FRAGMENT),
1712       COPY_SHADER_BINDING(1, SAMPLED_IMAGE, FRAGMENT),
1713    };
1714 
1715    VkResult result = get_copy_pipeline_layout(
1716       device, meta, "vk-meta-copy-image-gfx-pipeline-layout",
1717       VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(struct vk_meta_copy_image_fs_info),
1718       bindings, ARRAY_SIZE(bindings), layout_out);
1719    if (unlikely(result != VK_SUCCESS))
1720       return result;
1721 
1722    return get_gfx_copy_pipeline(
1723       device, meta, *layout_out, key->samples, build_copy_image_fs,
1724       key->aspects, &key->dst.view, key, sizeof(*key), pipeline_out);
1725 }
1726 
1727 static nir_shader *
1728 build_copy_image_cs(const struct vk_meta_device *meta, const void *key_data)
1729 {
1730    const struct vk_meta_copy_image_key *key = key_data;
1731 
1732    assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
1733 
1734    nir_builder builder = nir_builder_init_simple_shader(
1735       MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-compute");
1736    nir_builder *b = &builder;
1737 
1738    b->shader->info.workgroup_size[0] = key->wg_size[0];
1739    b->shader->info.workgroup_size[1] = key->wg_size[1];
1740    b->shader->info.workgroup_size[2] = key->wg_size[2];
1741 
1742    nir_def *copy_id = nir_load_global_invocation_id(b, 32);
1743    nir_def *copy_id_start = nir_vec3(b,
1744       load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.x),
1745       load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.y),
1746       load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.z));
1747    nir_def *copy_id_end = nir_vec3(b,
1748       load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.x),
1749       load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.y),
1750       load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.z));
1751 
1752    nir_def *in_bounds =
1753       nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
1754                nir_ball(b, nir_ult(b, copy_id, copy_id_end)));
1755 
1756    nir_push_if(b, in_bounds);
1757 
1758    nir_def *src_offset = nir_vec3(b,
1759       load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.x),
1760       load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.y),
1761       load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.z));
1762    nir_def *dst_offset = nir_vec3(b,
1763       load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.x),
1764       load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.y),
1765       load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.z));
1766 
1767    nir_def *src_coords = trim_img_coords(b, key->src.view.type,
1768                                          nir_iadd(b, copy_id, src_offset));
1769    nir_def *dst_coords = trim_img_coords(b, key->dst.view.type,
1770                                          nir_iadd(b, copy_id, dst_offset));
1771 
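   /* NIR image intrinsics always take a four-component coordinate, so pad
    * the destination coordinates with zeros. */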
1772    dst_coords = nir_pad_vector_imm_int(b, dst_coords, 0, 4);
1773 
1774    uint32_t binding = 0;
1775    u_foreach_bit(a, key->aspects) {
1776       VkImageAspectFlagBits aspect = 1 << a;
1777       VkFormat src_fmt =
1778          copy_img_view_format_for_aspect(&key->src.view, aspect);
1779       VkFormat dst_fmt =
1780          copy_img_view_format_for_aspect(&key->dst.view, aspect);
1781       nir_deref_instr *tex =
1782          tex_deref(b, &key->src.view, aspect, key->samples, binding);
1783       nir_deref_instr *img =
1784          img_deref(b, &key->dst.view, aspect, key->samples, binding + 1);
1785 
1786       for (uint32_t s = 0; s < key->samples; s++) {
1787          nir_def *sample_id =
1788             key->samples == VK_SAMPLE_COUNT_1_BIT ? NULL : nir_imm_int(b, s);
1789          nir_def *texel = read_texel(b, tex, src_coords, sample_id);
1790 
1791          texel = convert_texel(b, src_fmt, dst_fmt, texel);
1792          write_img(b, &key->dst.view, aspect, key->samples, img, dst_coords,
1793                    sample_id, texel);
1794       }
1795 
1796       binding += 2;
1797    }
1798 
1799    nir_pop_if(b, NULL);
1800 
1801    return b->shader;
1802 }
1803 
1804 static VkResult
1805 get_copy_image_compute_pipeline(struct vk_device *device,
1806                                 struct vk_meta_device *meta,
1807                                 const struct vk_meta_copy_image_key *key,
1808                                 VkPipelineLayout *layout_out,
1809                                 VkPipeline *pipeline_out)
1810 {
1811    const VkDescriptorSetLayoutBinding bindings[] = {
1812       COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE),
1813       COPY_SHADER_BINDING(1, STORAGE_IMAGE, COMPUTE),
1814       COPY_SHADER_BINDING(2, SAMPLED_IMAGE, COMPUTE),
1815       COPY_SHADER_BINDING(3, STORAGE_IMAGE, COMPUTE),
1816    };
1817 
1818    VkResult result = get_copy_pipeline_layout(
1819       device, meta, "vk-meta-copy-image-compute-pipeline-layout",
1820       VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_image_cs_info),
1821       bindings, ARRAY_SIZE(bindings), layout_out);
1822 
1823    if (unlikely(result != VK_SUCCESS))
1824       return result;
1825 
1826    return get_compute_copy_pipeline(device, meta, *layout_out,
1827                                     build_copy_image_cs, key, sizeof(*key),
1828                                     pipeline_out);
1829 }
1830 
1831 static VkResult
1832 copy_image_prepare_gfx_desc_set(
1833    struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1834    const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
1835    struct vk_image *src_img, VkImageLayout src_img_layout,
1836    struct vk_image *dst_img, VkImageLayout dst_img_layout,
1837    const VkImageCopy2 *region)
1838 {
1839    struct vk_device *dev = cmd->base.device;
1840    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1841    VkImageAspectFlags aspects = key->aspects;
1842    VkImageView iviews[] = {
1843       VK_NULL_HANDLE,
1844       VK_NULL_HANDLE,
1845    };
1846    uint32_t desc_count = 0;
1847 
1848    u_foreach_bit(a, aspects) {
1849       assert(desc_count < ARRAY_SIZE(iviews));
1850 
1851       VkResult result = copy_create_src_image_view(
1852          cmd, meta, src_img, &key->src.view, 1 << a, &region->srcSubresource,
1853          &iviews[desc_count++]);
1854       if (unlikely(result != VK_SUCCESS))
1855          return result;
1856    }
1857 
1858    VkWriteDescriptorSet descs[2] = {
1859       COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout),
1860       COPY_PUSH_SET_IMG_DESC(1, SAMPLED, iviews[1], src_img_layout),
1861    };
1862 
1863    disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1864                                  VK_PIPELINE_BIND_POINT_GRAPHICS,
1865                                  pipeline_layout, 0, desc_count, descs);
1866    return VK_SUCCESS;
1867 }
1868 
1869 static VkResult
1870 copy_image_prepare_compute_desc_set(
1871    struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1872    const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
1873    struct vk_image *src_img, VkImageLayout src_img_layout,
1874    struct vk_image *dst_img, VkImageLayout dst_img_layout,
1875    const VkImageCopy2 *region)
1876 {
1877    struct vk_device *dev = cmd->base.device;
1878    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1879    VkImageAspectFlags aspects = key->aspects;
1880    VkImageView iviews[] = {
1881       VK_NULL_HANDLE,
1882       VK_NULL_HANDLE,
1883       VK_NULL_HANDLE,
1884       VK_NULL_HANDLE,
1885    };
1886    unsigned desc_count = 0;
1887 
1888    u_foreach_bit(a, aspects) {
1889       VkImageAspectFlagBits aspect = 1 << a;
1890 
1891       assert(desc_count + 2 <= ARRAY_SIZE(iviews));
1892 
1893       VkResult result = copy_create_src_image_view(
1894          cmd, meta, src_img, &key->src.view, aspect, &region->srcSubresource,
1895          &iviews[desc_count++]);
1896       if (unlikely(result != VK_SUCCESS))
1897          return result;
1898 
1899       result = copy_create_dst_image_view(
1900          cmd, meta, dst_img, &key->dst.view, aspect, &region->dstOffset,
1901          &region->extent, &region->dstSubresource,
1902          VK_PIPELINE_BIND_POINT_COMPUTE, &iviews[desc_count++]);
1903       if (unlikely(result != VK_SUCCESS))
1904          return result;
1905    }
1906 
1907    VkWriteDescriptorSet descs[] = {
1908       COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout),
1909       COPY_PUSH_SET_IMG_DESC(1, STORAGE, iviews[1], dst_img_layout),
1910       COPY_PUSH_SET_IMG_DESC(2, SAMPLED, iviews[2], src_img_layout),
1911       COPY_PUSH_SET_IMG_DESC(3, STORAGE, iviews[3], dst_img_layout),
1912    };
1913 
1914    disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1915                                  VK_PIPELINE_BIND_POINT_COMPUTE,
1916                                  pipeline_layout, 0, desc_count, descs);
1917    return VK_SUCCESS;
1918 }
1919 
1920 enum vk_meta_copy_image_align_policy {
1921    VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE,
1922    VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE,
1923 };
1924 
1925 static VkResult
1926 copy_image_prepare_compute_push_const(
1927    struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1928    const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
1929    const struct vk_image *src, const struct vk_image *dst,
1930    enum vk_meta_copy_image_align_policy align_policy,
1931    const VkImageCopy2 *region, uint32_t *wg_count)
1932 {
1933    struct vk_device *dev = cmd->base.device;
1934    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1935    VkOffset3D src_offs =
1936       base_layer_as_offset(key->src.view.type, region->srcOffset,
1937                            region->srcSubresource.baseArrayLayer);
1938    uint32_t layer_count =
1939       vk_image_subresource_layer_count(src, &region->srcSubresource);
1940    VkExtent3D src_extent =
1941       layer_count_as_extent(key->src.view.type, region->extent, layer_count);
1942    VkOffset3D dst_offs =
1943       base_layer_as_offset(key->dst.view.type, region->dstOffset,
1944                            region->dstSubresource.baseArrayLayer);
1945 
1946    struct vk_meta_copy_image_cs_info info = {0};
1947 
1948    /* We can't make both the read and write sides tile-aligned, so we align
1949     * the copy-id grid on the biggest tile size. */
1950    if (align_policy == VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE) {
1951       info.copy_id_range.start.x = src_offs.x % key->wg_size[0];
1952       info.copy_id_range.start.y = src_offs.y % key->wg_size[1];
1953       info.copy_id_range.start.z = src_offs.z % key->wg_size[2];
1954    } else {
1955       info.copy_id_range.start.x = dst_offs.x % key->wg_size[0];
1956       info.copy_id_range.start.y = dst_offs.y % key->wg_size[1];
1957       info.copy_id_range.start.z = dst_offs.z % key->wg_size[2];
1958    }
1959 
1960    info.copy_id_range.end.x = info.copy_id_range.start.x + src_extent.width;
1961    info.copy_id_range.end.y = info.copy_id_range.start.y + src_extent.height;
1962    info.copy_id_range.end.z = info.copy_id_range.start.z + src_extent.depth;
1963 
1964    info.src_img.offset.x = src_offs.x - info.copy_id_range.start.x;
1965    info.src_img.offset.y = src_offs.y - info.copy_id_range.start.y;
1966    info.src_img.offset.z = src_offs.z - info.copy_id_range.start.z;
1967    info.dst_img.offset.x = dst_offs.x - info.copy_id_range.start.x;
1968    info.dst_img.offset.y = dst_offs.y - info.copy_id_range.start.y;
1969    info.dst_img.offset.z = dst_offs.z - info.copy_id_range.start.z;
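
   /* Illustrative example with made-up numbers: wg_size = {8, 8, 1},
    * ALIGN_ON_SRC_TILE, src_offs = (5, 0, 0) and dst_offs = (3, 0, 0). The
    * copy-id range starts at x = 5, so src_img.offset.x = 0 and
    * dst_img.offset.x = -2: copy ID 5 reads src x = 5 and writes dst x = 3.
    * Negative offsets are fine because the shader only adds them to
    * in-range copy IDs. */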
1970    wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]);
1971    wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]);
1972    wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]);
1973 
1974    disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
1975                           VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info);
1976 
1977    return VK_SUCCESS;
1978 }
1979 
1980 static VkResult
1981 copy_image_prepare_gfx_push_const(struct vk_command_buffer *cmd,
1982                                   struct vk_meta_device *meta,
1983                                   const struct vk_meta_copy_image_key *key,
1984                                   VkPipelineLayout pipeline_layout,
1985                                   struct vk_image *src_img,
1986                                   struct vk_image *dst_img,
1987                                   const VkImageCopy2 *region)
1988 {
1989    struct vk_device *dev = cmd->base.device;
1990    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1991    VkOffset3D src_img_offs =
1992       base_layer_as_offset(key->src.view.type, region->srcOffset,
1993                            region->srcSubresource.baseArrayLayer);
1994 
1995    struct vk_meta_copy_image_fs_info info = {
1996       .dst_to_src_offs = {
1997          /* The subtraction may lead to negative values, but that's fine
1998           * because the shader adds this offset back to the destination
1999           * coordinates, guaranteeing src_coords >= 0. */
2000          .x = src_img_offs.x - region->dstOffset.x,
2001          .y = src_img_offs.y - region->dstOffset.y,
2002          /* The render image view only contains the layers needed for
2003           * rendering, so we consider the coordinate containing the layer
2004           * to always be zero.
2005           */
2006          .z = src_img_offs.z,
2007       },
2008    };
2009 
2010    disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
2011                           VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info);
2012 
2013    return VK_SUCCESS;
2014 }
2015 
2016 static void
2017 copy_image_region_gfx(struct vk_command_buffer *cmd,
2018                       struct vk_meta_device *meta, struct vk_image *src_img,
2019                       VkImageLayout src_image_layout,
2020                       const struct vk_meta_copy_image_properties *src_props,
2021                       struct vk_image *dst_img, VkImageLayout dst_image_layout,
2022                       const struct vk_meta_copy_image_properties *dst_props,
2023                       const VkImageCopy2 *region)
2024 {
2025    struct vk_device *dev = cmd->base.device;
2026    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
2027 
2028    /* We only special-case 1D_ARRAY to move the layer ID to the second
2029     * component instead of the third. For all other view types, let's pick an
2030     * invalid VkImageViewType value so we don't end up creating the same
2031     * pipeline multiple times. */
2032    VkImageViewType dst_view_type =
2033       dst_img->image_type == VK_IMAGE_TYPE_1D && dst_img->array_layers > 1
2034          ? VK_IMAGE_VIEW_TYPE_1D_ARRAY
2035          : (VkImageViewType)-1;
2036 
2037    assert(region->srcSubresource.aspectMask ==
2038           region->dstSubresource.aspectMask);
2039 
2040    struct vk_meta_copy_image_key key = {
2041       .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE,
2042       .bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS,
2043       .samples = src_img->samples,
2044       .aspects = region->srcSubresource.aspectMask,
2045       .src.view = img_copy_view_info(vk_image_sampled_view_type(src_img),
2046                                      region->srcSubresource.aspectMask, src_img,
2047                                      src_props),
2048       .dst.view = img_copy_view_info(dst_view_type,
2049                                      region->dstSubresource.aspectMask, dst_img,
2050                                      dst_props),
2051    };
2052 
2053    VkPipelineLayout pipeline_layout;
2054    VkPipeline pipeline;
2055    VkResult result =
2056       get_copy_image_gfx_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
2057    if (unlikely(result != VK_SUCCESS)) {
2058       vk_command_buffer_set_error(cmd, result);
2059       return;
2060    }
2061 
2062    disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
2063                          VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
2064 
2065    result = copy_image_prepare_gfx_desc_set(cmd, meta, &key, pipeline_layout,
2066                                             src_img, src_image_layout, dst_img,
2067                                             dst_image_layout, region);
2068    if (unlikely(result != VK_SUCCESS)) {
2069       vk_command_buffer_set_error(cmd, result);
2070       return;
2071    }
2072 
2073    result = copy_image_prepare_gfx_push_const(cmd, meta, &key, pipeline_layout,
2074                                               src_img, dst_img, region);
2075    if (unlikely(result != VK_SUCCESS)) {
2076       vk_command_buffer_set_error(cmd, result);
2077       return;
2078    }
2079 
2080    copy_draw(cmd, meta, dst_img, dst_image_layout, &region->dstSubresource,
2081              &region->dstOffset, &region->extent, &key.dst.view);
2082 }
2083 
2084 static void
2085 copy_image_region_compute(struct vk_command_buffer *cmd,
2086                           struct vk_meta_device *meta, struct vk_image *src_img,
2087                           VkImageLayout src_image_layout,
2088                           const struct vk_meta_copy_image_properties *src_props,
2089                           struct vk_image *dst_img,
2090                           VkImageLayout dst_image_layout,
2091                           const struct vk_meta_copy_image_properties *dst_props,
2092                           const VkImageCopy2 *region)
2093 {
2094    struct vk_device *dev = cmd->base.device;
2095    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
2096    VkImageViewType dst_view_type = vk_image_storage_view_type(dst_img);
2097 
2098    assert(region->srcSubresource.aspectMask ==
2099           region->dstSubresource.aspectMask);
2100 
2101    struct vk_meta_copy_image_key key = {
2102       .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE,
2103       .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
2104       .samples = src_img->samples,
2105       .aspects = region->srcSubresource.aspectMask,
2106       .src.view = img_copy_view_info(vk_image_sampled_view_type(src_img),
2107                                      region->srcSubresource.aspectMask, src_img,
2108                                      src_props),
2109       .dst.view = img_copy_view_info(
2110          dst_view_type, region->dstSubresource.aspectMask, dst_img, dst_props),
2111    };
2112 
2113    uint32_t src_pix_per_tile = src_props->tile_size.width *
2114                                src_props->tile_size.height *
2115                                src_props->tile_size.depth;
2116    uint32_t dst_pix_per_tile = dst_props->tile_size.width *
2117                                dst_props->tile_size.height *
2118                                dst_props->tile_size.depth;
2119    enum vk_meta_copy_image_align_policy align_policy;
2120 
2121    if (src_pix_per_tile >= dst_pix_per_tile) {
2122       key.wg_size[0] = src_props->tile_size.width;
2123       key.wg_size[1] = src_props->tile_size.height;
2124       key.wg_size[2] = src_props->tile_size.depth;
2125       align_policy = VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE;
2126    } else {
2127       key.wg_size[0] = dst_props->tile_size.width;
2128       key.wg_size[1] = dst_props->tile_size.height;
2129       key.wg_size[2] = dst_props->tile_size.depth;
2130       align_policy = VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE;
2131    }
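   /* E.g. a 16x16x1 source tile (256 pixels) wins over an 8x8x1 destination
    * tile (64 pixels): reads are then tile-aligned and only the writes may
    * cross destination tile boundaries. */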
2132 
2133    VkPipelineLayout pipeline_layout;
2134    VkPipeline pipeline;
2135    VkResult result = get_copy_image_compute_pipeline(
2136       dev, meta, &key, &pipeline_layout, &pipeline);
2137    if (unlikely(result != VK_SUCCESS)) {
2138       vk_command_buffer_set_error(cmd, result);
2139       return;
2140    }
2141 
2142    disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
2143                          VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2144 
2145    result = copy_image_prepare_compute_desc_set(
2146       cmd, meta, &key, pipeline_layout, src_img, src_image_layout, dst_img,
2147       dst_image_layout, region);
2148    if (unlikely(result != VK_SUCCESS)) {
2149       vk_command_buffer_set_error(cmd, result);
2150       return;
2151    }
2152 
2153    assert(key.wg_size[0] && key.wg_size[1] && key.wg_size[2]);
2154 
2155    uint32_t wg_count[3] = {0};
2156 
2157    result = copy_image_prepare_compute_push_const(
2158       cmd, meta, &key, pipeline_layout, src_img, dst_img, align_policy, region,
2159       wg_count);
2160    if (unlikely(result != VK_SUCCESS)) {
2161       vk_command_buffer_set_error(cmd, result);
2162       return;
2163    }
2164 
2165    disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1],
2166                      wg_count[2]);
2167 }
2168 
2169 void
2170 vk_meta_copy_image(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
2171                    const VkCopyImageInfo2 *info,
2172                    const struct vk_meta_copy_image_properties *src_props,
2173                    const struct vk_meta_copy_image_properties *dst_props,
2174                    VkPipelineBindPoint bind_point)
2175 {
2176    VK_FROM_HANDLE(vk_image, src_img, info->srcImage);
2177    VK_FROM_HANDLE(vk_image, dst_img, info->dstImage);
2178 
2179    for (uint32_t i = 0; i < info->regionCount; i++) {
2180       VkImageCopy2 region = info->pRegions[i];
2181 
2182       region.extent = vk_image_extent_to_elements(src_img, region.extent);
2183       region.srcOffset = vk_image_offset_to_elements(src_img, region.srcOffset);
2184       region.dstOffset = vk_image_offset_to_elements(dst_img, region.dstOffset);
2185 
2186       if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2187          copy_image_region_gfx(cmd, meta, src_img, info->srcImageLayout,
2188                                src_props, dst_img, info->dstImageLayout,
2189                                dst_props, &region);
2190       } else {
2191          copy_image_region_compute(cmd, meta, src_img, info->srcImageLayout,
2192                                    src_props, dst_img, info->dstImageLayout,
2193                                    dst_props, &region);
2194       }
2195    }
2196 }
2197 
2198 static nir_shader *
2199 build_copy_buffer_shader(const struct vk_meta_device *meta,
2200                          const void *key_data)
2201 {
2202    const struct vk_meta_copy_buffer_key *key = key_data;
2203    nir_builder builder = nir_builder_init_simple_shader(
2204       MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer");
2205    nir_builder *b = &builder;
2206 
2207    b->shader->info.workgroup_size[0] =
2208       vk_meta_buffer_access_wg_size(meta, key->chunk_size);
2209    b->shader->info.workgroup_size[1] = 1;
2210    b->shader->info.workgroup_size[2] = 1;
2211 
2212    uint32_t chunk_bit_size, chunk_comp_count;
2213 
2214    assert(util_is_power_of_two_nonzero(key->chunk_size));
2215    if (key->chunk_size <= 4) {
2216       chunk_bit_size = key->chunk_size * 8;
2217       chunk_comp_count = 1;
2218    } else {
2219       chunk_bit_size = 32;
2220       chunk_comp_count = key->chunk_size / 4;
2221    }
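   /* A chunk is thus either a single 8/16/32-bit scalar or a vecN of 32-bit
    * components: chunk_size = 2 loads one 16-bit value, while chunk_size = 16
    * loads a 4x32-bit vector (a uvec4). */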
2222 
2223    assert(chunk_comp_count < NIR_MAX_VEC_COMPONENTS);
2224 
2225    nir_def *global_id = nir_load_global_invocation_id(b, 32);
2226    nir_def *copy_id = nir_channel(b, global_id, 0);
2227    nir_def *offset = nir_imul_imm(b, copy_id, key->chunk_size);
2228    nir_def *size = load_info(b, struct vk_meta_copy_buffer_info, size);
2229 
2230    nir_push_if(b, nir_ult(b, offset, size));
2231 
2232    offset = nir_u2u64(b, offset);
2233 
2234    nir_def *src_addr = load_info(b, struct vk_meta_copy_buffer_info, src_addr);
2235    nir_def *dst_addr = load_info(b, struct vk_meta_copy_buffer_info, dst_addr);
2236    nir_def *data = nir_build_load_global(b, chunk_comp_count, chunk_bit_size,
2237                                          nir_iadd(b, src_addr, offset),
2238                                          .align_mul = chunk_bit_size / 8);
2239 
2240    nir_build_store_global(b, data, nir_iadd(b, dst_addr, offset),
2241                           .align_mul = key->chunk_size);
2242 
2243    nir_pop_if(b, NULL);
2244 
2245    return b->shader;
2246 }
2247 
2248 static VkResult
2249 get_copy_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta,
2250                          const struct vk_meta_copy_buffer_key *key,
2251                          VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
2252 {
2253    VkResult result = get_copy_pipeline_layout(
2254       device, meta, "vk-meta-copy-buffer-pipeline-layout",
2255       VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_buffer_info),
2256       NULL, 0, layout_out);
2257 
2258    if (unlikely(result != VK_SUCCESS))
2259       return result;
2260 
2261    return get_compute_copy_pipeline(device, meta, *layout_out,
2262                                     build_copy_buffer_shader, key, sizeof(*key),
2263                                     pipeline_out);
2264 }
2265 
2266 static void
2267 copy_buffer_region(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
2268                    VkBuffer src, VkBuffer dst, const VkBufferCopy2 *region)
2269 {
2270    struct vk_device *dev = cmd->base.device;
2271    const struct vk_physical_device *pdev = dev->physical;
2272    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
2273    VkResult result;
2274 
2275    struct vk_meta_copy_buffer_key key = {
2276       .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_PIPELINE,
2277    };
2278 
2279    VkDeviceSize size = region->size;
2280    VkDeviceAddress src_addr =
2281       vk_meta_buffer_address(dev, src, region->srcOffset, size);
2282    VkDeviceAddress dst_addr =
2283       vk_meta_buffer_address(dev, dst, region->dstOffset, size);
2284 
2285    /* Combine the size and src/dst address to extract the alignment. */
2286    uint64_t align = src_addr | dst_addr | size;
2287 
2288    assert(align != 0);
2289 
2290    /* Use the lowest set bit of the combined address/size (the largest power
2291     * of two dividing all three) as the chunk size, capped at 16 bytes (a
2292     * uvec4) for now. */
2293    key.chunk_size = MIN2(16, 1 << (ffs(align) - 1));
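   /* Example with made-up addresses: src_addr = 0x1004, dst_addr = 0x2008
    * and size = 0x30 combine to 0x303c; ffs() returns 3, so the chunk size
    * is 1 << 2 = 4 bytes. */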
2294 
2295    VkPipelineLayout pipeline_layout;
2296    VkPipeline pipeline;
2297    result =
2298       get_copy_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
2299    if (unlikely(result != VK_SUCCESS)) {
2300       vk_command_buffer_set_error(cmd, result);
2301       return;
2302    }
2303 
2304    disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
2305                          VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2306 
2307    const uint32_t optimal_wg_size =
2308       vk_meta_buffer_access_wg_size(meta, key.chunk_size);
2309    const uint32_t per_wg_copy_size = optimal_wg_size * key.chunk_size;
2310    uint32_t max_per_dispatch_size =
2311       pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size;
2312 
2313    assert(optimal_wg_size <= pdev->properties.maxComputeWorkGroupSize[0]);
2314 
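   /* Copies bigger than one dispatch can cover are split into several
    * dispatches. E.g. with a made-up chunk_size of 16, a workgroup size of
    * 64 and maxComputeWorkGroupCount[0] = 65535, one dispatch covers up to
    * 65535 * 1024 bytes, just under 64 MiB. */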
2315    while (size) {
2316       struct vk_meta_copy_buffer_info args = {
2317          .size = MIN2(size, max_per_dispatch_size),
2318          .src_addr = src_addr,
2319          .dst_addr = dst_addr,
2320       };
2321       uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size);
2322 
2323       disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
2324                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
2325                              &args);
2326 
2327       disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1);
2328 
2329       src_addr += args.size;
2330       dst_addr += args.size;
2331       size -= args.size;
2332    }
2333 }
2334 
2335 void
2336 vk_meta_copy_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
2337                     const VkCopyBufferInfo2 *info)
2338 {
2339    for (unsigned i = 0; i < info->regionCount; i++) {
2340       const VkBufferCopy2 *region = &info->pRegions[i];
2341 
2342       copy_buffer_region(cmd, meta, info->srcBuffer, info->dstBuffer, region);
2343    }
2344 }
2345 
2346 void
2347 vk_meta_update_buffer(struct vk_command_buffer *cmd,
2348                       struct vk_meta_device *meta, VkBuffer buffer,
2349                       VkDeviceSize offset, VkDeviceSize size, const void *data)
2350 {
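   /* vkCmdUpdateBuffer is implemented by staging the data in a temporary
    * host-mapped buffer and re-using the buffer copy path to move it into
    * place. */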
2351    VkResult result;
2352 
2353    const VkBufferCreateInfo tmp_buffer_info = {
2354       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2355       .size = size,
2356       .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
2357       .queueFamilyIndexCount = 1,
2358       .pQueueFamilyIndices = &cmd->pool->queue_family_index,
2359    };
2360 
2361    VkBuffer tmp_buffer;
2362    result = vk_meta_create_buffer(cmd, meta, &tmp_buffer_info, &tmp_buffer);
2363    if (unlikely(result != VK_SUCCESS)) {
2364       vk_command_buffer_set_error(cmd, result);
2365       return;
2366    }
2367 
2368    void *tmp_buffer_map;
2369    result = meta->cmd_bind_map_buffer(cmd, meta, tmp_buffer, &tmp_buffer_map);
2370    if (unlikely(result != VK_SUCCESS)) {
2371       vk_command_buffer_set_error(cmd, result);
2372       return;
2373    }
2374 
2375    memcpy(tmp_buffer_map, data, size);
2376 
2377    const VkBufferCopy2 copy_region = {
2378       .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
2379       .srcOffset = 0,
2380       .dstOffset = offset,
2381       .size = size,
2382    };
2383    const VkCopyBufferInfo2 copy_info = {
2384       .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
2385       .srcBuffer = tmp_buffer,
2386       .dstBuffer = buffer,
2387       .regionCount = 1,
2388       .pRegions = &copy_region,
2389    };
2390 
2391    vk_meta_copy_buffer(cmd, meta, &copy_info);
2392 }
2393 
2394 static nir_shader *
2395 build_fill_buffer_shader(const struct vk_meta_device *meta,
2396                          UNUSED const void *key_data)
2397 {
2398    nir_builder builder = nir_builder_init_simple_shader(
2399       MESA_SHADER_COMPUTE, NULL, "vk-meta-fill-buffer");
2400    nir_builder *b = &builder;
2401 
2402    b->shader->info.workgroup_size[0] = vk_meta_buffer_access_wg_size(meta, 4);
2403    b->shader->info.workgroup_size[1] = 1;
2404    b->shader->info.workgroup_size[2] = 1;
2405 
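   /* Each invocation fills one 32-bit word; the bounds check below masks off
    * invocations past the end of the fill range. */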
2406    nir_def *global_id = nir_load_global_invocation_id(b, 32);
2407    nir_def *copy_id = nir_channel(b, global_id, 0);
2408    nir_def *offset = nir_imul_imm(b, copy_id, 4);
2409    nir_def *size = load_info(b, struct vk_meta_fill_buffer_info, size);
2410    nir_def *data = load_info(b, struct vk_meta_fill_buffer_info, data);
2411 
2412    nir_push_if(b, nir_ult(b, offset, size));
2413 
2414    offset = nir_u2u64(b, offset);
2415 
2416    nir_def *buf_addr =
2417       load_info(b, struct vk_meta_fill_buffer_info, buf_addr);
2418 
2419    nir_build_store_global(b, data, nir_iadd(b, buf_addr, offset),
2420                           .align_mul = 4);
2421 
2422    nir_pop_if(b, NULL);
2423 
2424    return b->shader;
2425 }
2426 
2427 static VkResult
2428 get_fill_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta,
2429                          const struct vk_meta_fill_buffer_key *key,
2430                          VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
2431 {
2432    VkResult result = get_copy_pipeline_layout(
2433       device, meta, "vk-meta-fill-buffer-pipeline-layout",
2434       VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_fill_buffer_info),
2435       NULL, 0, layout_out);
2436    if (unlikely(result != VK_SUCCESS))
2437       return result;
2438 
2439    return get_compute_copy_pipeline(device, meta, *layout_out,
2440                                     build_fill_buffer_shader, key, sizeof(*key),
2441                                     pipeline_out);
2442 }
2443 
2444 void
2445 vk_meta_fill_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
2446                     VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
2447                     uint32_t data)
2448 {
2449    VK_FROM_HANDLE(vk_buffer, buf, buffer);
2450    struct vk_device *dev = cmd->base.device;
2451    const struct vk_physical_device *pdev = dev->physical;
2452    const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
2453    VkResult result;
2454 
2455    struct vk_meta_fill_buffer_key key = {
2456       .key_type = VK_META_OBJECT_KEY_FILL_BUFFER_PIPELINE,
2457    };
2458 
2459    VkPipelineLayout pipeline_layout;
2460    VkPipeline pipeline;
2461    result =
2462       get_fill_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
2463    if (unlikely(result != VK_SUCCESS)) {
2464       vk_command_buffer_set_error(cmd, result);
2465       return;
2466    }
2467 
2468    disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
2469                          VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2470 
2471    /* From the Vulkan 1.3.290 spec:
2472     *
2473     *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not a
2474     *    multiple of 4, then the nearest smaller multiple is used."
2475     *
2476     * hence the mask to align the size on 4 bytes here.
2477     */
2478    size = vk_buffer_range(buf, offset, size) & ~3u;
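   /* E.g. a remaining range of 25 bytes is clamped down to 24. */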
2479 
2480    const uint32_t optimal_wg_size = vk_meta_buffer_access_wg_size(meta, 4);
2481    const uint32_t per_wg_copy_size = optimal_wg_size * 4;
2482    uint32_t max_per_dispatch_size =
2483       pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size;
2484 
2485    while (size > 0) {
2486       struct vk_meta_fill_buffer_info args = {
2487          .size = MIN2(size, max_per_dispatch_size),
2488          .buf_addr = vk_meta_buffer_address(dev, buffer, offset, size),
2489          .data = data,
2490       };
2491       uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size);
2492 
2493       disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
2494                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
2495                              &args);
2496 
2497       disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1);
2498 
2499       offset += args.size;
2500       size -= args.size;
2501    }
2502 }
2503