/*
 * Copyright © 2023 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nir/nir_builder.h"
#include "nir/nir_format_convert.h"

#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_device.h"
#include "vk_format.h"
#include "vk_meta.h"
#include "vk_meta_private.h"
#include "vk_physical_device.h"
#include "vk_pipeline.h"

#include "util/format/u_format.h"

struct vk_meta_fill_buffer_key {
   enum vk_meta_object_key_type key_type;
};

struct vk_meta_copy_buffer_key {
   enum vk_meta_object_key_type key_type;

   uint32_t chunk_size;
};

struct vk_meta_copy_image_view {
   VkImageViewType type;

   union {
      struct {
         VkFormat format;
      } color;
      struct {
         struct {
            VkFormat format;
            nir_component_mask_t component_mask;
         } depth, stencil;
      };
   };
};

struct vk_meta_copy_buffer_image_key {
   enum vk_meta_object_key_type key_type;

   VkPipelineBindPoint bind_point;

   struct {
      struct vk_meta_copy_image_view view;

      VkImageAspectFlagBits aspect;
   } img;

   uint32_t wg_size[3];
};

struct vk_meta_copy_image_key {
   enum vk_meta_object_key_type key_type;

   VkPipelineBindPoint bind_point;

   /* Source and destination views for the aspects being copied. */
   struct {
      struct vk_meta_copy_image_view view;
   } src, dst;

   VkImageAspectFlagBits aspects;
   VkSampleCountFlagBits samples;

   uint32_t wg_size[3];
};

#define load_info(__b, __type, __field_name)                                   \
   nir_load_push_constant((__b), 1,                                            \
                          sizeof(((__type *)NULL)->__field_name) * 8,          \
                          nir_imm_int((__b), offsetof(__type, __field_name)))
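/* Example: load_info(b, struct vk_meta_copy_buffer_image_info, buf.row_stride)
 * emits a single push-constant load sized to the field and placed at its
 * struct offset, so the *_info structs below double as the push-constant
 * layout shared with the host side.
 */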

struct vk_meta_fill_buffer_info {
   uint64_t buf_addr;
   uint32_t data;
   uint32_t size;
};

struct vk_meta_copy_buffer_info {
   uint64_t src_addr;
   uint64_t dst_addr;
   uint32_t size;
};

struct vk_meta_copy_buffer_image_info {
   struct {
      uint64_t addr;
      uint32_t row_stride;
      uint32_t image_stride;
   } buf;

   struct {
      struct {
         uint32_t x, y, z;
      } offset;
   } img;

   /* The workgroup size is chosen based on the image tile size, which means
    * threads may be issued outside the image area we want to copy from/to.
    * This field encodes the range of copy IDs to process (anything outside
    * it is skipped) and also serves as an adjustment for the buffer/image
    * coordinates. */
   struct {
      struct {
         uint32_t x, y, z;
      } start, end;
   } copy_id_range;
};

struct vk_meta_copy_image_fs_info {
   struct {
      int32_t x, y, z;
   } dst_to_src_offs;
};

struct vk_meta_copy_image_cs_info {
   struct {
      struct {
         uint32_t x, y, z;
      } offset;
   } src_img, dst_img;

   /* The workgroup size is chosen based on the image tile size, which means
    * threads may be issued outside the image area we want to copy from/to.
    * This field encodes the range of copy IDs to process (anything outside
    * it is skipped) and also serves as an adjustment for the buffer/image
    * coordinates. */
   struct {
      struct {
         uint32_t x, y, z;
      } start, end;
   } copy_id_range;
};

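/* Fold the base array layer into the unused axis of the offset so that array
 * layers and 3D depth slices can be addressed uniformly through the third (or
 * second, for 1D arrays) coordinate.
 */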
static VkOffset3D
base_layer_as_offset(VkImageViewType view_type, VkOffset3D offset,
                     uint32_t base_layer)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return (VkOffset3D){
         .x = offset.x,
      };

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
      return (VkOffset3D){
         .x = offset.x,
         .y = base_layer,
      };

   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_CUBE:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return (VkOffset3D){
         .x = offset.x,
         .y = offset.y,
         .z = base_layer,
      };

   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_3D:
      return offset;

   default:
      assert(!"Invalid view type");
      return (VkOffset3D){0};
   }
}

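/* Counterpart of base_layer_as_offset(): fold the layer count into the unused
 * axis of the extent so the copy region always forms a 3D box.
 */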
static VkExtent3D
layer_count_as_extent(VkImageViewType view_type, VkExtent3D extent,
                      uint32_t layer_count)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return (VkExtent3D){
         .width = extent.width,
         .height = 1,
         .depth = 1,
      };

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
      return (VkExtent3D){
         .width = extent.width,
         .height = layer_count,
         .depth = 1,
      };

   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_CUBE:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return (VkExtent3D){
         .width = extent.width,
         .height = extent.height,
         .depth = layer_count,
      };

   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_3D:
      return extent;

   default:
      assert(!"Invalid view type");
      return (VkExtent3D){0};
   }
}

#define COPY_SHADER_BINDING(__binding, __type, __stage)                        \
   {                                                                           \
      .binding = __binding,                                                    \
      .descriptorCount = 1,                                                    \
      .descriptorType = VK_DESCRIPTOR_TYPE_##__type,                           \
      .stageFlags = VK_SHADER_STAGE_##__stage##_BIT,                           \
   }

static VkResult
get_copy_pipeline_layout(struct vk_device *device, struct vk_meta_device *meta,
                         const char *key, VkShaderStageFlagBits shader_stage,
                         size_t push_const_size,
                         const struct VkDescriptorSetLayoutBinding *bindings,
                         uint32_t binding_count, VkPipelineLayout *layout_out)
{
   const VkDescriptorSetLayoutCreateInfo set_layout = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = binding_count,
      .pBindings = bindings,
   };

   const VkPushConstantRange push_range = {
      .stageFlags = shader_stage,
      .offset = 0,
      .size = push_const_size,
   };

   return vk_meta_get_pipeline_layout(device, meta, &set_layout, &push_range,
                                      key, strlen(key) + 1, layout_out);
}

#define COPY_PUSH_SET_IMG_DESC(__binding, __type, __iview, __layout)           \
   {                                                                           \
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,                         \
      .dstBinding = __binding,                                                 \
      .descriptorType = VK_DESCRIPTOR_TYPE_##__type##_IMAGE,                   \
      .descriptorCount = 1,                                                    \
      .pImageInfo = &(VkDescriptorImageInfo){                                  \
         .imageView = __iview,                                                 \
         .imageLayout = __layout,                                              \
      },                                                                       \
   }

static VkFormat
copy_img_view_format_for_aspect(const struct vk_meta_copy_image_view *info,
                                VkImageAspectFlagBits aspect)
{
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      return info->color.format;

   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return info->depth.format;

   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return info->stencil.format;

   default:
      assert(!"Unsupported aspect");
      return VK_FORMAT_UNDEFINED;
   }
}

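/* Depth and stencil are considered interleaved when both aspects are backed by
 * the same view format and claim disjoint, non-empty component masks (for
 * example, a packed depth/stencil layout exposed through a single uint view
 * where depth and stencil live in different components).
 */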
static bool
depth_stencil_interleaved(const struct vk_meta_copy_image_view *view)
{
   return view->stencil.format != VK_FORMAT_UNDEFINED &&
          view->depth.format != VK_FORMAT_UNDEFINED &&
          view->stencil.format == view->depth.format &&
          view->stencil.component_mask != 0 &&
          view->depth.component_mask != 0 &&
          (view->stencil.component_mask & view->depth.component_mask) == 0;
}

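/* Fragment-only copy pipeline: one color attachment is set up per copied
 * aspect, except when depth and stencil are interleaved in the same view, in
 * which case both aspects share attachment 0 and only the write masks differ.
 */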
static VkResult
get_gfx_copy_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   VkPipelineLayout layout, VkSampleCountFlagBits samples,
   nir_shader *(*build_nir)(const struct vk_meta_device *, const void *),
   VkImageAspectFlagBits aspects, const struct vk_meta_copy_image_view *view,
   const void *key_data, size_t key_size, VkPipeline *pipeline_out)
{
   VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size);
   if (from_cache != VK_NULL_HANDLE) {
      *pipeline_out = from_cache;
      return VK_SUCCESS;
   }

   const VkPipelineShaderStageNirCreateInfoMESA fs_nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir = build_nir(meta, key_data),
   };
   const VkPipelineShaderStageCreateInfo fs_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &fs_nir_info,
      .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
      .pName = "main",
   };

   VkPipelineDepthStencilStateCreateInfo ds_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
   };
   VkPipelineDynamicStateCreateInfo dyn_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
   };
   struct vk_meta_rendering_info render = {
      .samples = samples,
   };

   const VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .stageCount = 1,
      .pStages = &fs_info,
      .pDepthStencilState = &ds_info,
      .pDynamicState = &dyn_info,
      .layout = layout,
   };

   if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, aspects);

      render.color_attachment_formats[render.color_attachment_count] = fmt;
      render.color_attachment_write_masks[render.color_attachment_count] =
         VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
         VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
      render.color_attachment_count++;
   }

   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_DEPTH_BIT);

      render.color_attachment_formats[render.color_attachment_count] = fmt;
      render.color_attachment_write_masks[render.color_attachment_count] =
         (VkColorComponentFlags)view->depth.component_mask;
      render.color_attachment_count++;
   }

   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      VkFormat fmt =
         copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_STENCIL_BIT);

      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT &&
          depth_stencil_interleaved(view)) {
         render.color_attachment_write_masks[0] |= view->stencil.component_mask;
      } else {
         render.color_attachment_formats[render.color_attachment_count] = fmt;
         render.color_attachment_write_masks[render.color_attachment_count] =
            (VkColorComponentFlags)view->stencil.component_mask;
         render.color_attachment_count++;
      }
   }

   VkResult result = vk_meta_create_graphics_pipeline(
      device, meta, &info, &render, key_data, key_size, pipeline_out);

   ralloc_free(fs_nir_info.nir);

   return result;
}

static VkResult
get_compute_copy_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   VkPipelineLayout layout,
   nir_shader *(*build_nir)(const struct vk_meta_device *, const void *),
   const void *key_data, size_t key_size, VkPipeline *pipeline_out)
{
   VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size);
   if (from_cache != VK_NULL_HANDLE) {
      *pipeline_out = from_cache;
      return VK_SUCCESS;
   }

   const VkPipelineShaderStageNirCreateInfoMESA cs_nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir = build_nir(meta, key_data),
   };

   const VkComputePipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .pNext = &cs_nir_info,
         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
         .pName = "main",
      },
      .layout = layout,
   };

   VkResult result = vk_meta_create_compute_pipeline(
      device, meta, &info, key_data, key_size, pipeline_out);

   ralloc_free(cs_nir_info.nir);

   return result;
}

static VkResult
copy_create_src_image_view(struct vk_command_buffer *cmd,
                           struct vk_meta_device *meta, struct vk_image *img,
                           const struct vk_meta_copy_image_view *view_info,
                           VkImageAspectFlags aspect,
                           const VkImageSubresourceLayers *subres,
                           VkImageView *view_out)
{
   const VkImageViewUsageCreateInfo usage = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
      .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
   };

   VkFormat format = copy_img_view_format_for_aspect(view_info, aspect);

   VkImageViewCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .pNext = &usage,
      .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
      .image = vk_image_to_handle(img),
      .viewType = view_info->type,
      .format = format,
      .subresourceRange = {
         .aspectMask = vk_format_aspects(format),
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = 0,
         .layerCount = img->array_layers,
      },
   };

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_STENCIL_BIT
                                          ? view_info->stencil.component_mask
                                          : view_info->depth.component_mask;
      assert(comp_mask != 0);

      VkComponentSwizzle *swizzle = &info.components.r;
      unsigned num_comps = util_bitcount(comp_mask);
      unsigned first_comp = ffs(comp_mask) - 1;

      assert(first_comp + num_comps <= 4);

      for (unsigned i = 0; i < num_comps; i++)
         swizzle[i] = first_comp + i + VK_COMPONENT_SWIZZLE_R;
   }

   return vk_meta_create_image_view(cmd, meta, &info, view_out);
}

static VkResult
copy_create_dst_image_view(struct vk_command_buffer *cmd,
                           struct vk_meta_device *meta, struct vk_image *img,
                           const struct vk_meta_copy_image_view *view_info,
                           VkImageAspectFlags aspect, const VkOffset3D *offset,
                           const VkExtent3D *extent,
                           const VkImageSubresourceLayers *subres,
                           VkPipelineBindPoint bind_point,
                           VkImageView *view_out)
{
   uint32_t layer_count, base_layer;
   VkFormat format = copy_img_view_format_for_aspect(view_info, aspect);
   VkImageAspectFlags fmt_aspects = vk_format_aspects(format);
   const VkImageViewUsageCreateInfo usage = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
      .usage = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE
                  ? VK_IMAGE_USAGE_STORAGE_BIT
                  : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
   };

   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      layer_count =
         MAX2(extent->depth, vk_image_subresource_layer_count(img, subres));
      base_layer = img->image_type == VK_IMAGE_TYPE_3D ? offset->z
                                                       : subres->baseArrayLayer;
   } else {
      /* Always create a view covering the whole image in case of compute. */
      layer_count = img->image_type == VK_IMAGE_TYPE_3D ? 1 : img->array_layers;
      base_layer = 0;
   }

   const VkImageViewCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .pNext = &usage,
      .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
      .image = vk_image_to_handle(img),
      .viewType = bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS
                     ? vk_image_render_view_type(img, layer_count)
                     : vk_image_storage_view_type(img),
      .format = format,
      .subresourceRange = {
         .aspectMask = fmt_aspects,
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = base_layer,
         .layerCount = layer_count,
      },
   };

   return vk_meta_create_image_view(cmd, meta, &info, view_out);
}

static nir_def *
trim_img_coords(nir_builder *b, VkImageViewType view_type, nir_def *coords)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
      return nir_channel(b, coords, 0);

   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_2D:
      return nir_trim_vector(b, coords, 2);

   default:
      return nir_trim_vector(b, coords, 3);
   }
}

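/* Address of the block pointed to by coords in the linear buffer:
 * buf.addr + z * buf.image_stride + y * buf.row_stride + x * block_size.
 */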
static nir_def *
copy_img_buf_addr(nir_builder *b, enum pipe_format pfmt, nir_def *coords)
{
   nir_def *buf_row_stride =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.row_stride);
   nir_def *buf_img_stride =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.image_stride);
   nir_def *buf_addr =
      load_info(b, struct vk_meta_copy_buffer_image_info, buf.addr);
   nir_def *offset = nir_imul(b, nir_channel(b, coords, 2), buf_img_stride);
   unsigned blk_sz = util_format_get_blocksize(pfmt);

   offset = nir_iadd(b, offset,
                     nir_imul(b, nir_channel(b, coords, 1), buf_row_stride));
   offset = nir_iadd(b, offset,
                     nir_imul_imm(b, nir_channel(b, coords, 0), blk_sz));

   return nir_iadd(b, buf_addr, nir_u2u64(b, offset));
}

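/* Raw uint format used for the buffer side of the copy: R8 for stencil, R16 or
 * R32 for depth depending on how many depth bits the view carries, and a uint
 * format matching the block size for color.
 */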
static VkFormat
copy_img_buf_format_for_aspect(const struct vk_meta_copy_image_view *info,
                               VkImageAspectFlagBits aspect)
{
   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
      enum pipe_format pfmt = vk_format_to_pipe_format(info->depth.format);
      unsigned num_comps = util_format_get_nr_components(pfmt);
      unsigned depth_comp_bits = 0;

      for (unsigned i = 0; i < num_comps; i++) {
         if (info->depth.component_mask & BITFIELD_BIT(i))
            depth_comp_bits += util_format_get_component_bits(
               pfmt, UTIL_FORMAT_COLORSPACE_RGB, i);
      }

      switch (depth_comp_bits) {
      case 16:
         return VK_FORMAT_R16_UINT;
      case 24:
      case 32:
         return VK_FORMAT_R32_UINT;
      default:
         assert(!"Unsupported format");
         return VK_FORMAT_UNDEFINED;
      }
   } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
      return VK_FORMAT_R8_UINT;
   }

   enum pipe_format pfmt = vk_format_to_pipe_format(info->color.format);

   switch (util_format_get_blocksize(pfmt)) {
   case 1:
      return VK_FORMAT_R8_UINT;
   case 2:
      return VK_FORMAT_R16_UINT;
   case 3:
      return VK_FORMAT_R8G8B8_UINT;
   case 4:
      return VK_FORMAT_R32_UINT;
   case 6:
      return VK_FORMAT_R16G16B16_UINT;
   case 8:
      return VK_FORMAT_R32G32_UINT;
   case 12:
      return VK_FORMAT_R32G32B32_UINT;
   case 16:
      return VK_FORMAT_R32G32B32A32_UINT;
   default:
      assert(!"Unsupported format");
      return VK_FORMAT_UNDEFINED;
   }
}

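/* Reinterpret a texel between formats by packing it with the source format and
 * unpacking the raw bits with the destination format. No value conversion is
 * done beyond the bit-level repack (and zero-padding when the destination
 * block is larger).
 */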
static nir_def *
convert_texel(nir_builder *b, VkFormat src_fmt, VkFormat dst_fmt,
              nir_def *texel)
{
   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_fmt);
   enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_fmt);

   if (src_pfmt == dst_pfmt)
      return texel;

   unsigned src_blksz = util_format_get_blocksize(src_pfmt);
   unsigned dst_blksz = util_format_get_blocksize(dst_pfmt);

   nir_def *packed = nir_format_pack_rgba(b, src_pfmt, texel);

   /* Needed for depth/stencil copies where the source/dest formats might
    * have a different size. */
   if (src_blksz < dst_blksz)
      packed = nir_pad_vector_imm_int(b, packed, 0, 4);

   nir_def *unpacked = nir_format_unpack_rgba(b, packed, dst_pfmt);

   return unpacked;
}

static nir_def *
place_ds_texel(nir_builder *b, VkFormat fmt, nir_component_mask_t comp_mask,
               nir_def *texel)
{
   assert(comp_mask != 0);

   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   unsigned num_comps = util_format_get_nr_components(pfmt);

   if (comp_mask == nir_component_mask(num_comps))
      return texel;

   assert(num_comps <= 4);

   nir_def *comps[4];
   unsigned c = 0;

   for (unsigned i = 0; i < num_comps; i++) {
      if (comp_mask & BITFIELD_BIT(i))
         comps[i] = nir_channel(b, texel, c++);
      else
         comps[i] = nir_imm_intN_t(b, 0, texel->bit_size);
   }

   return nir_vec(b, comps, num_comps);
}

static nir_deref_instr *
tex_deref(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          unsigned binding)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   bool is_array = vk_image_view_type_is_array(view->type);
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const char *tex_name;
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      tex_name = "color_tex";
      break;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      tex_name = "depth_tex";
      break;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      tex_name = "stencil_tex";
      break;
   default:
      assert(!"Unsupported aspect");
      return NULL;
   }

   const struct glsl_type *texture_type =
      glsl_sampler_type(sampler_dim, false, is_array, base_type);
   nir_variable *texture =
      nir_variable_create(b->shader, nir_var_uniform, texture_type, tex_name);
   texture->data.descriptor_set = 0;
   texture->data.binding = binding;

   return nir_build_deref_var(b, texture);
}

static nir_deref_instr *
img_deref(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          unsigned binding)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   bool is_array = vk_image_view_type_is_array(view->type);
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const char *img_name;
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      img_name = "color_img";
      break;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      img_name = "depth_img";
      break;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      img_name = "stencil_img";
      break;
   default:
      assert(!"Unsupported aspect");
      return NULL;
   }
   const struct glsl_type *image_type =
      glsl_image_type(sampler_dim, is_array, base_type);
   nir_variable *image_var =
      nir_variable_create(b->shader, nir_var_uniform, image_type, img_name);
   image_var->data.descriptor_set = 0;
   image_var->data.binding = binding;

   return nir_build_deref_var(b, image_var);
}

static nir_def *
read_texel(nir_builder *b, nir_deref_instr *tex_deref, nir_def *coords,
           nir_def *sample_id)
{
   return sample_id ? nir_txf_ms_deref(b, tex_deref, coords, sample_id)
                    : nir_txf_deref(b, tex_deref, coords, NULL);
}

static nir_variable *
frag_var(nir_builder *b, const struct vk_meta_copy_image_view *view,
         VkImageAspectFlags aspect, uint32_t rt)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   const struct glsl_type *var_type = glsl_vector_type(base_type, 4);
   static const char *var_names[] = {
      "gl_FragData[0]",
      "gl_FragData[1]",
   };

   assert(rt < ARRAY_SIZE(var_names));

   nir_variable *var = nir_variable_create(b->shader, nir_var_shader_out,
                                           var_type, var_names[rt]);
   var->data.location = FRAG_RESULT_DATA0 + rt;

   return var;
}

static void
write_frag(nir_builder *b, const struct vk_meta_copy_image_view *view,
           VkImageAspectFlags aspect, nir_variable *frag_var, nir_def *frag_val)
{
   nir_component_mask_t comp_mask;

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);

      comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT
                     ? view->depth.component_mask
                     : view->stencil.component_mask;
      frag_val = place_ds_texel(b, fmt, comp_mask, frag_val);
   } else {
      comp_mask = nir_component_mask(4);
   }

   if (frag_val->bit_size != 32) {
      switch (glsl_get_base_type(frag_var->type)) {
      case GLSL_TYPE_INT:
         frag_val = nir_i2i32(b, frag_val);
         break;
      case GLSL_TYPE_UINT:
         frag_val = nir_u2u32(b, frag_val);
         break;
      case GLSL_TYPE_FLOAT:
         frag_val = nir_f2f32(b, frag_val);
         break;
      default:
         assert(!"Invalid type");
         frag_val = NULL;
         break;
      }
   }

   frag_val = nir_pad_vector_imm_int(b, frag_val, 0, 4);

   nir_store_var(b, frag_var, frag_val, comp_mask);
}

static void
write_img(nir_builder *b, const struct vk_meta_copy_image_view *view,
          VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
          nir_deref_instr *img_deref, nir_def *coords, nir_def *sample_id,
          nir_def *val)
{
   VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
   enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
   enum glsl_base_type base_type =
      util_format_is_pure_sint(pfmt)   ? GLSL_TYPE_INT
      : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
                                       : GLSL_TYPE_FLOAT;
   enum glsl_sampler_dim sampler_dim =
      samples != VK_SAMPLE_COUNT_1_BIT
         ? GLSL_SAMPLER_DIM_MS
         : vk_image_view_type_to_sampler_dim(view->type);
   bool is_array = vk_image_view_type_is_array(view->type);

   if (!sample_id) {
      assert(samples == VK_SAMPLE_COUNT_1_BIT);
      sample_id = nir_imm_int(b, 0);
   }

   unsigned access_flags = ACCESS_NON_READABLE;
   nir_def *zero_lod = nir_imm_int(b, 0);

   if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT
                                          ? view->depth.component_mask
                                          : view->stencil.component_mask;
      unsigned num_comps = util_format_get_nr_components(pfmt);

      val = place_ds_texel(b, fmt, comp_mask, val);

      if (comp_mask != nir_component_mask(num_comps)) {
         nir_def *comps[4];
         access_flags = 0;

         nir_def *old_val = nir_image_deref_load(b,
            val->num_components, val->bit_size, &img_deref->def, coords,
            sample_id, zero_lod, .image_dim = sampler_dim,
            .image_array = is_array, .format = pfmt, .access = access_flags,
            .dest_type = nir_get_nir_type_for_glsl_base_type(base_type));

         for (unsigned i = 0; i < val->num_components; i++) {
            if (comp_mask & BITFIELD_BIT(i))
               comps[i] = nir_channel(b, val, i);
            else
               comps[i] = nir_channel(b, old_val, i);
         }

         val = nir_vec(b, comps, val->num_components);
      }
   }

   nir_image_deref_store(b,
      &img_deref->def, coords, sample_id, val, zero_lod,
      .image_dim = sampler_dim, .image_array = is_array, .format = pfmt,
      .access = access_flags,
      .src_type = nir_get_nir_type_for_glsl_base_type(base_type));
}

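/* Image-to-buffer copy shader: one invocation per texel. Invocations whose
 * global ID falls outside copy_id_range are discarded so the workgroup size
 * can stay aligned with the image tile size; the remaining ones read the
 * source texel and store its raw value to the destination buffer.
 */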
static nir_shader *
build_image_to_buffer_shader(const struct vk_meta_device *meta,
                             const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-to-buffer");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] = key->wg_size[0];
   b->shader->info.workgroup_size[1] = key->wg_size[1];
   b->shader->info.workgroup_size[2] = key->wg_size[2];

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);

   nir_def *copy_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id_start =
      nir_vec3(b,
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.x),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.y),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.z));
   nir_def *copy_id_end = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y),
      load_info(b, struct vk_meta_copy_buffer_image_info,
                copy_id_range.end.z));

   nir_def *in_bounds =
      nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
               nir_ball(b, nir_ult(b, copy_id, copy_id_end)));

   nir_push_if(b, in_bounds);

   copy_id = nir_isub(b, copy_id, copy_id_start);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   nir_def *img_coords =
      trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs));

   VkFormat iview_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   nir_deref_instr *tex =
      tex_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0);
   nir_def *texel = read_texel(b, tex, img_coords, NULL);

   texel = convert_texel(b, iview_fmt, buf_fmt, texel);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
   unsigned comp_count = util_format_get_nr_components(buf_pfmt);
   assert(blk_sz % comp_count == 0);
   unsigned comp_sz = (blk_sz / comp_count) * 8;

   /* nir_format_unpack_rgba() (which is called from convert_texel()) always
    * returns a 32-bit result, which we might have to downsize to match the
    * component size we want, hence the u2uN().
    */
   texel = nir_u2uN(b, texel, comp_sz);

   /* nir_format_unpack_rgba() (which is called from convert_texel()) returns
    * a vec4, which means we might have more components than we need, but
    * that's fine because we pass a write_mask to store_global.
    */
   assert(texel->num_components >= comp_count);
   nir_store_global(b, copy_img_buf_addr(b, buf_pfmt, copy_id),
                    comp_sz / 8, texel, nir_component_mask(comp_count));

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_copy_image_to_buffer_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   const VkDescriptorSetLayoutBinding bindings[] = {
      COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE),
   };

   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-image-to-buffer-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), bindings,
      ARRAY_SIZE(bindings), layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_image_to_buffer_shader, key,
                                    sizeof(*key), pipeline_out);
}

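/* Buffer-to-image copy fragment shader: the destination texel is identified by
 * gl_FragCoord and the layer ID, which are translated back into a buffer
 * offset to fetch the source data from.
 */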
static nir_shader *
build_buffer_to_image_fs(const struct vk_meta_device *meta,
                         const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-buffer-to-image-frag");
   nir_builder *b = &builder;

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);

   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
   nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b));
   nir_def *out_layer = nir_load_layer_id(b);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   /* Move the layer ID to the second coordinate if we're dealing with a 1D
    * array, as this is where the texture instruction expects it. */
   nir_def *coords = key->img.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY
                        ? nir_vec3(b, nir_channel(b, out_coord_xy, 0),
                                   out_layer, nir_imm_int(b, 0))
                        : nir_vec3(b, nir_channel(b, out_coord_xy, 0),
                                   nir_channel(b, out_coord_xy, 1), out_layer);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
   unsigned comp_count = util_format_get_nr_components(buf_pfmt);
   assert(blk_sz % comp_count == 0);
   unsigned comp_sz = (blk_sz / comp_count) * 8;

   coords = nir_isub(b, coords, img_offs);

   nir_def *texel = nir_build_load_global(b,
      comp_count, comp_sz, copy_img_buf_addr(b, buf_pfmt, coords),
      .align_mul = 1 << (ffs(blk_sz) - 1));

   /* We don't do compressed formats. The driver should select a
    * non-compressed format with the same block size. */
   assert(!util_format_is_compressed(buf_pfmt));

   VkFormat iview_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   nir_variable *out_var = frag_var(b, &key->img.view, key->img.aspect, 0);

   texel = convert_texel(b, buf_fmt, iview_fmt, texel);
   write_frag(b, &key->img.view, key->img.aspect, out_var, texel);
   return b->shader;
}

static VkResult
get_copy_buffer_to_image_gfx_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-buffer-to-image-gfx-pipeline-layout",
      VK_SHADER_STAGE_FRAGMENT_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), NULL, 0, layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_gfx_copy_pipeline(device, meta, *layout_out,
                                VK_SAMPLE_COUNT_1_BIT, build_buffer_to_image_fs,
                                key->img.aspect, &key->img.view, key,
                                sizeof(*key), pipeline_out);
}

static nir_shader *
build_buffer_to_image_cs(const struct vk_meta_device *meta,
                         const void *key_data)
{
   const struct vk_meta_copy_buffer_image_key *key = key_data;

   assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);

   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer-to-image-compute");
   nir_builder *b = &builder;

   b->shader->info.workgroup_size[0] = key->wg_size[0];
   b->shader->info.workgroup_size[1] = key->wg_size[1];
   b->shader->info.workgroup_size[2] = key->wg_size[2];

   VkFormat buf_fmt =
      copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
   VkFormat img_fmt =
      copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
   enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
   nir_deref_instr *image_deref =
      img_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0);

   nir_def *copy_id = nir_load_global_invocation_id(b, 32);
   nir_def *copy_id_start =
      nir_vec3(b,
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.x),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.y),
               load_info(b, struct vk_meta_copy_buffer_image_info,
                         copy_id_range.start.z));
   nir_def *copy_id_end = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y),
      load_info(b, struct vk_meta_copy_buffer_image_info,
                copy_id_range.end.z));

   nir_def *in_bounds =
      nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
               nir_ball(b, nir_ult(b, copy_id, copy_id_end)));

   nir_push_if(b, in_bounds);

   /* Adjust the copy ID such that we can directly deduce the image coords and
    * buffer offset from it. */
   copy_id = nir_isub(b, copy_id, copy_id_start);

   nir_def *img_offs = nir_vec3(b,
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
      load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));

   nir_def *img_coords =
      trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs));

   img_coords = nir_pad_vector_imm_int(b, img_coords, 0, 4);

   unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
   unsigned bit_sz = blk_sz & 1 ? 8 : blk_sz & 2 ? 16 : 32;
   unsigned comp_count = blk_sz * 8 / bit_sz;

   nir_def *texel = nir_build_load_global(b,
      comp_count, bit_sz, copy_img_buf_addr(b, buf_pfmt, copy_id),
      .align_mul = 1 << (ffs(blk_sz) - 1));

   texel = convert_texel(b, buf_fmt, img_fmt, texel);

   write_img(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT,
             image_deref, img_coords, NULL, texel);

   nir_pop_if(b, NULL);

   return b->shader;
}

static VkResult
get_copy_buffer_to_image_compute_pipeline(
   struct vk_device *device, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
{
   const VkDescriptorSetLayoutBinding bindings[] = {
      COPY_SHADER_BINDING(0, STORAGE_IMAGE, COMPUTE),
   };

   VkResult result = get_copy_pipeline_layout(
      device, meta, "vk-meta-copy-buffer-to-image-compute-pipeline-layout",
      VK_SHADER_STAGE_COMPUTE_BIT,
      sizeof(struct vk_meta_copy_buffer_image_info), bindings,
      ARRAY_SIZE(bindings), layout_out);

   if (unlikely(result != VK_SUCCESS))
      return result;

   return get_compute_copy_pipeline(device, meta, *layout_out,
                                    build_buffer_to_image_cs, key, sizeof(*key),
                                    pipeline_out);
}

static VkResult
copy_buffer_image_prepare_gfx_push_const(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout pipeline_layout, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout, struct vk_image *img,
   const VkBufferImageCopy2 *region)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   uint32_t depth_or_layer_count =
      MAX2(region->imageExtent.depth,
           vk_image_subresource_layer_count(img, &region->imageSubresource));
   VkImageViewType img_view_type =
      vk_image_render_view_type(img, depth_or_layer_count);
   VkOffset3D img_offs =
      base_layer_as_offset(img_view_type, region->imageOffset,
                           region->imageSubresource.baseArrayLayer);

   /* vk_meta_copy_buffer_image_info::image_stride is 32-bit for now.
    * We might want to make it a 64-bit integer (and patch the shader code
    * accordingly) if that becomes a limiting factor for vk_meta_copy users.
    */
   assert(buf_layout->image_stride_B <= UINT32_MAX);

   struct vk_meta_copy_buffer_image_info info = {
      .buf = {
         .row_stride = buf_layout->row_stride_B,
         .image_stride = buf_layout->image_stride_B,
         .addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset,
                                        VK_WHOLE_SIZE),
      },
      .img.offset = {
         .x = img_offs.x,
         .y = img_offs.y,
         .z = img_offs.z,
      },
   };

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info);
   return VK_SUCCESS;
}

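/* The dispatch is aligned on the workgroup (tile) size: copy IDs start at
 * img.offset % wg_size so workgroups line up with image tiles. For example
 * (hypothetical numbers), with imageOffset.x = 5, extent.width = 10 and
 * wg_size[0] = 8, copy IDs 5..14 are processed by two workgroups along X,
 * while IDs 0..4 and 15 are discarded by the shader.
 */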
static VkResult
copy_buffer_image_prepare_compute_push_const(
   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
   const struct vk_meta_copy_buffer_image_key *key,
   VkPipelineLayout pipeline_layout, VkBuffer buffer,
   const struct vk_image_buffer_layout *buf_layout, struct vk_image *img,
   const VkBufferImageCopy2 *region, uint32_t *wg_count)
{
   struct vk_device *dev = cmd->base.device;
   const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
   VkImageViewType img_view_type = key->img.view.type;
   VkOffset3D img_offs =
      base_layer_as_offset(img_view_type, region->imageOffset,
                           region->imageSubresource.baseArrayLayer);
   uint32_t layer_count =
      vk_image_subresource_layer_count(img, &region->imageSubresource);
   VkExtent3D img_extent =
      layer_count_as_extent(img_view_type, region->imageExtent, layer_count);

   struct vk_meta_copy_buffer_image_info info = {
      .buf = {
         .row_stride = buf_layout->row_stride_B,
         .image_stride = buf_layout->image_stride_B,
         .addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset,
                                        VK_WHOLE_SIZE),
      },
      .img.offset = {
         .x = img_offs.x,
         .y = img_offs.y,
         .z = img_offs.z,
      },
   };

   info.copy_id_range.start.x = img_offs.x % key->wg_size[0];
   info.copy_id_range.start.y = img_offs.y % key->wg_size[1];
   info.copy_id_range.start.z = img_offs.z % key->wg_size[2];
   info.copy_id_range.end.x = info.copy_id_range.start.x + img_extent.width;
   info.copy_id_range.end.y = info.copy_id_range.start.y + img_extent.height;
   info.copy_id_range.end.z = info.copy_id_range.start.z + img_extent.depth;
   wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]);
   wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]);
   wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]);

   disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
                          VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info);
   return VK_SUCCESS;
}
1270
1271 static bool
format_is_supported(VkFormat fmt)1272 format_is_supported(VkFormat fmt)
1273 {
1274 enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
1275 const struct util_format_description *fdesc = util_format_description(pfmt);
1276
1277 /* We only support RGB formats in the copy path to keep things simple. */
1278 return fdesc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
1279 fdesc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
1280 }
1281
1282 static struct vk_meta_copy_image_view
img_copy_view_info(VkImageViewType view_type,VkImageAspectFlags aspects,const struct vk_image * img,const struct vk_meta_copy_image_properties * img_props)1283 img_copy_view_info(VkImageViewType view_type, VkImageAspectFlags aspects,
1284 const struct vk_image *img,
1285 const struct vk_meta_copy_image_properties *img_props)
1286 {
1287 struct vk_meta_copy_image_view view = {
1288 .type = view_type,
1289 };
1290
1291 /* We only support color/depth/stencil aspects. */
1292 assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT |
1293 VK_IMAGE_ASPECT_STENCIL_BIT));
1294
1295 if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1296 /* Color aspect can't be combined with other aspects. */
1297 assert(!(aspects & ~VK_IMAGE_ASPECT_COLOR_BIT));
1298 view.color.format = img_props->color.view_format;
1299 assert(format_is_supported(view.color.format));
1300 return view;
1301 }
1302
1303
1304 view.depth.format = img_props->depth.view_format;
1305 view.depth.component_mask = img_props->depth.component_mask;
1306 view.stencil.format = img_props->stencil.view_format;
1307 view.stencil.component_mask = img_props->stencil.component_mask;
1308
1309 assert(view.depth.format == VK_FORMAT_UNDEFINED ||
1310 format_is_supported(view.depth.format));
1311 assert(view.stencil.format == VK_FORMAT_UNDEFINED ||
1312 format_is_supported(view.stencil.format));
1313 return view;
1314 }
1315
1316 static void
copy_image_to_buffer_region(struct vk_command_buffer * cmd,struct vk_meta_device * meta,struct vk_image * img,VkImageLayout img_layout,const struct vk_meta_copy_image_properties * img_props,VkBuffer buffer,const struct vk_image_buffer_layout * buf_layout,const VkBufferImageCopy2 * region)1317 copy_image_to_buffer_region(
1318 struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1319 struct vk_image *img, VkImageLayout img_layout,
1320 const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
1321 const struct vk_image_buffer_layout *buf_layout,
1322 const VkBufferImageCopy2 *region)
1323 {
1324 struct vk_device *dev = cmd->base.device;
1325 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1326 struct vk_meta_copy_buffer_image_key key = {
1327 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
1328 .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
1329 .img = {
1330 .view = img_copy_view_info(vk_image_sampled_view_type(img),
1331 region->imageSubresource.aspectMask, img,
1332 img_props),
1333 .aspect = region->imageSubresource.aspectMask,
1334 },
1335 .wg_size = {
1336 img_props->tile_size.width,
1337 img_props->tile_size.height,
1338 img_props->tile_size.depth,
1339 },
1340 };
1341
1342 VkPipelineLayout pipeline_layout;
1343 VkPipeline pipeline;
1344 VkResult result = get_copy_image_to_buffer_pipeline(
1345 dev, meta, &key, &pipeline_layout, &pipeline);
1346 if (unlikely(result != VK_SUCCESS)) {
1347 vk_command_buffer_set_error(cmd, result);
1348 return;
1349 }
1350
1351 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1352 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1353
1354 VkImageView iview;
1355 result = copy_create_src_image_view(cmd, meta, img, &key.img.view,
1356 region->imageSubresource.aspectMask,
1357 ®ion->imageSubresource, &iview);
1358
1359 if (unlikely(result != VK_SUCCESS)) {
1360 vk_command_buffer_set_error(cmd, result);
1361 return;
1362 }
1363
1364 const VkWriteDescriptorSet descs[] = {
1365 COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iview, img_layout),
1366 };
1367
1368 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1369 VK_PIPELINE_BIND_POINT_COMPUTE,
1370 pipeline_layout, 0, ARRAY_SIZE(descs), descs);
1371
1372 uint32_t wg_count[3] = {0};
1373
1374 result = copy_buffer_image_prepare_compute_push_const(
1375 cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region,
1376 wg_count);
1377 if (unlikely(result != VK_SUCCESS)) {
1378 vk_command_buffer_set_error(cmd, result);
1379 return;
1380 }
1381
1382 disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1],
1383 wg_count[2]);
1384 }
1385
1386 void
vk_meta_copy_image_to_buffer(struct vk_command_buffer * cmd,struct vk_meta_device * meta,const VkCopyImageToBufferInfo2 * info,const struct vk_meta_copy_image_properties * img_props)1387 vk_meta_copy_image_to_buffer(
1388 struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1389 const VkCopyImageToBufferInfo2 *info,
1390 const struct vk_meta_copy_image_properties *img_props)
1391 {
1392 VK_FROM_HANDLE(vk_image, img, info->srcImage);
1393
1394 for (uint32_t i = 0; i < info->regionCount; i++) {
1395 VkBufferImageCopy2 region = info->pRegions[i];
1396 struct vk_image_buffer_layout buf_layout =
1397 vk_image_buffer_copy_layout(img, ®ion);
1398
1399 region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent);
1400 region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset);
1401
1402 copy_image_to_buffer_region(cmd, meta, img, info->srcImageLayout,
1403 img_props, info->dstBuffer, &buf_layout,
1404 ®ion);
1405 }
1406 }
1407
1408 static void
copy_draw(struct vk_command_buffer * cmd,struct vk_meta_device * meta,struct vk_image * dst_img,VkImageLayout dst_img_layout,const VkImageSubresourceLayers * dst_img_subres,const VkOffset3D * dst_img_offset,const VkExtent3D * copy_extent,const struct vk_meta_copy_image_view * view_info)1409 copy_draw(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1410 struct vk_image *dst_img, VkImageLayout dst_img_layout,
1411 const VkImageSubresourceLayers *dst_img_subres,
1412 const VkOffset3D *dst_img_offset, const VkExtent3D *copy_extent,
1413 const struct vk_meta_copy_image_view *view_info)
1414 {
1415 struct vk_device *dev = cmd->base.device;
1416 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1417 uint32_t depth_or_layer_count =
1418 MAX2(copy_extent->depth,
1419 vk_image_subresource_layer_count(dst_img, dst_img_subres));
1420 struct vk_meta_rect rect = {
1421 .x0 = dst_img_offset->x,
1422 .x1 = dst_img_offset->x + copy_extent->width,
1423 .y0 = dst_img_offset->y,
1424 .y1 = dst_img_offset->y + copy_extent->height,
1425 };
1426 VkRenderingAttachmentInfo vk_atts[2];
1427 VkRenderingInfo vk_render = {
1428 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
1429 .renderArea = {
1430 .offset = {
1431 dst_img_offset->x,
1432 dst_img_offset->y,
1433 },
1434 .extent = {
1435 copy_extent->width,
1436 copy_extent->height,
1437 },
1438 },
1439 .layerCount = depth_or_layer_count,
1440 .pColorAttachments = vk_atts,
1441 };
1442 VkImageView iview = VK_NULL_HANDLE;
1443
1444 u_foreach_bit(a, dst_img_subres->aspectMask) {
1445 VkImageAspectFlagBits aspect = 1 << a;
1446
1447 if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && iview != VK_NULL_HANDLE &&
1448 depth_stencil_interleaved(view_info))
1449 continue;
1450
1451 VkResult result = copy_create_dst_image_view(
1452 cmd, meta, dst_img, view_info, aspect, dst_img_offset, copy_extent,
1453 dst_img_subres, VK_PIPELINE_BIND_POINT_GRAPHICS, &iview);
1454 if (unlikely(result != VK_SUCCESS)) {
1455 vk_command_buffer_set_error(cmd, result);
1456 return;
1457 }
1458
1459 vk_atts[vk_render.colorAttachmentCount] = (VkRenderingAttachmentInfo){
1460 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1461 .imageView = iview,
1462 .imageLayout = dst_img_layout,
1463 .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1464 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
1465 };
1466
1467 /* If we have interleaved depth/stencil and only one aspect is copied, we
1468 * need to load the attachment to preserve the other component. */
1469 if (vk_format_has_depth(dst_img->format) &&
1470 vk_format_has_stencil(dst_img->format) &&
1471 depth_stencil_interleaved(view_info) &&
1472 (dst_img_subres->aspectMask !=
1473 (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
1474 vk_atts[vk_render.colorAttachmentCount].loadOp =
1475 VK_ATTACHMENT_LOAD_OP_LOAD;
1476 }
1477
1478 vk_render.colorAttachmentCount++;
1479 }
1480
1481 disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd), &vk_render);
1482 meta->cmd_draw_volume(cmd, meta, &rect, vk_render.layerCount);
1483 disp->CmdEndRendering(vk_command_buffer_to_handle(cmd));
1484 }
1485
1486 static void
copy_buffer_to_image_region_gfx(struct vk_command_buffer * cmd,struct vk_meta_device * meta,struct vk_image * img,VkImageLayout img_layout,const struct vk_meta_copy_image_properties * img_props,VkBuffer buffer,const struct vk_image_buffer_layout * buf_layout,const VkBufferImageCopy2 * region)1487 copy_buffer_to_image_region_gfx(
1488 struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1489 struct vk_image *img, VkImageLayout img_layout,
1490 const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
1491 const struct vk_image_buffer_layout *buf_layout,
1492 const VkBufferImageCopy2 *region)
1493 {
1494 struct vk_device *dev = cmd->base.device;
1495 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1496
1497 /* We only special-case 1D_ARRAY to move the layer ID to the second
1498 * component instead of the third. For all other view types, let's pick an
1499 * invalid VkImageViewType value so we don't end up creating the same
1500 * pipeline multiple times. */
1501 VkImageViewType view_type =
1502 img->image_type == VK_IMAGE_TYPE_1D && img->array_layers > 1
1503 ? VK_IMAGE_VIEW_TYPE_1D_ARRAY
1504 : (VkImageViewType)-1;
1505
1506 struct vk_meta_copy_buffer_image_key key = {
1507 .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE,
1508 .bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS,
1509 .img = {
1510 .view = img_copy_view_info(view_type,
1511 region->imageSubresource.aspectMask, img,
1512 img_props),
1513 .aspect = region->imageSubresource.aspectMask,
1514 },
1515 };
1516
1517 VkPipelineLayout pipeline_layout;
1518 VkPipeline pipeline;
1519 VkResult result = get_copy_buffer_to_image_gfx_pipeline(
1520 dev, meta, &key, &pipeline_layout, &pipeline);
1521 if (unlikely(result != VK_SUCCESS)) {
1522 vk_command_buffer_set_error(cmd, result);
1523 return;
1524 }
1525
1526 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1527 VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
1528
1529 result = copy_buffer_image_prepare_gfx_push_const(
1530 cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region);
1531 if (unlikely(result != VK_SUCCESS)) {
1532 vk_command_buffer_set_error(cmd, result);
1533 return;
1534 }
1535
1536 copy_draw(cmd, meta, img, img_layout, &region->imageSubresource,
1537 &region->imageOffset, &region->imageExtent, &key.img.view);
1538 }
1539
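/* Compute path for a single VkBufferImageCopy2 region: the workgroup size
 * matches the image tile size from img_props, the destination is bound as a
 * storage image through a push descriptor, and the buffer is addressed
 * directly via the push-constant data prepared below. One invocation handles
 * one texel; out-of-range invocations are skipped in the shader.
 */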
1540 static void
1541 copy_buffer_to_image_region_compute(
1542 struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1543 struct vk_image *img, VkImageLayout img_layout,
1544 const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
1545 const struct vk_image_buffer_layout *buf_layout,
1546 const VkBufferImageCopy2 *region)
1547 {
1548 struct vk_device *dev = cmd->base.device;
1549 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1550 VkImageViewType view_type = vk_image_storage_view_type(img);
1551 struct vk_meta_copy_buffer_image_key key = {
1552 .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE,
1553 .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
1554 .img = {
1555 .view = img_copy_view_info(view_type,
1556 region->imageSubresource.aspectMask, img,
1557 img_props),
1558 .aspect = region->imageSubresource.aspectMask,
1559 },
1560 .wg_size = {
1561 img_props->tile_size.width,
1562 img_props->tile_size.height,
1563 img_props->tile_size.depth,
1564 },
1565 };
1566
1567 VkPipelineLayout pipeline_layout;
1568 VkPipeline pipeline;
1569 VkResult result = get_copy_buffer_to_image_compute_pipeline(
1570 dev, meta, &key, &pipeline_layout, &pipeline);
1571 if (unlikely(result != VK_SUCCESS)) {
1572 vk_command_buffer_set_error(cmd, result);
1573 return;
1574 }
1575
1576 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
1577 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1578
1579 VkImageView iview;
1580 result = copy_create_dst_image_view(
1581 cmd, meta, img, &key.img.view, region->imageSubresource.aspectMask,
1582 &region->imageOffset, &region->imageExtent, &region->imageSubresource,
1583 VK_PIPELINE_BIND_POINT_COMPUTE, &iview);
1584
1585 if (unlikely(result != VK_SUCCESS)) {
1586 vk_command_buffer_set_error(cmd, result);
1587 return;
1588 }
1589
1590 const VkWriteDescriptorSet descs[] = {
1591 COPY_PUSH_SET_IMG_DESC(0, STORAGE, iview, img_layout),
1592 };
1593
1594 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1595 VK_PIPELINE_BIND_POINT_COMPUTE,
1596 pipeline_layout, 0, ARRAY_SIZE(descs), descs);
1597
1598 uint32_t wg_count[3] = {0};
1599
1600 result = copy_buffer_image_prepare_compute_push_const(
1601 cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region,
1602 wg_count);
1603 if (unlikely(result != VK_SUCCESS)) {
1604 vk_command_buffer_set_error(cmd, result);
1605 return;
1606 }
1607
1608 disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
1609 wg_count[0], wg_count[1], wg_count[2]);
1610 }
1611
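/* Entry point for vkCmdCopyBufferToImage2(): offsets and extents are first
 * converted to texel-block units, then each region is dispatched to either
 * the graphics or the compute implementation depending on the requested bind
 * point.
 */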
1612 void
1613 vk_meta_copy_buffer_to_image(
1614 struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1615 const VkCopyBufferToImageInfo2 *info,
1616 const struct vk_meta_copy_image_properties *img_props,
1617 VkPipelineBindPoint bind_point)
1618 {
1619 VK_FROM_HANDLE(vk_image, img, info->dstImage);
1620
1621 for (uint32_t i = 0; i < info->regionCount; i++) {
1622 VkBufferImageCopy2 region = info->pRegions[i];
1623 struct vk_image_buffer_layout buf_layout =
1624 vk_image_buffer_copy_layout(img, &region);
1625
1626 region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent);
1627 region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset);
1628
1629 if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
1630 copy_buffer_to_image_region_gfx(cmd, meta, img, info->dstImageLayout,
1631 img_props, info->srcBuffer,
1632 &buf_layout, &region);
1633 } else {
1634 copy_buffer_to_image_region_compute(cmd, meta, img,
1635 info->dstImageLayout, img_props,
1636 info->srcBuffer, &buf_layout,
1637 &region);
1638 }
1639 }
1640 }
1641
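/* Fragment shader for image-to-image copies on the graphics path: each
 * fragment covers one destination texel, translates its frag coord back into
 * source coordinates using the push-constant offset, reads the source texel
 * (per sample when multisampled, hence sample shading), converts the format
 * and writes it to the color output(s), one output per aspect unless
 * depth/stencil are interleaved in a single attachment.
 */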
1642 static nir_shader *
1643 build_copy_image_fs(const struct vk_meta_device *meta, const void *key_data)
1644 {
1645 const struct vk_meta_copy_image_key *key = key_data;
1646
1647 assert(key->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
1648
1649 nir_builder builder = nir_builder_init_simple_shader(
1650 MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-image-frag");
1651 nir_builder *b = &builder;
1652
1653 b->shader->info.fs.uses_sample_shading =
1654 key->samples != VK_SAMPLE_COUNT_1_BIT;
1655
1656 nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b));
1657 nir_def *out_layer = nir_load_layer_id(b);
1658
1659 nir_def *src_offset = nir_vec3(b,
1660 load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.x),
1661 load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.y),
1662 load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.z));
1663
1664 /* Move the layer ID to the second coordinate if we're dealing with a 1D
1665 * array, as this is where the texture instruction expects it. */
1666 nir_def *src_coords =
1667 key->dst.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY
1668 ? nir_vec3(b, nir_channel(b, out_coord_xy, 0), out_layer,
1669 nir_imm_int(b, 0))
1670 : nir_vec3(b, nir_channel(b, out_coord_xy, 0),
1671 nir_channel(b, out_coord_xy, 1), out_layer);
1672
1673 src_coords = trim_img_coords(b, key->src.view.type,
1674 nir_iadd(b, src_coords, src_offset));
1675
1676 nir_def *sample_id =
1677 key->samples != VK_SAMPLE_COUNT_1_BIT ? nir_load_sample_id(b) : NULL;
1678 nir_variable *color_var = NULL;
1679 uint32_t tex_binding = 0;
1680
1681 u_foreach_bit(a, key->aspects) {
1682 VkImageAspectFlagBits aspect = 1 << a;
1683 VkFormat src_fmt =
1684 copy_img_view_format_for_aspect(&key->src.view, aspect);
1685 VkFormat dst_fmt =
1686 copy_img_view_format_for_aspect(&key->dst.view, aspect);
1687 nir_deref_instr *tex =
1688 tex_deref(b, &key->src.view, aspect, key->samples, tex_binding++);
1689 nir_def *texel = read_texel(b, tex, src_coords, sample_id);
1690
1691 if (!color_var || !depth_stencil_interleaved(&key->dst.view)) {
1692 color_var =
1693 frag_var(b, &key->dst.view, aspect, color_var != NULL ? 1 : 0);
1694 }
1695
1696 texel = convert_texel(b, src_fmt, dst_fmt, texel);
1697 write_frag(b, &key->dst.view, aspect, color_var, texel);
1698 }
1699
1700 return b->shader;
1701 }
1702
1703 static VkResult
1704 get_copy_image_gfx_pipeline(struct vk_device *device,
1705 struct vk_meta_device *meta,
1706 const struct vk_meta_copy_image_key *key,
1707 VkPipelineLayout *layout_out,
1708 VkPipeline *pipeline_out)
1709 {
1710 const struct VkDescriptorSetLayoutBinding bindings[] = {
1711 COPY_SHADER_BINDING(0, SAMPLED_IMAGE, FRAGMENT),
1712 COPY_SHADER_BINDING(1, SAMPLED_IMAGE, FRAGMENT),
1713 };
1714
1715 VkResult result = get_copy_pipeline_layout(
1716 device, meta, "vk-meta-copy-image-gfx-pipeline-layout",
1717 VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(struct vk_meta_copy_image_fs_info),
1718 bindings, ARRAY_SIZE(bindings), layout_out);
1719 if (unlikely(result != VK_SUCCESS))
1720 return result;
1721
1722 return get_gfx_copy_pipeline(
1723 device, meta, *layout_out, key->samples, build_copy_image_fs,
1724 key->aspects, &key->dst.view, key, sizeof(*key), pipeline_out);
1725 }
1726
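/* Compute shader for image-to-image copies: one invocation per texel. The
 * global invocation ID is offset so that workgroups line up with tile
 * boundaries, which is why the shader first checks the ID against
 * copy_id_range before doing any work. For multisampled images every sample
 * is copied in a small loop.
 */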
1727 static nir_shader *
1728 build_copy_image_cs(const struct vk_meta_device *meta, const void *key_data)
1729 {
1730 const struct vk_meta_copy_image_key *key = key_data;
1731
1732 assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
1733
1734 nir_builder builder = nir_builder_init_simple_shader(
1735 MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-compute");
1736 nir_builder *b = &builder;
1737
1738 b->shader->info.workgroup_size[0] = key->wg_size[0];
1739 b->shader->info.workgroup_size[1] = key->wg_size[1];
1740 b->shader->info.workgroup_size[2] = key->wg_size[2];
1741
1742 nir_def *copy_id = nir_load_global_invocation_id(b, 32);
1743 nir_def *copy_id_start = nir_vec3(b,
1744 load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.x),
1745 load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.y),
1746 load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.z));
1747 nir_def *copy_id_end = nir_vec3(b,
1748 load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.x),
1749 load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.y),
1750 load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.z));
1751
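/* Workgroups are aligned on tile boundaries (see the align policy below), so
 * the dispatch may cover texels outside the copy area; such invocations are
 * filtered out here. */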
1752 nir_def *in_bounds =
1753 nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
1754 nir_ball(b, nir_ult(b, copy_id, copy_id_end)));
1755
1756 nir_push_if(b, in_bounds);
1757
1758 nir_def *src_offset = nir_vec3(b,
1759 load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.x),
1760 load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.y),
1761 load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.z));
1762 nir_def *dst_offset = nir_vec3(b,
1763 load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.x),
1764 load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.y),
1765 load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.z));
1766
1767 nir_def *src_coords = trim_img_coords(b, key->src.view.type,
1768 nir_iadd(b, copy_id, src_offset));
1769 nir_def *dst_coords = trim_img_coords(b, key->dst.view.type,
1770 nir_iadd(b, copy_id, dst_offset));
1771
1772 dst_coords = nir_pad_vector_imm_int(b, dst_coords, 0, 4);
1773
1774 uint32_t binding = 0;
1775 u_foreach_bit(a, key->aspects) {
1776 VkImageAspectFlagBits aspect = 1 << a;
1777 VkFormat src_fmt =
1778 copy_img_view_format_for_aspect(&key->src.view, aspect);
1779 VkFormat dst_fmt =
1780 copy_img_view_format_for_aspect(&key->dst.view, aspect);
1781 nir_deref_instr *tex =
1782 tex_deref(b, &key->src.view, aspect, key->samples, binding);
1783 nir_deref_instr *img =
1784 img_deref(b, &key->dst.view, aspect, key->samples, binding + 1);
1785
1786 for (uint32_t s = 0; s < key->samples; s++) {
1787 nir_def *sample_id =
1788 key->samples == VK_SAMPLE_COUNT_1_BIT ? NULL : nir_imm_int(b, s);
1789 nir_def *texel = read_texel(b, tex, src_coords, sample_id);
1790
1791 texel = convert_texel(b, src_fmt, dst_fmt, texel);
1792 write_img(b, &key->dst.view, aspect, key->samples, img, dst_coords,
1793 sample_id, texel);
1794 }
1795
1796 binding += 2;
1797 }
1798
1799 nir_pop_if(b, NULL);
1800
1801 return b->shader;
1802 }
1803
1804 static VkResult
1805 get_copy_image_compute_pipeline(struct vk_device *device,
1806 struct vk_meta_device *meta,
1807 const struct vk_meta_copy_image_key *key,
1808 VkPipelineLayout *layout_out,
1809 VkPipeline *pipeline_out)
1810 {
1811 const VkDescriptorSetLayoutBinding bindings[] = {
1812 COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE),
1813 COPY_SHADER_BINDING(1, STORAGE_IMAGE, COMPUTE),
1814 COPY_SHADER_BINDING(2, SAMPLED_IMAGE, COMPUTE),
1815 COPY_SHADER_BINDING(3, STORAGE_IMAGE, COMPUTE),
1816 };
1817
1818 VkResult result = get_copy_pipeline_layout(
1819 device, meta, "vk-meta-copy-image-compute-pipeline-layout",
1820 VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_image_cs_info),
1821 bindings, ARRAY_SIZE(bindings), layout_out);
1822
1823 if (unlikely(result != VK_SUCCESS))
1824 return result;
1825
1826 return get_compute_copy_pipeline(device, meta, *layout_out,
1827 build_copy_image_cs, key, sizeof(*key),
1828 pipeline_out);
1829 }
1830
1831 static VkResult
1832 copy_image_prepare_gfx_desc_set(
1833 struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1834 const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
1835 struct vk_image *src_img, VkImageLayout src_img_layout,
1836 struct vk_image *dst_img, VkImageLayout dst_img_layout,
1837 const VkImageCopy2 *region)
1838 {
1839 struct vk_device *dev = cmd->base.device;
1840 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1841 VkImageAspectFlags aspects = key->aspects;
1842 VkImageView iviews[] = {
1843 VK_NULL_HANDLE,
1844 VK_NULL_HANDLE,
1845 };
1846 uint32_t desc_count = 0;
1847
1848 u_foreach_bit(a, aspects) {
1849 assert(desc_count < ARRAY_SIZE(iviews));
1850
1851 VkResult result = copy_create_src_image_view(
1852 cmd, meta, src_img, &key->src.view, 1 << a, &region->srcSubresource,
1853 &iviews[desc_count++]);
1854 if (unlikely(result != VK_SUCCESS))
1855 return result;
1856 }
1857
1858 VkWriteDescriptorSet descs[2] = {
1859 COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout),
1860 COPY_PUSH_SET_IMG_DESC(1, SAMPLED, iviews[1], src_img_layout),
1861 };
1862
1863 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1864 VK_PIPELINE_BIND_POINT_GRAPHICS,
1865 pipeline_layout, 0, desc_count, descs);
1866 return VK_SUCCESS;
1867 }
1868
1869 static VkResult
1870 copy_image_prepare_compute_desc_set(
1871 struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1872 const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
1873 struct vk_image *src_img, VkImageLayout src_img_layout,
1874 struct vk_image *dst_img, VkImageLayout dst_img_layout,
1875 const VkImageCopy2 *region)
1876 {
1877 struct vk_device *dev = cmd->base.device;
1878 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1879 VkImageAspectFlags aspects = key->aspects;
1880 VkImageView iviews[] = {
1881 VK_NULL_HANDLE,
1882 VK_NULL_HANDLE,
1883 VK_NULL_HANDLE,
1884 VK_NULL_HANDLE,
1885 };
1886 unsigned desc_count = 0;
1887
1888 u_foreach_bit(a, aspects) {
1889 VkImageAspectFlagBits aspect = 1 << a;
1890
1891 assert(desc_count + 2 <= ARRAY_SIZE(iviews));
1892
1893 VkResult result = copy_create_src_image_view(
1894 cmd, meta, src_img, &key->src.view, aspect, &region->srcSubresource,
1895 &iviews[desc_count++]);
1896 if (unlikely(result != VK_SUCCESS))
1897 return result;
1898
1899 result = copy_create_dst_image_view(
1900 cmd, meta, dst_img, &key->dst.view, aspect, &region->dstOffset,
1901 &region->extent, &region->dstSubresource,
1902 VK_PIPELINE_BIND_POINT_COMPUTE, &iviews[desc_count++]);
1903 if (unlikely(result != VK_SUCCESS))
1904 return result;
1905 }
1906
1907 VkWriteDescriptorSet descs[] = {
1908 COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout),
1909 COPY_PUSH_SET_IMG_DESC(1, STORAGE, iviews[1], dst_img_layout),
1910 COPY_PUSH_SET_IMG_DESC(2, SAMPLED, iviews[2], src_img_layout),
1911 COPY_PUSH_SET_IMG_DESC(3, STORAGE, iviews[3], dst_img_layout),
1912 };
1913
1914 disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
1915 VK_PIPELINE_BIND_POINT_COMPUTE,
1916 pipeline_layout, 0, desc_count, descs);
1917 return VK_SUCCESS;
1918 }
1919
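/* When the source and destination have different tile sizes we can only make
 * one of the two accesses tile-aligned; this policy records which image the
 * dispatch grid was aligned on. */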
1920 enum vk_meta_copy_image_align_policy {
1921 VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE,
1922 VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE,
1923 };
1924
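/* Fills vk_meta_copy_image_cs_info and derives the workgroup counts. An
 * illustrative example (hypothetical numbers): with wg_size = {16,16,1} and
 * src_offs.x = 40, ALIGN_ON_SRC_TILE gives copy_id_range.start.x = 40 % 16 = 8
 * and src_img.offset.x = 40 - 8 = 32, so the first in-range invocation
 * (copy ID 8) lands exactly on texel 40.
 */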
1925 static VkResult
1926 copy_image_prepare_compute_push_const(
1927 struct vk_command_buffer *cmd, struct vk_meta_device *meta,
1928 const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
1929 const struct vk_image *src, const struct vk_image *dst,
1930 enum vk_meta_copy_image_align_policy align_policy,
1931 const VkImageCopy2 *region, uint32_t *wg_count)
1932 {
1933 struct vk_device *dev = cmd->base.device;
1934 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1935 VkOffset3D src_offs =
1936 base_layer_as_offset(key->src.view.type, region->srcOffset,
1937 region->srcSubresource.baseArrayLayer);
1938 uint32_t layer_count =
1939 vk_image_subresource_layer_count(src, &region->srcSubresource);
1940 VkExtent3D src_extent =
1941 layer_count_as_extent(key->src.view.type, region->extent, layer_count);
1942 VkOffset3D dst_offs =
1943 base_layer_as_offset(key->dst.view.type, region->dstOffset,
1944 region->dstSubresource.baseArrayLayer);
1945
1946 struct vk_meta_copy_image_cs_info info = {0};
1947
1948 /* We can't make both the read and the write side tile-aligned at once, so
1949 * align the copy on whichever side has the bigger tile. */
1950 if (align_policy == VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE) {
1951 info.copy_id_range.start.x = src_offs.x % key->wg_size[0];
1952 info.copy_id_range.start.y = src_offs.y % key->wg_size[1];
1953 info.copy_id_range.start.z = src_offs.z % key->wg_size[2];
1954 } else {
1955 info.copy_id_range.start.x = dst_offs.x % key->wg_size[0];
1956 info.copy_id_range.start.y = dst_offs.y % key->wg_size[1];
1957 info.copy_id_range.start.z = dst_offs.z % key->wg_size[2];
1958 }
1959
1960 info.copy_id_range.end.x = info.copy_id_range.start.x + src_extent.width;
1961 info.copy_id_range.end.y = info.copy_id_range.start.y + src_extent.height;
1962 info.copy_id_range.end.z = info.copy_id_range.start.z + src_extent.depth;
1963
1964 info.src_img.offset.x = src_offs.x - info.copy_id_range.start.x;
1965 info.src_img.offset.y = src_offs.y - info.copy_id_range.start.y;
1966 info.src_img.offset.z = src_offs.z - info.copy_id_range.start.z;
1967 info.dst_img.offset.x = dst_offs.x - info.copy_id_range.start.x;
1968 info.dst_img.offset.y = dst_offs.y - info.copy_id_range.start.y;
1969 info.dst_img.offset.z = dst_offs.z - info.copy_id_range.start.z;
1970 wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]);
1971 wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]);
1972 wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]);
1973
1974 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
1975 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info);
1976
1977 return VK_SUCCESS;
1978 }
1979
1980 static VkResult
1981 copy_image_prepare_gfx_push_const(struct vk_command_buffer *cmd,
1982 struct vk_meta_device *meta,
1983 const struct vk_meta_copy_image_key *key,
1984 VkPipelineLayout pipeline_layout,
1985 struct vk_image *src_img,
1986 struct vk_image *dst_img,
1987 const VkImageCopy2 *region)
1988 {
1989 struct vk_device *dev = cmd->base.device;
1990 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
1991 VkOffset3D src_img_offs =
1992 base_layer_as_offset(key->src.view.type, region->srcOffset,
1993 region->srcSubresource.baseArrayLayer);
1994
1995 struct vk_meta_copy_image_fs_info info = {
1996 .dst_to_src_offs = {
1997 /* The subtraction may lead to negative values, but that's fine
1998 * because the shader adds this offset back to the destination
1999 * coordinates, which guarantees src_coords >= 0. */
2000 .x = src_img_offs.x - region->dstOffset.x,
2001 .y = src_img_offs.y - region->dstOffset.y,
2002 /* The render image view only contains the layers being written, so
2003 * the destination layer coordinate is effectively zero and only the
2004 * source base layer contributes to the offset.
2005 */
2006 .z = src_img_offs.z,
2007 },
2008 };
2009
2010 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
2011 VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info);
2012
2013 return VK_SUCCESS;
2014 }
2015
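/* Graphics path for a single VkImageCopy2 region: the source aspects are
 * bound as sampled images via a push descriptor set, the dst-to-src offset
 * goes in push constants, and copy_draw() renders the destination region
 * (one layer per rendering layer).
 */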
2016 static void
2017 copy_image_region_gfx(struct vk_command_buffer *cmd,
2018 struct vk_meta_device *meta, struct vk_image *src_img,
2019 VkImageLayout src_image_layout,
2020 const struct vk_meta_copy_image_properties *src_props,
2021 struct vk_image *dst_img, VkImageLayout dst_image_layout,
2022 const struct vk_meta_copy_image_properties *dst_props,
2023 const VkImageCopy2 *region)
2024 {
2025 struct vk_device *dev = cmd->base.device;
2026 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
2027
2028 /* We only special-case 1D_ARRAY to move the layer ID to the second
2029 * component instead of the third. For all other view types, let's pick an
2030 * invalid VkImageViewType value so we don't end up creating the same
2031 * pipeline multiple times. */
2032 VkImageViewType dst_view_type =
2033 dst_img->image_type == VK_IMAGE_TYPE_1D && dst_img->array_layers > 1
2034 ? VK_IMAGE_VIEW_TYPE_1D_ARRAY
2035 : (VkImageViewType)-1;
2036
2037 assert(region->srcSubresource.aspectMask ==
2038 region->dstSubresource.aspectMask);
2039
2040 struct vk_meta_copy_image_key key = {
2041 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE,
2042 .bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS,
2043 .samples = src_img->samples,
2044 .aspects = region->srcSubresource.aspectMask,
2045 .src.view = img_copy_view_info(vk_image_sampled_view_type(src_img),
2046 region->srcSubresource.aspectMask, src_img,
2047 src_props),
2048 .dst.view = img_copy_view_info(dst_view_type,
2049 region->dstSubresource.aspectMask, dst_img,
2050 dst_props),
2051 };
2052
2053 VkPipelineLayout pipeline_layout;
2054 VkPipeline pipeline;
2055 VkResult result =
2056 get_copy_image_gfx_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
2057 if (unlikely(result != VK_SUCCESS)) {
2058 vk_command_buffer_set_error(cmd, result);
2059 return;
2060 }
2061
2062 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
2063 VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
2064
2065 result = copy_image_prepare_gfx_desc_set(cmd, meta, &key, pipeline_layout,
2066 src_img, src_image_layout, dst_img,
2067 dst_image_layout, region);
2068 if (unlikely(result != VK_SUCCESS)) {
2069 vk_command_buffer_set_error(cmd, result);
2070 return;
2071 }
2072
2073 result = copy_image_prepare_gfx_push_const(cmd, meta, &key, pipeline_layout,
2074 src_img, dst_img, region);
2075 if (unlikely(result != VK_SUCCESS)) {
2076 vk_command_buffer_set_error(cmd, result);
2077 return;
2078 }
2079
2080 copy_draw(cmd, meta, dst_img, dst_image_layout, &region->dstSubresource,
2081 &region->dstOffset, &region->extent, &key.dst.view);
2082 }
2083
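/* Compute path for a single VkImageCopy2 region: the workgroup size is taken
 * from whichever image has the larger tile (see the align policy above),
 * sources are bound as sampled images and destinations as storage images, and
 * one invocation is dispatched per texel of the copy extent.
 */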
2084 static void
2085 copy_image_region_compute(struct vk_command_buffer *cmd,
2086 struct vk_meta_device *meta, struct vk_image *src_img,
2087 VkImageLayout src_image_layout,
2088 const struct vk_meta_copy_image_properties *src_props,
2089 struct vk_image *dst_img,
2090 VkImageLayout dst_image_layout,
2091 const struct vk_meta_copy_image_properties *dst_props,
2092 const VkImageCopy2 *region)
2093 {
2094 struct vk_device *dev = cmd->base.device;
2095 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
2096 VkImageViewType dst_view_type = vk_image_storage_view_type(dst_img);
2097
2098 assert(region->srcSubresource.aspectMask ==
2099 region->dstSubresource.aspectMask);
2100
2101 struct vk_meta_copy_image_key key = {
2102 .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE,
2103 .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE,
2104 .samples = src_img->samples,
2105 .aspects = region->srcSubresource.aspectMask,
2106 .src.view = img_copy_view_info(vk_image_sampled_view_type(src_img),
2107 region->srcSubresource.aspectMask, src_img,
2108 src_props),
2109 .dst.view = img_copy_view_info(
2110 dst_view_type, region->dstSubresource.aspectMask, dst_img, dst_props),
2111 };
2112
2113 uint32_t src_pix_per_tile = src_props->tile_size.width *
2114 src_props->tile_size.height *
2115 src_props->tile_size.depth;
2116 uint32_t dst_pix_per_tile = dst_props->tile_size.width *
2117 dst_props->tile_size.height *
2118 dst_props->tile_size.depth;
2119 enum vk_meta_copy_image_align_policy align_policy;
2120
2121 if (src_pix_per_tile >= dst_pix_per_tile) {
2122 key.wg_size[0] = src_props->tile_size.width;
2123 key.wg_size[1] = src_props->tile_size.height;
2124 key.wg_size[2] = src_props->tile_size.depth;
2125 align_policy = VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE;
2126 } else {
2127 key.wg_size[0] = dst_props->tile_size.width;
2128 key.wg_size[1] = dst_props->tile_size.height;
2129 key.wg_size[2] = dst_props->tile_size.depth;
2130 align_policy = VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE;
2131 }
2132
2133 VkPipelineLayout pipeline_layout;
2134 VkPipeline pipeline;
2135 VkResult result = get_copy_image_compute_pipeline(
2136 dev, meta, &key, &pipeline_layout, &pipeline);
2137 if (unlikely(result != VK_SUCCESS)) {
2138 vk_command_buffer_set_error(cmd, result);
2139 return;
2140 }
2141
2142 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
2143 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2144
2145 result = copy_image_prepare_compute_desc_set(
2146 cmd, meta, &key, pipeline_layout, src_img, src_image_layout, dst_img,
2147 dst_image_layout, region);
2148 if (unlikely(result != VK_SUCCESS)) {
2149 vk_command_buffer_set_error(cmd, result);
2150 return;
2151 }
2152
2153 assert(key.wg_size[0] && key.wg_size[1] && key.wg_size[2]);
2154
2155 uint32_t wg_count[3] = {0};
2156
2157 result = copy_image_prepare_compute_push_const(
2158 cmd, meta, &key, pipeline_layout, src_img, dst_img, align_policy, region,
2159 wg_count);
2160 if (unlikely(result != VK_SUCCESS)) {
2161 vk_command_buffer_set_error(cmd, result);
2162 return;
2163 }
2164
2165 disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1],
2166 wg_count[2]);
2167 }
2168
2169 void
2170 vk_meta_copy_image(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
2171 const VkCopyImageInfo2 *info,
2172 const struct vk_meta_copy_image_properties *src_props,
2173 const struct vk_meta_copy_image_properties *dst_props,
2174 VkPipelineBindPoint bind_point)
2175 {
2176 VK_FROM_HANDLE(vk_image, src_img, info->srcImage);
2177 VK_FROM_HANDLE(vk_image, dst_img, info->dstImage);
2178
2179 for (uint32_t i = 0; i < info->regionCount; i++) {
2180 VkImageCopy2 region = info->pRegions[i];
2181
2182 region.extent = vk_image_extent_to_elements(src_img, region.extent);
2183 region.srcOffset = vk_image_offset_to_elements(src_img, region.srcOffset);
2184 region.dstOffset = vk_image_offset_to_elements(dst_img, region.dstOffset);
2185
2186 if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2187 copy_image_region_gfx(cmd, meta, src_img, info->srcImageLayout,
2188 src_props, dst_img, info->dstImageLayout,
2189 dst_props, &region);
2190 } else {
2191 copy_image_region_compute(cmd, meta, src_img, info->srcImageLayout,
2192 src_props, dst_img, info->dstImageLayout,
2193 dst_props, &region);
2194 }
2195 }
2196 }
2197
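/* Buffer-to-buffer copy shader: each invocation moves one chunk_size-byte
 * chunk with a global load/store pair at src_addr/dst_addr + offset. The
 * chunk size is derived from the common alignment of the addresses and size
 * in copy_buffer_region() below.
 */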
2198 static nir_shader *
2199 build_copy_buffer_shader(const struct vk_meta_device *meta,
2200 const void *key_data)
2201 {
2202 const struct vk_meta_copy_buffer_key *key = key_data;
2203 nir_builder builder = nir_builder_init_simple_shader(
2204 MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer");
2205 nir_builder *b = &builder;
2206
2207 b->shader->info.workgroup_size[0] =
2208 vk_meta_buffer_access_wg_size(meta, key->chunk_size);
2209 b->shader->info.workgroup_size[1] = 1;
2210 b->shader->info.workgroup_size[2] = 1;
2211
2212 uint32_t chunk_bit_size, chunk_comp_count;
2213
2214 assert(util_is_power_of_two_nonzero(key->chunk_size));
2215 if (key->chunk_size <= 4) {
2216 chunk_bit_size = key->chunk_size * 8;
2217 chunk_comp_count = 1;
2218 } else {
2219 chunk_bit_size = 32;
2220 chunk_comp_count = key->chunk_size / 4;
2221 }
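/* e.g. chunk_size 2 -> a single 16-bit component, chunk_size 16 -> a
 * 4 x 32-bit vector (a uvec4). */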
2222
2223 assert(chunk_comp_count < NIR_MAX_VEC_COMPONENTS);
2224
2225 nir_def *global_id = nir_load_global_invocation_id(b, 32);
2226 nir_def *copy_id = nir_channel(b, global_id, 0);
2227 nir_def *offset = nir_imul_imm(b, copy_id, key->chunk_size);
2228 nir_def *size = load_info(b, struct vk_meta_copy_buffer_info, size);
2229
2230 nir_push_if(b, nir_ult(b, offset, size));
2231
2232 offset = nir_u2u64(b, offset);
2233
2234 nir_def *src_addr = load_info(b, struct vk_meta_copy_buffer_info, src_addr);
2235 nir_def *dst_addr = load_info(b, struct vk_meta_copy_buffer_info, dst_addr);
2236 nir_def *data = nir_build_load_global(b, chunk_comp_count, chunk_bit_size,
2237 nir_iadd(b, src_addr, offset),
2238 .align_mul = chunk_bit_size / 8);
2239
2240 nir_build_store_global(b, data, nir_iadd(b, dst_addr, offset),
2241 .align_mul = key->chunk_size);
2242
2243 nir_pop_if(b, NULL);
2244
2245 return b->shader;
2246 }
2247
2248 static VkResult
2249 get_copy_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta,
2250 const struct vk_meta_copy_buffer_key *key,
2251 VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
2252 {
2253 VkResult result = get_copy_pipeline_layout(
2254 device, meta, "vk-meta-copy-buffer-pipeline-layout",
2255 VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_buffer_info),
2256 NULL, 0, layout_out);
2257
2258 if (unlikely(result != VK_SUCCESS))
2259 return result;
2260
2261 return get_compute_copy_pipeline(device, meta, *layout_out,
2262 build_copy_buffer_shader, key, sizeof(*key),
2263 pipeline_out);
2264 }
2265
2266 static void
2267 copy_buffer_region(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
2268 VkBuffer src, VkBuffer dst, const VkBufferCopy2 *region)
2269 {
2270 struct vk_device *dev = cmd->base.device;
2271 const struct vk_physical_device *pdev = dev->physical;
2272 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
2273 VkResult result;
2274
2275 struct vk_meta_copy_buffer_key key = {
2276 .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_PIPELINE,
2277 };
2278
2279 VkDeviceSize size = region->size;
2280 VkDeviceAddress src_addr =
2281 vk_meta_buffer_address(dev, src, region->srcOffset, size);
2282 VkDeviceAddress dst_addr =
2283 vk_meta_buffer_address(dev, dst, region->dstOffset, size);
2284
2285 /* OR the size and the src/dst addresses together so their common alignment can be extracted below. */
2286 uint64_t align = src_addr | dst_addr | size;
2287
2288 assert(align != 0);
2289
2290 /* Use the lowest set bit of the combined src/dst addresses and size as our
2291 * chunk size, i.e. the largest power-of-two alignment they all share. We
2292 * limit the chunk size to 16 bytes (a uvec4) for now. */
2293 key.chunk_size = MIN2(16, 1 << (ffs(align) - 1));
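/* Illustrative example (hypothetical values): src_addr 0x1000, dst_addr 0x804
 * and size 0x300 OR together to 0x1b04, whose lowest set bit is bit 2, so
 * 4-byte chunks are used. */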
2294
2295 VkPipelineLayout pipeline_layout;
2296 VkPipeline pipeline;
2297 result =
2298 get_copy_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
2299 if (unlikely(result != VK_SUCCESS)) {
2300 vk_command_buffer_set_error(cmd, result);
2301 return;
2302 }
2303
2304 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
2305 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2306
2307 const uint32_t optimal_wg_size =
2308 vk_meta_buffer_access_wg_size(meta, key.chunk_size);
2309 const uint32_t per_wg_copy_size = optimal_wg_size * key.chunk_size;
2310 uint32_t max_per_dispatch_size =
2311 pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size;
2312
2313 assert(optimal_wg_size <= pdev->properties.maxComputeWorkGroupSize[0]);
2314
2315 while (size) {
2316 struct vk_meta_copy_buffer_info args = {
2317 .size = MIN2(size, max_per_dispatch_size),
2318 .src_addr = src_addr,
2319 .dst_addr = dst_addr,
2320 };
2321 uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size);
2322
2323 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
2324 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
2325 &args);
2326
2327 disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1);
2328
2329 src_addr += args.size;
2330 dst_addr += args.size;
2331 size -= args.size;
2332 }
2333 }
2334
2335 void
2336 vk_meta_copy_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
2337 const VkCopyBufferInfo2 *info)
2338 {
2339 for (unsigned i = 0; i < info->regionCount; i++) {
2340 const VkBufferCopy2 *region = &info->pRegions[i];
2341
2342 copy_buffer_region(cmd, meta, info->srcBuffer, info->dstBuffer, region);
2343 }
2344 }
2345
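/* Implements vkCmdUpdateBuffer() on top of the copy path: the data is staged
 * in a temporary host-mapped TRANSFER_SRC buffer (bound and mapped through
 * the driver's cmd_bind_map_buffer hook) and then copied into the destination
 * with vk_meta_copy_buffer().
 */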
2346 void
2347 vk_meta_update_buffer(struct vk_command_buffer *cmd,
2348 struct vk_meta_device *meta, VkBuffer buffer,
2349 VkDeviceSize offset, VkDeviceSize size, const void *data)
2350 {
2351 VkResult result;
2352
2353 const VkBufferCreateInfo tmp_buffer_info = {
2354 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2355 .size = size,
2356 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
2357 .queueFamilyIndexCount = 1,
2358 .pQueueFamilyIndices = &cmd->pool->queue_family_index,
2359 };
2360
2361 VkBuffer tmp_buffer;
2362 result = vk_meta_create_buffer(cmd, meta, &tmp_buffer_info, &tmp_buffer);
2363 if (unlikely(result != VK_SUCCESS)) {
2364 vk_command_buffer_set_error(cmd, result);
2365 return;
2366 }
2367
2368 void *tmp_buffer_map;
2369 result = meta->cmd_bind_map_buffer(cmd, meta, tmp_buffer, &tmp_buffer_map);
2370 if (unlikely(result != VK_SUCCESS)) {
2371 vk_command_buffer_set_error(cmd, result);
2372 return;
2373 }
2374
2375 memcpy(tmp_buffer_map, data, size);
2376
2377 const VkBufferCopy2 copy_region = {
2378 .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
2379 .srcOffset = 0,
2380 .dstOffset = offset,
2381 .size = size,
2382 };
2383 const VkCopyBufferInfo2 copy_info = {
2384 .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
2385 .srcBuffer = tmp_buffer,
2386 .dstBuffer = buffer,
2387 .regionCount = 1,
2388 .pRegions = &copy_region,
2389 };
2390
2391 vk_meta_copy_buffer(cmd, meta, &copy_info);
2392 }
2393
2394 static nir_shader *
2395 build_fill_buffer_shader(const struct vk_meta_device *meta,
2396 UNUSED const void *key_data)
2397 {
2398 nir_builder builder = nir_builder_init_simple_shader(
2399 MESA_SHADER_COMPUTE, NULL, "vk-meta-fill-buffer");
2400 nir_builder *b = &builder;
2401
2402 b->shader->info.workgroup_size[0] = vk_meta_buffer_access_wg_size(meta, 4);
2403 b->shader->info.workgroup_size[1] = 1;
2404 b->shader->info.workgroup_size[2] = 1;
2405
2406 nir_def *global_id = nir_load_global_invocation_id(b, 32);
2407 nir_def *copy_id = nir_channel(b, global_id, 0);
2408 nir_def *offset = nir_imul_imm(b, copy_id, 4);
2409 nir_def *size = load_info(b, struct vk_meta_fill_buffer_info, size);
2410 nir_def *data = load_info(b, struct vk_meta_fill_buffer_info, data);
2411
2412 nir_push_if(b, nir_ult(b, offset, size));
2413
2414 offset = nir_u2u64(b, offset);
2415
2416 nir_def *buf_addr =
2417 load_info(b, struct vk_meta_fill_buffer_info, buf_addr);
2418
2419 nir_build_store_global(b, data, nir_iadd(b, buf_addr, offset),
2420 .align_mul = 4);
2421
2422 nir_pop_if(b, NULL);
2423
2424 return b->shader;
2425 }
2426
2427 static VkResult
2428 get_fill_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta,
2429 const struct vk_meta_fill_buffer_key *key,
2430 VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
2431 {
2432 VkResult result = get_copy_pipeline_layout(
2433 device, meta, "vk-meta-fill-buffer-pipeline-layout",
2434 VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_fill_buffer_info), NULL, 0,
2435 layout_out);
2436 if (unlikely(result != VK_SUCCESS))
2437 return result;
2438
2439 return get_compute_copy_pipeline(device, meta, *layout_out,
2440 build_fill_buffer_shader, key, sizeof(*key),
2441 pipeline_out);
2442 }
2443
2444 void
2445 vk_meta_fill_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
2446 VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
2447 uint32_t data)
2448 {
2449 VK_FROM_HANDLE(vk_buffer, buf, buffer);
2450 struct vk_device *dev = cmd->base.device;
2451 const struct vk_physical_device *pdev = dev->physical;
2452 const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
2453 VkResult result;
2454
2455 struct vk_meta_fill_buffer_key key = {
2456 .key_type = VK_META_OBJECT_KEY_FILL_BUFFER_PIPELINE,
2457 };
2458
2459 VkPipelineLayout pipeline_layout;
2460 VkPipeline pipeline;
2461 result =
2462 get_fill_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
2463 if (unlikely(result != VK_SUCCESS)) {
2464 vk_command_buffer_set_error(cmd, result);
2465 return;
2466 }
2467
2468 disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
2469 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2470
2471 /* From the Vulkan 1.3.290 spec:
2472 *
2473 * "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not a
2474 * multiple of 4, then the nearest smaller multiple is used."
2475 *
2476 * hence the mask to align the size on 4 bytes here.
2477 */
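/* e.g. filling at offset 4 of a 15-byte buffer with VK_WHOLE_SIZE yields an
 * 11-byte range, rounded down to 8 bytes here. */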
2478 size = vk_buffer_range(buf, offset, size) & ~3u;
2479
2480 const uint32_t optimal_wg_size = vk_meta_buffer_access_wg_size(meta, 4);
2481 const uint32_t per_wg_copy_size = optimal_wg_size * 4;
2482 uint32_t max_per_dispatch_size =
2483 pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size;
2484
2485 while (size > 0) {
2486 struct vk_meta_fill_buffer_info args = {
2487 .size = MIN2(size, max_per_dispatch_size),
2488 .buf_addr = vk_meta_buffer_address(dev, buffer, offset, size),
2489 .data = data,
2490 };
2491 uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size);
2492
2493 disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
2494 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
2495 &args);
2496
2497 disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1);
2498
2499 offset += args.size;
2500 size -= args.size;
2501 }
2502 }
2503