1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 #include "vk_common_entrypoints.h"
30
31 static uint32_t
32 meta_blit_key_hash(const void *key)
33 {
34 return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
35 }
36
37 static bool
38 meta_blit_key_compare(const void *key1, const void *key2)
39 {
40 return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
41 }
42
43 static bool
44 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
45 VkImageAspectFlags aspect,
46 struct v3dv_image *image,
47 VkFormat dst_format,
48 VkFormat src_format,
49 struct v3dv_buffer *buffer,
50 uint32_t buffer_bpp,
51 VkColorComponentFlags cmask,
52 VkComponentMapping *cswizzle,
53 uint32_t region_count,
54 const VkBufferImageCopy2 *regions);
55
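/* Creates (lazily, on first use) the descriptor set layout shared by the blit
 * pipelines (a single combined image/sampler visible to the fragment stage)
 * and the blit pipeline layout, which exposes a 20-byte push constant range
 * for the vertex stage.
 */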
56 static bool
57 create_blit_pipeline_layout(struct v3dv_device *device,
58 VkDescriptorSetLayout *descriptor_set_layout,
59 VkPipelineLayout *pipeline_layout)
60 {
61 VkResult result;
62
63 if (*descriptor_set_layout == 0) {
64 VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
65 .binding = 0,
66 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
67 .descriptorCount = 1,
68 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
69 };
70 VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
71 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
72 .bindingCount = 1,
73 .pBindings = &descriptor_set_layout_binding,
74 };
75 result =
76 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
77 &descriptor_set_layout_info,
78 &device->vk.alloc,
79 descriptor_set_layout);
80 if (result != VK_SUCCESS)
81 return false;
82 }
83
84 assert(*pipeline_layout == 0);
85 VkPipelineLayoutCreateInfo pipeline_layout_info = {
86 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
87 .setLayoutCount = 1,
88 .pSetLayouts = descriptor_set_layout,
89 .pushConstantRangeCount = 1,
90 .pPushConstantRanges =
91 &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
92 };
93
94 result =
95 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
96 &pipeline_layout_info,
97 &device->vk.alloc,
98 pipeline_layout);
99 return result == VK_SUCCESS;
100 }
101
102 void
103 v3dv_meta_blit_init(struct v3dv_device *device)
104 {
105 for (uint32_t i = 0; i < 3; i++) {
106 device->meta.blit.cache[i] =
107 _mesa_hash_table_create(NULL,
108 meta_blit_key_hash,
109 meta_blit_key_compare);
110 }
111
112 create_blit_pipeline_layout(device,
113 &device->meta.blit.ds_layout,
114 &device->meta.blit.p_layout);
115 }
116
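/* Destroys a cached blit pipeline along with the two render passes (load and
 * no-load variants) it was created against.
 */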
117 static void
118 destroy_meta_blit_pipeline(VkDevice vk_device,
119 uint64_t obj,
120 VkAllocationCallbacks *alloc)
121 {
122 struct v3dv_meta_blit_pipeline *p =
123 (struct v3dv_meta_blit_pipeline *)(uintptr_t) obj;
124 v3dv_DestroyPipeline(vk_device, p->pipeline, alloc);
125 v3dv_DestroyRenderPass(vk_device, p->pass, alloc);
126 v3dv_DestroyRenderPass(vk_device, p->pass_no_load, alloc);
127 vk_free(alloc, p);
128 }
129
130 void
131 v3dv_meta_blit_finish(struct v3dv_device *device)
132 {
133 VkDevice _device = v3dv_device_to_handle(device);
134
135 for (uint32_t i = 0; i < 3; i++) {
136 hash_table_foreach(device->meta.blit.cache[i], entry) {
137 destroy_meta_blit_pipeline(_device, (uintptr_t)entry->data,
138 &device->vk.alloc);
139 }
140 _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
141 }
142
143 if (device->meta.blit.p_layout) {
144 v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
145 &device->vk.alloc);
146 }
147
148 if (device->meta.blit.ds_layout) {
149 v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
150 &device->vk.alloc);
151 }
152 }
153
154 static uint32_t
155 meta_texel_buffer_copy_key_hash(const void *key)
156 {
157 return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
158 }
159
160 static bool
161 meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
162 {
163 return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
164 }
165
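/* Same idea as create_blit_pipeline_layout, but for the texel buffer copy
 * path: a single uniform texel buffer binding for the fragment stage plus
 * fragment and geometry push constant ranges (see the offset defines below).
 */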
166 static bool
167 create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
168 VkDescriptorSetLayout *ds_layout,
169 VkPipelineLayout *p_layout)
170 {
171 VkResult result;
172
173 if (*ds_layout == 0) {
174 VkDescriptorSetLayoutBinding ds_layout_binding = {
175 .binding = 0,
176 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
177 .descriptorCount = 1,
178 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
179 };
180 VkDescriptorSetLayoutCreateInfo ds_layout_info = {
181 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
182 .bindingCount = 1,
183 .pBindings = &ds_layout_binding,
184 };
185 result =
186 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
187 &ds_layout_info,
188 &device->vk.alloc,
189 ds_layout);
190 if (result != VK_SUCCESS)
191 return false;
192 }
193
194 assert(*p_layout == 0);
195 /* FIXME: this is abusing the API a bit, since not all of our copy
196 * pipelines have a geometry shader. We could create 2 different pipeline
197 * layouts, but this works for us for now.
198 */
199 #define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0
200 #define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
201 #define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
202 #define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24
203 VkPushConstantRange ranges[2] = {
204 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
205 { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
206 };
207
208 VkPipelineLayoutCreateInfo p_layout_info = {
209 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
210 .setLayoutCount = 1,
211 .pSetLayouts = ds_layout,
212 .pushConstantRangeCount = 2,
213 .pPushConstantRanges = ranges,
214 };
215
216 result =
217 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
218 &p_layout_info,
219 &device->vk.alloc,
220 p_layout);
221 return result == VK_SUCCESS;
222 }
223
224 void
225 v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
226 {
227 for (uint32_t i = 0; i < 3; i++) {
228 device->meta.texel_buffer_copy.cache[i] =
229 _mesa_hash_table_create(NULL,
230 meta_texel_buffer_copy_key_hash,
231 meta_texel_buffer_copy_key_compare);
232 }
233
234 create_texel_buffer_copy_pipeline_layout(
235 device,
236 &device->meta.texel_buffer_copy.ds_layout,
237 &device->meta.texel_buffer_copy.p_layout);
238 }
239
240 static void
241 destroy_meta_texel_buffer_copy_pipeline(VkDevice vk_device,
242 uint64_t obj,
243 VkAllocationCallbacks *alloc)
244 {
245 struct v3dv_meta_texel_buffer_copy_pipeline *p =
246 (struct v3dv_meta_texel_buffer_copy_pipeline *)(uintptr_t) obj;
247 v3dv_DestroyPipeline(vk_device, p->pipeline, alloc);
248 v3dv_DestroyRenderPass(vk_device, p->pass, alloc);
249 v3dv_DestroyRenderPass(vk_device, p->pass_no_load, alloc);
250 vk_free(alloc, p);
251 }
252
253 void
254 v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
255 {
256 VkDevice _device = v3dv_device_to_handle(device);
257
258 for (uint32_t i = 0; i < 3; i++) {
259 hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
260 destroy_meta_texel_buffer_copy_pipeline(_device, (uintptr_t)entry->data,
261 &device->vk.alloc);
262 }
263 _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
264 }
265
266 if (device->meta.texel_buffer_copy.p_layout) {
267 v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
268 &device->vk.alloc);
269 }
270
271 if (device->meta.texel_buffer_copy.ds_layout) {
272 v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
273 &device->vk.alloc);
274 }
275 }
276
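/* Maps a format we cannot render to with the TLB to a renderable format of
 * the same bpp that we can use instead for copies, or VK_FORMAT_UNDEFINED if
 * there is no such format.
 */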
277 static VkFormat
278 get_compatible_tlb_format(VkFormat format)
279 {
280 switch (format) {
281 case VK_FORMAT_R8G8B8A8_SNORM:
282 return VK_FORMAT_R8G8B8A8_UINT;
283
284 case VK_FORMAT_R8G8_SNORM:
285 return VK_FORMAT_R8G8_UINT;
286
287 case VK_FORMAT_R8_SNORM:
288 return VK_FORMAT_R8_UINT;
289
290 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
291 return VK_FORMAT_A8B8G8R8_UINT_PACK32;
292
293 case VK_FORMAT_R16_UNORM:
294 case VK_FORMAT_R16_SNORM:
295 return VK_FORMAT_R16_UINT;
296
297 case VK_FORMAT_R16G16_UNORM:
298 case VK_FORMAT_R16G16_SNORM:
299 return VK_FORMAT_R16G16_UINT;
300
301 case VK_FORMAT_R16G16B16A16_UNORM:
302 case VK_FORMAT_R16G16B16A16_SNORM:
303 return VK_FORMAT_R16G16B16A16_UINT;
304
305 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
306 return VK_FORMAT_R32_SFLOAT;
307
308 /* We can't render to compressed formats using the TLB so instead we use
309 * a compatible format with the same bpp as the compressed format. Because
310 * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
311 * case of ETC), when we implement copies with the compatible format we
312 * will have to divide offsets and dimensions on the compressed image by
313 * the compressed block size.
314 */
315 case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
316 case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
317 case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
318 case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
319 case VK_FORMAT_BC2_UNORM_BLOCK:
320 case VK_FORMAT_BC2_SRGB_BLOCK:
321 case VK_FORMAT_BC3_SRGB_BLOCK:
322 case VK_FORMAT_BC3_UNORM_BLOCK:
323 case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
324 case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
325 case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
326 case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
327 case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
328 case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
329 case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
330 case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
331 case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
332 case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
333 case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
334 case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
335 case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
336 case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
337 case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
338 case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
339 case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
340 case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
341 case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
342 case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
343 case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
344 case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
345 case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
346 case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
347 case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
348 case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
349 case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
350 case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
351 return VK_FORMAT_R32G32B32A32_UINT;
352
353 case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
354 case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
355 case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
356 case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
357 case VK_FORMAT_EAC_R11_UNORM_BLOCK:
358 case VK_FORMAT_EAC_R11_SNORM_BLOCK:
359 case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
360 case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
361 case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
362 case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
363 return VK_FORMAT_R16G16B16A16_UINT;
364
365 default:
366 return VK_FORMAT_UNDEFINED;
367 }
368 }
369
370 /**
371 * Checks if we can implement an image copy or clear operation using the TLB
372 * hardware.
373 *
374 * The extent and miplevel are only used to validate tile stores (to match the
375 * region to store against the miplevel dimensions to avoid cases where
376 * the region to store is not aligned to tile boundaries). If extent is
377 * NULL no checks are done (which is fine if the image will only be used for a
378 * TLB load or when we know in advance that the store will be for the entire
379 * size of the image miplevel).
380 *
381 * For tlb copies we are doing a per-plane copy, so for multi-plane formats,
382 * the compatible format will be single-plane.
383 */
384 bool
385 v3dv_meta_can_use_tlb(struct v3dv_image *image,
386 uint8_t plane,
387 uint8_t miplevel,
388 const VkOffset3D *offset,
389 const VkExtent3D *extent,
390 VkFormat *compat_format)
391 {
392 if (offset->x != 0 || offset->y != 0)
393 return false;
394
395 /* FIXME: this is suboptimal, what we really want to check is that the
396 * extent of the region to copy is the full slice or a multiple of the
397 * tile size.
398 */
399 if (extent) {
400 struct v3d_resource_slice *slice = &image->planes[plane].slices[miplevel];
401 if (slice->width != extent->width || slice->height != extent->height)
402 return false;
403 }
404
405 if (image->format->planes[plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
406 if (compat_format)
407 *compat_format = image->planes[plane].vk_format;
408 return true;
409 }
410
411 /* If the image format is not TLB-supported, then check if we can use
412 * a compatible format instead.
413 */
414 if (compat_format) {
415 *compat_format = get_compatible_tlb_format(image->planes[plane].vk_format);
416 if (*compat_format != VK_FORMAT_UNDEFINED) {
417 assert(vk_format_get_plane_count(*compat_format) == 1);
418 return true;
419 }
420 }
421
422 return false;
423 }
424
425 /* Implements a copy using the TLB.
426 *
427 * This only works if we are copying from offset (0,0), since a TLB store for
428 * tile (x,y) will be written at the same tile offset into the destination.
429 * When this requirement is not met, we need to use a blit instead.
430 *
431 * Returns true if the implementation supports the requested operation (even if
432 * it failed to process it, for example, due to an out-of-memory error).
433 *
434 */
435 static bool
436 copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
437 struct v3dv_buffer *buffer,
438 struct v3dv_image *image,
439 const VkBufferImageCopy2 *region)
440 {
441 VkFormat fb_format;
442 uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
443 assert(plane < image->plane_count);
444
445 if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
446 &region->imageOffset, &region->imageExtent,
447 &fb_format)) {
448 return false;
449 }
450
451 uint32_t internal_type, internal_bpp;
452 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
453 (fb_format, region->imageSubresource.aspectMask,
454 &internal_type, &internal_bpp);
455
456 uint32_t num_layers;
457 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
458 num_layers = vk_image_subresource_layer_count(&image->vk,
459 &region->imageSubresource);
460 } else {
461 num_layers = region->imageExtent.depth;
462 }
463 assert(num_layers > 0);
464
465 struct v3dv_job *job =
466 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
467 if (!job)
468 return true;
469
470 /* Handle copy from compressed format using a compatible format */
471 const uint32_t block_w =
472 vk_format_get_blockwidth(image->planes[plane].vk_format);
473 const uint32_t block_h =
474 vk_format_get_blockheight(image->planes[plane].vk_format);
475 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
476 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
477
478 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
479 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
480 false);
481
482 struct v3dv_meta_framebuffer framebuffer;
483 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
484 internal_type, &job->frame_tiling);
485
486 v3dv_X(job->device, job_emit_binning_flush)(job);
487 v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
488 (job, buffer, image, &framebuffer, region);
489
490 v3dv_cmd_buffer_finish_job(cmd_buffer);
491
492 return true;
493 }
494
495 static bool
496 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
497 struct v3dv_image *dst,
498 VkFormat dst_format,
499 struct v3dv_image *src,
500 VkFormat src_format,
501 VkColorComponentFlags cmask,
502 VkComponentMapping *cswizzle,
503 const VkImageBlit2 *region,
504 VkFilter filter,
505 bool dst_is_padded_image);
506
507
508 /**
509 * A structure that contains all the information we may need in various
510 * processes involving image to buffer copies implemented with blit paths.
511 */
512 struct image_to_buffer_info {
513 /* Source image info */
514 VkFormat src_format;
515 uint8_t plane;
516 VkColorComponentFlags cmask;
517 VkComponentMapping cswizzle;
518 VkImageAspectFlags src_copy_aspect;
519 uint32_t block_width;
520 uint32_t block_height;
521
522 /* Destination buffer info */
523 VkFormat dst_format;
524 uint32_t buf_width;
525 uint32_t buf_height;
526 uint32_t buf_bpp;
527 VkImageAspectFlags dst_copy_aspect;
528 };
529
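/* Builds the VkImageBlit2 region used to blit a single layer of the source
 * image region into the buffer-backed destination image at offset (0,0),
 * scaling coordinates by the compressed block size where applicable.
 */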
530 static VkImageBlit2
531 blit_region_for_image_to_buffer(const VkOffset3D *offset,
532 const VkExtent3D *extent,
533 uint32_t mip_level,
534 uint32_t base_layer,
535 uint32_t layer_offset,
536 struct image_to_buffer_info *info)
537 {
538 VkImageBlit2 output = {
539 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
540 .srcSubresource = {
541 .aspectMask = info->src_copy_aspect,
542 .mipLevel = mip_level,
543 .baseArrayLayer = base_layer + layer_offset,
544 .layerCount = 1,
545 },
546 .srcOffsets = {
547 {
548 DIV_ROUND_UP(offset->x, info->block_width),
549 DIV_ROUND_UP(offset->y, info->block_height),
550 offset->z + layer_offset,
551 },
552 {
553 DIV_ROUND_UP(offset->x + extent->width, info->block_width),
554 DIV_ROUND_UP(offset->y + extent->height, info->block_height),
555 offset->z + layer_offset + 1,
556 },
557 },
558 .dstSubresource = {
559 .aspectMask = info->dst_copy_aspect,
560 .mipLevel = 0,
561 .baseArrayLayer = 0,
562 .layerCount = 1,
563 },
564 .dstOffsets = {
565 { 0, 0, 0 },
566 {
567 DIV_ROUND_UP(extent->width, info->block_width),
568 DIV_ROUND_UP(extent->height, info->block_height),
569 1
570 },
571 },
572 };
573
574 return output;
575 }
576
577 /**
578 * Produces an image_to_buffer_info struct from a VkBufferImageCopy2 that we can
579 * use to implement image to buffer copies with blit paths.
580 *
581 * Returns false if the copy operation can't be implemented with a blit.
582 */
583 static bool
584 gather_image_to_buffer_info(struct v3dv_cmd_buffer *cmd_buffer,
585 struct v3dv_image *image,
586 const VkBufferImageCopy2 *region,
587 struct image_to_buffer_info *out_info)
588 {
589 bool supported = false;
590
591 VkImageAspectFlags dst_copy_aspect = region->imageSubresource.aspectMask;
592 /* For multi-planar images we copy one plane at a time using an image alias
593 * with a color aspect for each plane.
594 */
595 if (image->plane_count > 1)
596 dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
597
598 VkImageAspectFlags src_copy_aspect = region->imageSubresource.aspectMask;
599 uint8_t plane = v3dv_plane_from_aspect(src_copy_aspect);
600 assert(plane < image->plane_count);
601
602 /* Generally, the bpp of the data in the buffer matches that of the
603 * source image. The exception is the case where we are copying
604 * stencil (8bpp) from a combined d24s8 image (32bpp).
605 */
606 uint32_t buffer_bpp = image->planes[plane].cpp;
607
608 /* Because we are going to implement the copy as a blit, we need to create
609 * a linear image from the destination buffer and we also want our blit
610 * source and destination formats to be the same (to avoid any format
611 * conversions), so we choose a canonical format that matches the
612 * source image bpp.
613 *
614 * The exception to the above is copying from combined depth/stencil images
615 * because we are copying only one aspect of the image, so we need to setup
616 * our formats, color write mask and source swizzle mask to match that.
617 */
618 VkFormat dst_format;
619 VkFormat src_format;
620 VkColorComponentFlags cmask = 0; /* All components */
621 VkComponentMapping cswizzle = {
622 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
623 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
624 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
625 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
626 };
627 switch (buffer_bpp) {
628 case 16:
629 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
630 dst_format = VK_FORMAT_R32G32B32A32_UINT;
631 src_format = dst_format;
632 break;
633 case 8:
634 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
635 dst_format = VK_FORMAT_R16G16B16A16_UINT;
636 src_format = dst_format;
637 break;
638 case 4:
639 switch (dst_copy_aspect) {
640 case VK_IMAGE_ASPECT_COLOR_BIT:
641 src_format = VK_FORMAT_R8G8B8A8_UINT;
642 dst_format = VK_FORMAT_R8G8B8A8_UINT;
643 break;
644 case VK_IMAGE_ASPECT_DEPTH_BIT:
645 assert(image->plane_count == 1);
646 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
647 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
648 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
649 if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
650 src_format = VK_FORMAT_R32_UINT;
651 dst_format = VK_FORMAT_R32_UINT;
652 } else {
653 /* We want to write depth in the buffer in the first 24-bits,
654 * however, the hardware has depth in bits 8-31, so swizzle the
655 * source components to match what we want. Also, we don't
656 * want to write bits 24-31 in the destination.
657 */
658 src_format = VK_FORMAT_R8G8B8A8_UINT;
659 dst_format = VK_FORMAT_R8G8B8A8_UINT;
660 cmask = VK_COLOR_COMPONENT_R_BIT |
661 VK_COLOR_COMPONENT_G_BIT |
662 VK_COLOR_COMPONENT_B_BIT;
663 cswizzle.r = VK_COMPONENT_SWIZZLE_G;
664 cswizzle.g = VK_COMPONENT_SWIZZLE_B;
665 cswizzle.b = VK_COMPONENT_SWIZZLE_A;
666 cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
667 }
668 break;
669 case VK_IMAGE_ASPECT_STENCIL_BIT:
670 assert(image->plane_count == 1);
671 assert(dst_copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
672 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
673 /* Copying from S8D24. We want to write 8-bit stencil values only,
674 * so adjust the buffer bpp for that. Since the hardware stores stencil
675 * in the LSB, we can just do a RGBA8UI to R8UI blit.
676 */
677 src_format = VK_FORMAT_R8G8B8A8_UINT;
678 dst_format = VK_FORMAT_R8_UINT;
679 buffer_bpp = 1;
680 break;
681 default:
682 unreachable("unsupported aspect");
683 return supported;
684 };
685 break;
686 case 2:
687 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
688 dst_copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
689 dst_format = VK_FORMAT_R16_UINT;
690 src_format = dst_format;
691 break;
692 case 1:
693 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
694 dst_format = VK_FORMAT_R8_UINT;
695 src_format = dst_format;
696 break;
697 default:
698 unreachable("unsupported bit-size");
699 return supported;
700 };
701
702 /* The hardware doesn't support linear depth/stencil stores, so we
703 * implement copies of depth/stencil aspect as color copies using a
704 * compatible color format.
705 */
706 assert(vk_format_is_color(src_format));
707 assert(vk_format_is_color(dst_format));
708 dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
709
710 /* We should be able to handle the blit if we got this far */
711 supported = true;
712
713 /* Obtain the 2D buffer region spec */
714 uint32_t buf_width, buf_height;
715 if (region->bufferRowLength == 0)
716 buf_width = region->imageExtent.width;
717 else
718 buf_width = region->bufferRowLength;
719
720 if (region->bufferImageHeight == 0)
721 buf_height = region->imageExtent.height;
722 else
723 buf_height = region->bufferImageHeight;
724
725 /* If the image is compressed, the bpp refers to blocks, not pixels */
726 uint32_t block_width =
727 vk_format_get_blockwidth(image->planes[plane].vk_format);
728 uint32_t block_height =
729 vk_format_get_blockheight(image->planes[plane].vk_format);
730 buf_width = DIV_ROUND_UP(buf_width, block_width);
731 buf_height = DIV_ROUND_UP(buf_height, block_height);
732
733 out_info->src_format = src_format;
734 out_info->dst_format = dst_format;
735 out_info->src_copy_aspect = src_copy_aspect;
736 out_info->dst_copy_aspect = dst_copy_aspect;
737 out_info->buf_width = buf_width;
738 out_info->buf_height = buf_height;
739 out_info->buf_bpp = buffer_bpp;
740 out_info->block_width = block_width;
741 out_info->block_height = block_height;
742 out_info->cmask = cmask;
743 out_info->cswizzle = cswizzle;
744 out_info->plane = plane;
745
746 return supported;
747 }
748
749 /* Creates a linear image that aliases the buffer memory. It also adds that
750 * image to the cmd_buffer as a private object.
751 *
752 * This is used for cases where we want to implement an image to buffer copy,
753 * but we need to rely on a mechanism that uses an image as destination, like
754 * blitting.
755 */
756 static VkResult
757 create_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
758 struct v3dv_buffer *buffer,
759 const VkBufferImageCopy2 *region,
760 struct image_to_buffer_info *info,
761 uint32_t layer,
762 VkImage *out_image)
763 {
764 VkImageCreateInfo image_info = {
765 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
766 .imageType = VK_IMAGE_TYPE_2D,
767 .format = info->dst_format,
768 .extent = { info->buf_width, info->buf_height, 1 },
769 .mipLevels = 1,
770 .arrayLayers = 1,
771 .samples = VK_SAMPLE_COUNT_1_BIT,
772 .tiling = VK_IMAGE_TILING_LINEAR,
773 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
774 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
775 .queueFamilyIndexCount = 0,
776 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
777 };
778
779 VkResult result;
780 struct v3dv_device *device = cmd_buffer->device;
781 VkDevice _device = v3dv_device_to_handle(device);
782
783 VkImage buffer_image;
784 result =
785 v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
786 if (result != VK_SUCCESS)
787 return result;
788
789 *out_image = buffer_image;
790
791 v3dv_cmd_buffer_add_private_obj(
792 cmd_buffer, (uintptr_t)buffer_image,
793 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
794
795 /* Bind the buffer memory to the image
796 */
797 VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
798 layer * info->buf_width * info->buf_height * info->buf_bpp;
799
800 result =
801 vk_common_BindImageMemory(_device, buffer_image,
802 v3dv_device_memory_to_handle(buffer->mem),
803 buffer_offset);
804 return result;
805 }
806
807 /**
808 * Creates an image with a single mip level that aliases the memory of a
809 * mip level in another image, re-interpreting the memory with an uncompressed
810 * format. The image is added to the command buffer as a private object for
811 * disposal.
812 */
813 static bool
814 create_image_mip_level_alias(struct v3dv_cmd_buffer *cmd_buffer,
815 struct v3dv_image *image,
816 VkFormat format,
817 uint32_t plane,
818 uint32_t mip_level,
819 uint32_t layer,
820 VkImage *alias)
821 {
822 VkResult result;
823 assert(!vk_format_is_compressed(format));
824
825 struct v3dv_device *device = cmd_buffer->device;
826 VkDevice vk_device = v3dv_device_to_handle(device);
827 uint32_t mip_width = image->planes[plane].slices[mip_level].width;
828 uint32_t mip_height = image->planes[plane].slices[mip_level].height;
829
830 uint32_t block_width =
831 vk_format_get_blockwidth(image->planes[plane].vk_format);
832 uint32_t block_height =
833 vk_format_get_blockheight(image->planes[plane].vk_format);
834
835 VkImageCreateInfo info = {
836 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
837 .imageType = image->vk.image_type,
838 .format = format,
839 .extent = { DIV_ROUND_UP(mip_width, block_width),
840 DIV_ROUND_UP(mip_height, block_height),
841 1 },
842 .mipLevels = 1,
843 .arrayLayers = 1,
844 .samples = image->vk.samples,
845 .tiling = image->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
846 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
847 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
848 .queueFamilyIndexCount = 0,
849 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
850 };
851 result = v3dv_CreateImage(vk_device, &info, &device->vk.alloc, alias);
852 if (result != VK_SUCCESS)
853 return false;
854
855 /* The alias we have just created has just one mip, but we may be aliasing
856 * any mip in the original image. Because the slice setup changes based on
857 * the mip (particularly, for mips >= 2 it uses power of 2 sizes internally)
858 * and this can influence the tiling layout selected for the slice, we want
859 * to make sure we copy the slice description from the actual mip level in
860 * the original image, and then rewrite any fields that we need for the
861 * alias. Particularly, we want to make the offset 0 because we are going to
862 * bind the underlying image memory exactly at the start of the selected mip.
863 * We also want to relax the image alignment requirements to the minimum
864 * (the one imposed by the Texture Base Address field) since we may not be
865 * aliasing a level 0 (for which we typically want a page alignment for
866 * optimal performance).
867 */
868 V3DV_FROM_HANDLE(v3dv_image, v3dv_alias, *alias);
869 v3dv_alias->planes[plane].slices[0] = image->planes[plane].slices[mip_level];
870 v3dv_alias->planes[plane].slices[0].width = info.extent.width;
871 v3dv_alias->planes[plane].slices[0].height = info.extent.height;
872 v3dv_alias->planes[plane].slices[0].offset = 0;
873 v3dv_alias->planes[plane].alignment = 64;
874
875 v3dv_cmd_buffer_add_private_obj(
876 cmd_buffer, (uintptr_t)*alias,
877 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
878
879 result =
880 vk_common_BindImageMemory(vk_device, *alias,
881 v3dv_device_memory_to_handle(image->planes[plane].mem),
882 v3dv_layer_offset(image, mip_level, layer, plane));
883 return result == VK_SUCCESS;
884 }
885
886 /**
887 * Returns true if the implementation supports the requested operation (even if
888 * it failed to process it, for example, due to an out-of-memory error).
889 */
890 static bool
891 copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
892 struct v3dv_buffer *buffer,
893 struct v3dv_image *image,
894 const VkBufferImageCopy2 *region)
895 {
896 bool handled = false;
897 struct image_to_buffer_info info;
898
899 /* This path uses a shader blit which doesn't support linear images. Return
900 * early to avoid all the heavy lifting in preparation for the
901 * blit_shader() call that is bound to fail in that scenario.
902 */
903 if (!image->tiled && image->vk.image_type != VK_IMAGE_TYPE_1D) {
904 return handled;
905 }
906
907 handled = gather_image_to_buffer_info(cmd_buffer, image, region,
908 &info);
909
910 if (!handled)
911 return handled;
912
913 /* We should be able to handle the blit if we got this far */
914 handled = true;
915
916 /* Compute layers to copy */
917 uint32_t num_layers;
918 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
919 num_layers = vk_image_subresource_layer_count(&image->vk,
920 &region->imageSubresource);
921 } else {
922 num_layers = region->imageExtent.depth;
923 }
924 assert(num_layers > 0);
925
926 /* Copy requested layers */
927 VkResult result;
928 VkImageBlit2 blit_region;
929 uint32_t mip_level = region->imageSubresource.mipLevel;
930 uint32_t base_layer = region->imageSubresource.baseArrayLayer;
931 for (uint32_t i = 0; i < num_layers; i++) {
932 uint32_t layer_offset = i;
933
934 if (vk_format_is_compressed(image->vk.format)) {
935 /* Our blit interface can see the real format of the images to detect
936 * copies between compressed and uncompressed images and adapt the
937 * blit region accordingly. Here we are just doing a raw copy of
938 * compressed data, but we are passing an uncompressed view of the
939 * buffer for the blit destination image (since compressed formats are
940 * not renderable), so we also want to provide an uncompressed view of
941 * the source image.
942 *
943 * It is important that we create the alias over the selected mip
944 * level (instead of aliasing the entire image) because an uncompressed
945 * view of the image won't have the same number of mip levels as the
946 * original image and the implicit mip size calculations the hw will
947 * do to sample from a non-zero mip level may not match exactly between
948 * compressed and uncompressed views.
949 */
950 VkImage alias;
951 if (!create_image_mip_level_alias(cmd_buffer, image, info.dst_format,
952 info.plane, mip_level,
953 base_layer + layer_offset,
954 &alias)) {
955 return handled;
956 }
957
958 /* We are aliasing the selected mip level and layer with a
959 * single-mip and single-layer image.
960 */
961 image = v3dv_image_from_handle(alias);
962 mip_level = 0;
963 base_layer = 0;
964 layer_offset = 0;
965 }
966
967 /* Create the destination blit image from the destination buffer */
968 VkImage buffer_image;
969 result =
970 create_image_from_buffer(cmd_buffer, buffer, region, &info,
971 i, &buffer_image);
972 if (result != VK_SUCCESS)
973 return handled;
974
975 /* Blit-copy the requested image extent.
976 *
977 * Since we are copying, the blit must use the same format on the
978 * destination and source images to avoid format conversions. The
979 * only exception is copying stencil, which we upload to a R8UI source
980 * image, but that we need to blit to a S8D24 destination (the only
981 * stencil format we support).
982 */
983 blit_region =
984 blit_region_for_image_to_buffer(&region->imageOffset,
985 &region->imageExtent,
986 mip_level, base_layer, layer_offset,
987 &info);
988
989 handled = blit_shader(cmd_buffer,
990 v3dv_image_from_handle(buffer_image),
991 info.dst_format,
992 image, info.src_format,
993 info.cmask, &info.cswizzle,
994 &blit_region, VK_FILTER_NEAREST, false);
995 if (!handled) {
996 /* This is unexpected, we should have a supported blit spec */
997 unreachable("Unable to blit buffer to destination image");
998 return false;
999 }
1000 }
1001
1002 assert(handled);
1003 return true;
1004 }
1005
1006 static bool
1007 copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1008 struct v3dv_image *dst,
1009 struct v3dv_image *src,
1010 const VkImageCopy2 *region);
1011
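/* Builds the VkImageCopy2 region used to copy a single layer of the source
 * image region to offset (0,0,0) of the buffer-backed destination image,
 * scaling coordinates by the compressed block size where applicable.
 */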
1012 static VkImageCopy2
1013 image_copy_region_for_image_to_buffer(const VkBufferImageCopy2 *region,
1014 struct image_to_buffer_info *info,
1015 uint32_t layer)
1016 {
1017 VkImageCopy2 output = {
1018 .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
1019 .srcSubresource = {
1020 .aspectMask = info->src_copy_aspect,
1021 .mipLevel = region->imageSubresource.mipLevel,
1022 .baseArrayLayer = region->imageSubresource.baseArrayLayer + layer,
1023 .layerCount = 1,
1024 },
1025 .srcOffset = {
1026 DIV_ROUND_UP(region->imageOffset.x, info->block_width),
1027 DIV_ROUND_UP(region->imageOffset.y, info->block_height),
1028 region->imageOffset.z,
1029 },
1030 .dstSubresource = {
1031 .aspectMask = info->dst_copy_aspect,
1032 .mipLevel = 0,
1033 .baseArrayLayer = 0,
1034 .layerCount = 1,
1035 },
1036 .dstOffset = { 0, 0, 0 },
1037 .extent = {
1038 DIV_ROUND_UP(region->imageExtent.width, info->block_width),
1039 DIV_ROUND_UP(region->imageExtent.height, info->block_height),
1040 1
1041 },
1042 };
1043
1044 return output;
1045 }
1046
1047 /**
1048 * Returns true if the implementation supports the requested operation (even if
1049 * it failed to process it, for example, due to an out-of-memory error).
1050 */
1051 static bool
1052 copy_image_to_buffer_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1053 struct v3dv_buffer *dst_buffer,
1054 struct v3dv_image *src_image,
1055 const VkBufferImageCopy2 *region)
1056 {
1057 bool handled = false;
1058 VkImage dst_buffer_image;
1059 struct image_to_buffer_info info;
1060
1061 /* This is a requirement for copy_image_linear_texel_buffer below. We check
1062 * it in advance in order to do an early return
1063 */
1064 if (src_image->tiled)
1065 return false;
1066
1067 handled =
1068 gather_image_to_buffer_info(cmd_buffer, src_image, region,
1069 &info);
1070 if (!handled)
1071 return handled;
1072
1073 /* At this point the implementation should support the copy; any possible
1074 * errors below are for different reasons, such as an out-of-memory error.
1075 */
1076 handled = true;
1077
1078 uint32_t num_layers;
1079 if (src_image->vk.image_type != VK_IMAGE_TYPE_3D) {
1080 num_layers = vk_image_subresource_layer_count(&src_image->vk,
1081 &region->imageSubresource);
1082 } else {
1083 num_layers = region->imageExtent.depth;
1084 }
1085 assert(num_layers > 0);
1086
1087 VkResult result;
1088 VkImageCopy2 image_region;
1089 for (uint32_t layer = 0; layer < num_layers; layer++) {
1090 /* Create the destination image from the destination buffer */
1091 result =
1092 create_image_from_buffer(cmd_buffer, dst_buffer, region, &info,
1093 layer, &dst_buffer_image);
1094 if (result != VK_SUCCESS)
1095 return handled;
1096
1097 image_region =
1098 image_copy_region_for_image_to_buffer(region, &info, layer);
1099
1100 handled =
1101 copy_image_linear_texel_buffer(cmd_buffer,
1102 v3dv_image_from_handle(dst_buffer_image),
1103 src_image, &image_region);
1104 }
1105
1106 return handled;
1107 }
1108
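/* Implements vkCmdCopyImageToBuffer2 by trying the TLB path first, then the
 * blit path and finally the texel buffer path for each region; at least one
 * of them must be able to handle any valid region.
 */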
1109 VKAPI_ATTR void VKAPI_CALL
1110 v3dv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
1111 const VkCopyImageToBufferInfo2 *info)
1112
1113 {
1114 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1115 V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
1116 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);
1117
1118 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
1119
1120 cmd_buffer->state.is_transfer = true;
1121
1122 for (uint32_t i = 0; i < info->regionCount; i++) {
1123 const VkBufferImageCopy2 *region = &info->pRegions[i];
1124
1125 if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, region))
1126 continue;
1127
1128 if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, region))
1129 continue;
1130
1131 if (copy_image_to_buffer_texel_buffer(cmd_buffer, buffer, image, region))
1132 continue;
1133
1134 unreachable("Unsupported image to buffer copy.");
1135 }
1136 cmd_buffer->state.is_transfer = false;
1137 }
1138
1139 /**
1140 * Returns true if the implementation supports the requested operation (even if
1141 * it failed to process it, for example, due to an out-of-memory error).
1142 */
1143 static bool
1144 copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1145 struct v3dv_image *dst,
1146 struct v3dv_image *src,
1147 const VkImageCopy2 *region)
1148 {
1149 if (V3D_DBG(DISABLE_TFU)) {
1150 perf_debug("Copy images: TFU disabled, fallbacks could be slower.\n");
1151 return false;
1152 }
1153
1154 /* Destination can't be raster format */
1155 if (!dst->tiled)
1156 return false;
1157
1158 /* We can only do full copies, so if the format is D24S8 both aspects need
1159 * to be copied. We only need to check the dst format because the spec
1160 * states that depth/stencil formats must match exactly.
1161 */
1162 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
1163 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
1164 VK_IMAGE_ASPECT_STENCIL_BIT;
1165 if (region->dstSubresource.aspectMask != ds_aspects)
1166 return false;
1167 }
1168
1169 /* Don't handle copies between uncompressed and compressed formats for now.
1170 *
1171 * FIXME: we should be able to handle these easily but there is no coverage
1172 * in CTS at the moment that makes such copies with full images (which we
1173 * require here), only partial copies. Also, in that case the code below that
1174 * checks for "dst image complete" requires some changes, since it is
1175 * checking against the region dimensions, which are in units of the source
1176 * image format.
1177 */
1178 if (vk_format_is_compressed(dst->vk.format) !=
1179 vk_format_is_compressed(src->vk.format)) {
1180 return false;
1181 }
1182
1183 /* Source region must start at (0,0) */
1184 if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
1185 return false;
1186
1187 /* Destination image must be complete */
1188 if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
1189 return false;
1190
1191 uint8_t src_plane =
1192 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1193 uint8_t dst_plane =
1194 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1195
1196 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
1197 uint32_t dst_width = u_minify(dst->planes[dst_plane].width, dst_mip_level);
1198 uint32_t dst_height = u_minify(dst->planes[dst_plane].height, dst_mip_level);
1199 if (region->extent.width != dst_width || region->extent.height != dst_height)
1200 return false;
1201
1202 /* From vkCmdCopyImage:
1203 *
1204 * "When copying between compressed and uncompressed formats the extent
1205 * members represent the texel dimensions of the source image and not
1206 * the destination."
1207 */
1208 const uint32_t block_w =
1209 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
1210 const uint32_t block_h =
1211 vk_format_get_blockheight(src->planes[src_plane].vk_format);
1212 uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
1213 uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
1214
1215 /* Account for sample count */
1216 assert(dst->vk.samples == src->vk.samples);
1217 if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
1218 assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
1219 width *= 2;
1220 height *= 2;
1221 }
1222
1223 /* The TFU unit doesn't handle format conversions so we need the formats to
1224 * match. On the other hand, vkCmdCopyImage allows different color formats
1225 * on the source and destination images, but only if they are texel
1226 * compatible. For us, this means that we can effectively ignore different
1227 * formats and just make the copy using either of them, since we are just
1228 * moving raw data and not making any conversions.
1229 *
1230 * Also, the formats supported by the TFU unit are limited, but again, since
1231 * we are only doing raw copies here without interpreting or converting
1232 * the underlying pixel data according to its format, we can always choose
1233 * to use compatible formats that are supported with the TFU unit.
1234 */
1235 assert(dst->planes[dst_plane].cpp == src->planes[src_plane].cpp);
1236 const struct v3dv_format *format =
1237 v3dv_get_compatible_tfu_format(cmd_buffer->device,
1238 dst->planes[dst_plane].cpp, NULL);
1239
1240 /* Emit a TFU job for each layer to blit */
1241 const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1242 vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource) :
1243 region->extent.depth;
1244 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
1245
1246 const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
1247 region->srcSubresource.baseArrayLayer : region->srcOffset.z;
1248 const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1249 region->dstSubresource.baseArrayLayer : region->dstOffset.z;
1250 for (uint32_t i = 0; i < layer_count; i++) {
1251 const uint32_t dst_offset =
1252 dst->planes[dst_plane].mem->bo->offset +
1253 v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i, dst_plane);
1254 const uint32_t src_offset =
1255 src->planes[src_plane].mem->bo->offset +
1256 v3dv_layer_offset(src, src_mip_level, base_src_layer + i, src_plane);
1257
1258 const struct v3d_resource_slice *dst_slice =
1259 &dst->planes[dst_plane].slices[dst_mip_level];
1260 const struct v3d_resource_slice *src_slice =
1261 &src->planes[src_plane].slices[src_mip_level];
1262
1263 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
1264 cmd_buffer,
1265 dst->planes[dst_plane].mem->bo->handle,
1266 dst_offset,
1267 dst_slice->tiling,
1268 dst_slice->padded_height,
1269 dst->planes[dst_plane].cpp,
1270 src->planes[src_plane].mem->bo->handle,
1271 src_offset,
1272 src_slice->tiling,
1273 src_slice->tiling == V3D_TILING_RASTER ?
1274 src_slice->stride : src_slice->padded_height,
1275 src->planes[src_plane].cpp,
1276 /* All compatible TFU formats are single-plane */
1277 width, height, &format->planes[0]);
1278 }
1279
1280 return true;
1281 }
1282
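/* Non-static wrapper so other parts of the driver can reuse the TFU image
 * copy path implemented above.
 */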
1283 inline bool
1284 v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1285 struct v3dv_image *dst,
1286 struct v3dv_image *src,
1287 const VkImageCopy2 *region)
1288 {
1289 return copy_image_tfu(cmd_buffer, dst, src, region);
1290 }
1291
1292 /**
1293 * Returns true if the implementation supports the requested operation (even if
1294 * it failed to process it, for example, due to an out-of-memory error).
1295 */
1296 static bool
1297 copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1298 struct v3dv_image *dst,
1299 struct v3dv_image *src,
1300 const VkImageCopy2 *region)
1301 {
1302 uint8_t src_plane =
1303 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1304 assert(src_plane < src->plane_count);
1305 uint8_t dst_plane =
1306 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1307 assert(dst_plane < dst->plane_count);
1308
1309 VkFormat fb_format;
1310 if (!v3dv_meta_can_use_tlb(src, src_plane, region->srcSubresource.mipLevel,
1311 &region->srcOffset, NULL, &fb_format) ||
1312 !v3dv_meta_can_use_tlb(dst, dst_plane, region->dstSubresource.mipLevel,
1313 &region->dstOffset, &region->extent, &fb_format)) {
1314 return false;
1315 }
1316
1317 /* We can't do TLB stores of linear D/S */
1318 if (!dst->tiled && vk_format_is_depth_or_stencil(fb_format))
1319 return false;
1320
1321 /* From the Vulkan spec, VkImageCopy valid usage:
1322 *
1323 * "If neither the calling command’s srcImage nor the calling command’s
1324 * dstImage has a multi-planar image format then the aspectMask member
1325 * of srcSubresource and dstSubresource must match."
1326 */
1327 assert(src->plane_count != 1 || dst->plane_count != 1 ||
1328 region->dstSubresource.aspectMask ==
1329 region->srcSubresource.aspectMask);
1330 uint32_t internal_type, internal_bpp;
1331 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
1332 (fb_format, region->dstSubresource.aspectMask,
1333 &internal_type, &internal_bpp);
1334
1335 /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
1336 *
1337 * "The number of slices of the extent (for 3D) or layers of the
1338 * srcSubresource (for non-3D) must match the number of slices of the
1339 * extent (for 3D) or layers of the dstSubresource (for non-3D)."
1340 */
1341 assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
1342 vk_image_subresource_layer_count(&src->vk, &region->srcSubresource) :
1343 region->extent.depth) ==
1344 (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1345 vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource) :
1346 region->extent.depth));
1347 uint32_t num_layers;
1348 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
1349 num_layers = vk_image_subresource_layer_count(&dst->vk,
1350 &region->dstSubresource);
1351 } else {
1352 num_layers = region->extent.depth;
1353 }
1354 assert(num_layers > 0);
1355
1356 struct v3dv_job *job =
1357 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1358 if (!job)
1359 return true;
1360
1361 /* Handle copy to compressed image using compatible format */
1362 const uint32_t block_w =
1363 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
1364 const uint32_t block_h =
1365 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
1366 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
1367 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
1368
1369 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
1370 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
1371 src->vk.samples > VK_SAMPLE_COUNT_1_BIT);
1372
1373 struct v3dv_meta_framebuffer framebuffer;
1374 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
1375 internal_type, &job->frame_tiling);
1376
1377 v3dv_X(job->device, job_emit_binning_flush)(job);
1378 v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);
1379
1380 v3dv_cmd_buffer_finish_job(cmd_buffer);
1381
1382 return true;
1383 }
1384
1385 /**
1386 * Takes the image provided as argument and creates a new image that has
1387 * the same specification and aliases the same memory storage, except that:
1388 *
1389 * - It has the uncompressed format passed in.
1390 * - Its original width/height are scaled by the factors passed in.
1391 *
1392 * This is useful to implement copies from compressed images using the blit
1393 * path. The idea is that we create uncompressed "image views" of both the
1394 * source and destination images using the uncompressed format and then we
1395 * define the copy blit in terms of that format.
1396 */
1397 static struct v3dv_image *
1398 create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
1399 struct v3dv_image *src,
1400 float width_scale,
1401 float height_scale,
1402 VkFormat format)
1403 {
1404 assert(!vk_format_is_compressed(format));
1405 /* We don't support ycbcr compressed formats */
1406 assert(src->plane_count == 1);
1407
1408 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
1409
1410 VkImageCreateInfo info = {
1411 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1412 .imageType = src->vk.image_type,
1413 .format = format,
1414 .extent = {
1415 .width = src->vk.extent.width * width_scale,
1416 .height = src->vk.extent.height * height_scale,
1417 .depth = src->vk.extent.depth,
1418 },
1419 .mipLevels = src->vk.mip_levels,
1420 .arrayLayers = src->vk.array_layers,
1421 .samples = src->vk.samples,
1422 .tiling = src->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
1423 .usage = src->vk.usage,
1424 };
1425
1426 VkImage _image;
1427 VkResult result =
1428 v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
1429 if (result != VK_SUCCESS) {
1430 v3dv_flag_oom(cmd_buffer, NULL);
1431 return NULL;
1432 }
1433
1434 v3dv_cmd_buffer_add_private_obj(
1435 cmd_buffer, (uintptr_t)_image,
1436 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
1437
1438 struct v3dv_image *image = v3dv_image_from_handle(_image);
1439 image->planes[0].mem = src->planes[0].mem;
1440 image->planes[0].mem_offset = src->planes[0].mem_offset;
1441 return image;
1442 }
1443
1444 /**
1445 * Returns true if the implementation supports the requested operation (even if
1446 * it failed to process it, for example, due to an out-of-memory error).
1447 */
1448 static bool
1449 copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
1450 struct v3dv_image *dst,
1451 struct v3dv_image *src,
1452 const VkImageCopy2 *region)
1453 {
1454 if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D)
1455 return false;
1456
1457 uint8_t src_plane =
1458 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1459 assert(src_plane < src->plane_count);
1460 uint8_t dst_plane =
1461 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1462 assert(dst_plane < dst->plane_count);
1463
1464 const uint32_t src_block_w =
1465 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
1466 const uint32_t src_block_h =
1467 vk_format_get_blockheight(src->planes[src_plane].vk_format);
1468 const uint32_t dst_block_w =
1469 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
1470 const uint32_t dst_block_h =
1471 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
1472 const float block_scale_w = (float)src_block_w / (float)dst_block_w;
1473 const float block_scale_h = (float)src_block_h / (float)dst_block_h;
1474
1475 /* We need to choose a single format for the blit to ensure that this is
1476 * really a copy and there are no format conversions going on. Since we are
1477 * going to blit, we need to make sure that the selected format can be
1478 * both rendered to and textured from.
1479 */
1480 VkFormat format;
1481 float src_scale_w = 1.0f;
1482 float src_scale_h = 1.0f;
1483 float dst_scale_w = block_scale_w;
1484 float dst_scale_h = block_scale_h;
1485 if (vk_format_is_compressed(src->vk.format)) {
1486 /* If we are copying from a compressed format we should be aware that we
1487 * are going to texture from the source image, and the texture setup
1488 * knows the actual size of the image, so we need to choose a format
1489 * that has a per-texel (not per-block) bpp that is compatible for that
1490 * image size. For example, for a source image with size Bw*WxBh*H
1491 * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
1492 * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
1493 * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
1494 * so we could specify a blit with size Bw*WxBh*H and a format with
1495 * a bpp of 8-bit per texel (R8_UINT).
1496 *
1497 * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
1498 * which is 4 bits per texel (64 bits per block), then we would need a 4-bit format, which
1499 * we don't have, so instead we still choose an 8-bit format, but we
1500 * apply a divisor to the row dimensions of the blit, since we are
1501 * copying two texels per item.
1502 *
1503 * Generally, we can choose any format so long as we compute appropriate
1504 * divisors for the width and height depending on the source image's
1505 * bpp.
1506 */
1507 assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp);
1508
1509 format = VK_FORMAT_R32G32_UINT;
1510 switch (src->planes[src_plane].cpp) {
1511 case 16:
1512 format = VK_FORMAT_R32G32B32A32_UINT;
1513 break;
1514 case 8:
1515 format = VK_FORMAT_R16G16B16A16_UINT;
1516 break;
1517 default:
1518 unreachable("Unsupported compressed format");
1519 }
1520
1521 /* Create image views of the src/dst images that we can interpret in
1522 * terms of the canonical format.
1523 */
1524 src_scale_w /= src_block_w;
1525 src_scale_h /= src_block_h;
1526 dst_scale_w /= src_block_w;
1527 dst_scale_h /= src_block_h;
1528
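   /* For example (assuming an ETC2_RGBA8 source with 4x4 blocks of 16 bytes
    * each): the canonical format is R32G32B32A32_UINT and src_scale becomes
    * 1/4 in each dimension, so a 64x64 compressed image is aliased as a
    * 16x16 RGBA32UI image where each texel holds one compressed block.
    */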
1529 src = create_image_alias(cmd_buffer, src,
1530 src_scale_w, src_scale_h, format);
1531
1532 dst = create_image_alias(cmd_buffer, dst,
1533 dst_scale_w, dst_scale_h, format);
1534 } else {
1535 format = src->format->planes[src_plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
1536 src->planes[src_plane].vk_format :
1537 get_compatible_tlb_format(src->planes[src_plane].vk_format);
1538 if (format == VK_FORMAT_UNDEFINED)
1539 return false;
1540
1541 const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
1542 assert(f->plane_count < 2);
1543 if (!f->plane_count || f->planes[0].tex_type == TEXTURE_DATA_FORMAT_NO)
1544 return false;
1545 }
1546
1547 /* Given an uncompressed image with size WxH, if we copy it to a compressed
1548 * image, it will result in an image with size W*bWxH*bH, where bW and bH
1549 * are the compressed format's block width and height. This means that
1550 * copies between compressed and uncompressed images involve different
1551 * image sizes, and therefore, we need to take that into account when
1552 * setting up the source and destination blit regions below, so they are
1553 * consistent from the point of view of the single compatible format
1554 * selected for the copy.
1555 *
1556 * We should take into account that the dimensions of the region provided
1557 * to the copy command are specified in terms of the source image. With that
1558 * in mind, below we adjust the blit destination region to be consistent with
1559 * the source region for the compatible format, so basically, we apply
1560 * the block scale factor to the destination offset provided by the copy
1561 * command (because it is specified in terms of the destination image, not
1562 * the source), and then we just add the region copy dimensions to that
1563 * (since the region dimensions are already specified in terms of the source
1564 * image).
1565 */
1566 uint32_t region_width = region->extent.width * src_scale_w;
1567 uint32_t region_height = region->extent.height * src_scale_h;
1568 if (src_block_w > 1)
1569 region_width = util_next_power_of_two(region_width);
1570 if (src_block_h > 1)
1571 region_height = util_next_power_of_two(region_height);
1572
1573 const VkOffset3D src_start = {
1574 region->srcOffset.x * src_scale_w,
1575 region->srcOffset.y * src_scale_h,
1576 region->srcOffset.z,
1577 };
1578 const VkOffset3D src_end = {
1579 src_start.x + region_width,
1580 src_start.y + region_height,
1581 src_start.z + region->extent.depth,
1582 };
1583
1584 const VkOffset3D dst_start = {
1585 region->dstOffset.x * dst_scale_w,
1586 region->dstOffset.y * dst_scale_h,
1587 region->dstOffset.z,
1588 };
1589 const VkOffset3D dst_end = {
1590 dst_start.x + region_width,
1591 dst_start.y + region_height,
1592 dst_start.z + region->extent.depth,
1593 };
1594
1595 const VkImageBlit2 blit_region = {
1596 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
1597 .srcSubresource = region->srcSubresource,
1598 .srcOffsets = { src_start, src_end },
1599 .dstSubresource = region->dstSubresource,
1600 .dstOffsets = { dst_start, dst_end },
1601 };
1602 bool handled = blit_shader(cmd_buffer,
1603 dst, format,
1604 src, format,
1605 0, NULL,
1606 &blit_region, VK_FILTER_NEAREST, true);
1607
1608 /* We should have selected formats that we can blit */
1609 assert(handled);
1610 return handled;
1611 }
1612
1613 static bool
1614 copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1615 struct v3dv_image *dst,
1616 struct v3dv_image *src,
1617 const VkImageCopy2 *region)
1618 {
1619 if (src->tiled)
1620 return false;
1621
1622 /* Implementations are allowed to restrict linear images like this */
1623 assert(region->srcOffset.z == 0);
1624 assert(region->dstOffset.z == 0);
1625 assert(region->srcSubresource.mipLevel == 0);
1626 assert(region->srcSubresource.baseArrayLayer == 0);
1627 assert(region->srcSubresource.layerCount == 1);
1628 assert(region->dstSubresource.mipLevel == 0);
1629 assert(region->dstSubresource.baseArrayLayer == 0);
1630 assert(region->dstSubresource.layerCount == 1);
1631
1632 uint8_t src_plane =
1633 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1634 uint8_t dst_plane =
1635 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1636
1637 assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp);
1638 const uint32_t bpp = src->planes[src_plane].cpp;
1639
1640 VkFormat format;
1641 switch (bpp) {
1642 case 16:
1643 format = VK_FORMAT_R32G32B32A32_UINT;
1644 break;
1645 case 8:
1646 format = VK_FORMAT_R16G16B16A16_UINT;
1647 break;
1648 case 4:
1649 format = VK_FORMAT_R8G8B8A8_UINT;
1650 break;
1651 case 2:
1652 format = VK_FORMAT_R16_UINT;
1653 break;
1654 case 1:
1655 format = VK_FORMAT_R8_UINT;
1656 break;
1657 default:
1658 unreachable("unsupported bit-size");
1659 return false;
1660 }
1661
1662 VkComponentMapping ident_swizzle = {
1663 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
1664 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
1665 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
1666 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
1667 };
1668
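   /* Compute the byte offset of the first texel to copy within the linear
    * source layer. For example, with a 256-byte row stride, a 4-byte format
    * and srcOffset (8, 2) this is 2 * 256 + 8 * 4 = 544 bytes.
    */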
1669 const uint32_t buf_stride = src->planes[src_plane].slices[0].stride;
1670 const VkDeviceSize buf_offset =
1671 region->srcOffset.y * buf_stride + region->srcOffset.x * bpp;
1672
1673 struct v3dv_buffer src_buffer;
1674 vk_object_base_init(&cmd_buffer->device->vk, &src_buffer.base,
1675 VK_OBJECT_TYPE_BUFFER);
1676
1677 const struct VkBufferCreateInfo buf_create_info = {
1678 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1679 .size = src->planes[src_plane].size,
1680 .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
1681 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1682 };
1683 v3dv_buffer_init(cmd_buffer->device, &buf_create_info, &src_buffer,
1684 src->planes[src_plane].alignment);
1685
1686 const VkBindBufferMemoryInfo buf_bind_info = {
1687 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1688 .buffer = v3dv_buffer_to_handle(&src_buffer),
1689 .memory = v3dv_device_memory_to_handle(src->planes[src_plane].mem),
1690 .memoryOffset = src->planes[src_plane].mem_offset +
1691 v3dv_layer_offset(src, 0, 0, src_plane),
1692 };
1693 v3dv_buffer_bind_memory(&buf_bind_info);
1694
1695 const VkBufferImageCopy2 copy_region = {
1696 .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
1697 .pNext = NULL,
1698 .bufferOffset = buf_offset,
1699 .bufferRowLength = buf_stride / bpp,
1700 .bufferImageHeight = src->vk.extent.height,
1701 .imageSubresource = region->dstSubresource,
1702 .imageOffset = region->dstOffset,
1703 .imageExtent = region->extent,
1704 };
1705
1706 return texel_buffer_shader_copy(cmd_buffer,
1707 region->dstSubresource.aspectMask,
1708 dst,
1709 format,
1710 format,
1711 &src_buffer,
1712 src->planes[src_plane].cpp,
1713 0 /* color mask: full */, &ident_swizzle,
1714 1, &copy_region);
1715 }
1716
1717 VKAPI_ATTR void VKAPI_CALL
1718 v3dv_CmdCopyImage2(VkCommandBuffer commandBuffer,
1719 const VkCopyImageInfo2 *info)
1720
1721 {
1722 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1723 V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
1724 V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
1725
1726 assert(src->vk.samples == dst->vk.samples);
1727
1728 cmd_buffer->state.is_transfer = true;
1729
1730 for (uint32_t i = 0; i < info->regionCount; i++) {
1731 const VkImageCopy2 *region = &info->pRegions[i];
1732 if (copy_image_tfu(cmd_buffer, dst, src, region))
1733 continue;
1734 if (copy_image_tlb(cmd_buffer, dst, src, region))
1735 continue;
1736 if (copy_image_blit(cmd_buffer, dst, src, region))
1737 continue;
1738 if (copy_image_linear_texel_buffer(cmd_buffer, dst, src, region))
1739 continue;
1740 unreachable("Image copy not supported");
1741 }
1742
1743 cmd_buffer->state.is_transfer = false;
1744 }
1745
1746 VKAPI_ATTR void VKAPI_CALL
1747 v3dv_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
1748 const VkCopyBufferInfo2 *pCopyBufferInfo)
1749 {
1750 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1751 V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
1752 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
1753
1754 cmd_buffer->state.is_transfer = true;
1755
1756 for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
1757 v3dv_X(cmd_buffer->device, meta_copy_buffer)
1758 (cmd_buffer,
1759 dst_buffer->mem->bo, dst_buffer->mem_offset,
1760 src_buffer->mem->bo, src_buffer->mem_offset,
1761 &pCopyBufferInfo->pRegions[i]);
1762 }
1763
1764 cmd_buffer->state.is_transfer = false;
1765 }
1766
1767 static void
1768 destroy_update_buffer_cb(VkDevice _device,
1769 uint64_t pobj,
1770 VkAllocationCallbacks *alloc)
1771 {
1772 V3DV_FROM_HANDLE(v3dv_device, device, _device);
1773 struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
1774 v3dv_bo_free(device, bo);
1775 }
1776
1777 VKAPI_ATTR void VKAPI_CALL
1778 v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
1779 VkBuffer dstBuffer,
1780 VkDeviceSize dstOffset,
1781 VkDeviceSize dataSize,
1782 const void *pData)
1783 {
1784 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1785 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1786
1787 struct v3dv_bo *src_bo =
1788 v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
1789 if (!src_bo) {
1790 fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
1791 return;
1792 }
1793
1794 bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
1795 if (!ok) {
1796 fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
     /* Don't leak the source BO if we bail out here */
     v3dv_bo_free(cmd_buffer->device, src_bo);
1797 return;
1798 }
1799
1800 cmd_buffer->state.is_transfer = true;
1801
1802 memcpy(src_bo->map, pData, dataSize);
1803
1804 v3dv_bo_unmap(cmd_buffer->device, src_bo);
1805
1806 VkBufferCopy2 region = {
1807 .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
1808 .srcOffset = 0,
1809 .dstOffset = dstOffset,
1810 .size = dataSize,
1811 };
1812 struct v3dv_job *copy_job =
1813 v3dv_X(cmd_buffer->device, meta_copy_buffer)
1814 (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
1815 src_bo, 0, &region);
1816
1817 if (copy_job) {
1818 v3dv_cmd_buffer_add_private_obj(
1819 cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
1820 }
1821
1822 cmd_buffer->state.is_transfer = false;
1823 }
1824
1825 VKAPI_ATTR void VKAPI_CALL
1826 v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
1827 VkBuffer dstBuffer,
1828 VkDeviceSize dstOffset,
1829 VkDeviceSize size,
1830 uint32_t data)
1831 {
1832 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1833 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1834
1835 cmd_buffer->state.is_transfer = true;
1836
1837 struct v3dv_bo *bo = dst_buffer->mem->bo;
1838
1839 /* From the Vulkan spec:
1840 *
1841 * "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
1842 * a multiple of 4, then the nearest smaller multiple is used."
1843 */
1844 if (size == VK_WHOLE_SIZE) {
1845 size = dst_buffer->size - dstOffset;
1846 size -= size % 4;
1847 }
1848
1849 v3dv_X(cmd_buffer->device, meta_fill_buffer)
1850 (cmd_buffer, bo, dstOffset, size, data);
1851
1852 cmd_buffer->state.is_transfer = false;
1853 }
1854
1855 /**
1856 * Returns true if the implementation supports the requested operation (even if
1857 * it failed to process it, for example, due to an out-of-memory error).
1858 */
1859 static bool
1860 copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1861 struct v3dv_image *image,
1862 struct v3dv_buffer *buffer,
1863 const VkBufferImageCopy2 *region)
1864 {
1865 if (V3D_DBG(DISABLE_TFU)) {
1866 perf_debug("Copy buffer to image: TFU disabled, fallbacks could be slower.\n");
1867 return false;
1868 }
1869
1870 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
1871
1872 /* Destination can't be raster format */
1873 if (!image->tiled)
1874 return false;
1875
1876 /* We can't copy D24S8 because buffer to image copies only copy one aspect
1877 * at a time, and the TFU copies full images. Also, V3D stores the depth
1878 * bits of both D24S8 and D24X8 in the 24 most significant bits of each
1879 * 32-bit word, while the Vulkan spec lays out the buffer data the other
1880 * way around, so it is not a straight copy: we would have to swizzle the
1881 * channels, which the TFU can't do.
1882 */
1883 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
1884 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
1885 return false;
1886 }
1887
1888 /* Region must include full slice */
1889 const uint32_t offset_x = region->imageOffset.x;
1890 const uint32_t offset_y = region->imageOffset.y;
1891 if (offset_x != 0 || offset_y != 0)
1892 return false;
1893
1894 uint32_t width, height;
1895 if (region->bufferRowLength == 0)
1896 width = region->imageExtent.width;
1897 else
1898 width = region->bufferRowLength;
1899
1900 if (region->bufferImageHeight == 0)
1901 height = region->imageExtent.height;
1902 else
1903 height = region->bufferImageHeight;
1904
1905 const uint8_t plane =
1906 v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
1907
1908 const uint32_t mip_level = region->imageSubresource.mipLevel;
1909 const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level];
1910
1911 if (width != slice->width || height != slice->height)
1912 return false;
1913
1914 /* Handle region semantics for compressed images */
1915 const uint32_t block_w =
1916 vk_format_get_blockwidth(image->planes[plane].vk_format);
1917 const uint32_t block_h =
1918 vk_format_get_blockheight(image->planes[plane].vk_format);
1919 width = DIV_ROUND_UP(width, block_w);
1920 height = DIV_ROUND_UP(height, block_h);
1921
1922 /* Format must be supported for texturing via the TFU. Since we are just
1923 * copying raw data and not converting between pixel formats, we can ignore
1924 * the image's format and choose a compatible TFU format for the image
1925 * texel size instead, which expands the list of formats we can handle here.
1926 */
1927 const struct v3dv_format *format =
1928 v3dv_get_compatible_tfu_format(cmd_buffer->device,
1929 image->planes[plane].cpp, NULL);
1930 /* We only use single-plane formats with the TFU */
1931 assert(format->plane_count == 1);
1932 const struct v3dv_format_plane *format_plane = &format->planes[0];
1933
1934 uint32_t num_layers;
1935 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
1936 num_layers = vk_image_subresource_layer_count(&image->vk,
1937 &region->imageSubresource);
1938 } else {
1939 num_layers = region->imageExtent.depth;
1940 }
1941 assert(num_layers > 0);
1942
1943 assert(image->planes[plane].mem && image->planes[plane].mem->bo);
1944 const struct v3dv_bo *dst_bo = image->planes[plane].mem->bo;
1945
1946 assert(buffer->mem && buffer->mem->bo);
1947 const struct v3dv_bo *src_bo = buffer->mem->bo;
1948
1949 /* Emit a TFU job per layer to copy */
1950 const uint32_t buffer_stride = width * image->planes[plane].cpp;
1951 for (int i = 0; i < num_layers; i++) {
1952 uint32_t layer;
1953 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1954 layer = region->imageSubresource.baseArrayLayer + i;
1955 else
1956 layer = region->imageOffset.z + i;
1957
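      /* Layers are tightly packed in the buffer: layer i starts
       * height * buffer_stride bytes after layer i - 1 (e.g. a 64x64 R8
       * source advances 64 * 64 = 4096 bytes per layer).
       */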
1958 const uint32_t buffer_offset =
1959 buffer->mem_offset + region->bufferOffset +
1960 height * buffer_stride * i;
1961 const uint32_t src_offset = src_bo->offset + buffer_offset;
1962
1963 const uint32_t dst_offset =
1964 dst_bo->offset + v3dv_layer_offset(image, mip_level, layer, plane);
1965
1966 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
1967 cmd_buffer,
1968 dst_bo->handle,
1969 dst_offset,
1970 slice->tiling,
1971 slice->padded_height,
1972 image->planes[plane].cpp,
1973 src_bo->handle,
1974 src_offset,
1975 V3D_TILING_RASTER,
1976 width,
1977 1,
1978 width, height, format_plane);
1979 }
1980
1981 return true;
1982 }
1983
1984 /**
1985 * Returns true if the implementation supports the requested operation (even if
1986 * it failed to process it, for example, due to an out-of-memory error).
1987 */
1988 static bool
1989 copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1990 struct v3dv_image *image,
1991 struct v3dv_buffer *buffer,
1992 const VkBufferImageCopy2 *region)
1993 {
1994 VkFormat fb_format;
1995 uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
1996 assert(plane < image->plane_count);
1997
1998 if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
1999 &region->imageOffset, &region->imageExtent,
2000 &fb_format)) {
2001 return false;
2002 }
2003
2004 /* From the Vulkan spec for VkBufferImageCopy2:
2005 *
2006 * "The aspectMask member of imageSubresource must only have a
2007 * single bit set."
2008 *
2009 * For us this has relevant implications because we can't do TLB stores
2010 * of linear depth/stencil, so we work around this by loading D/S data into
2011 * the color tile buffer using a compatible color format (see the
2012 * emit_copy_buffer_to_layer_per_tile_list and choose_tlb_format functions).
2013 * However, when we are copying a single aspect to a combined D/S image
2014 * we need to preserve the other aspect, and for that we still use the
2015 * D/S tile buffer to load and store the aspect of the image we need to
2016 * preserve, so in this case we remain constrained by the hardware
2017 * restriction on linear D/S stores.
2018 */
2019 assert(util_bitcount(region->imageSubresource.aspectMask) == 1);
2020 if (!image->tiled &&
2021 vk_format_has_depth(fb_format) &&
2022 vk_format_has_stencil(fb_format)) {
2023 return false;
2024 }
2025
2026 uint32_t internal_type, internal_bpp;
2027 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
2028 (fb_format, region->imageSubresource.aspectMask,
2029 &internal_type, &internal_bpp);
2030
2031 uint32_t num_layers;
2032 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2033 num_layers = vk_image_subresource_layer_count(&image->vk,
2034 &region->imageSubresource);
2035 } else {
2036 num_layers = region->imageExtent.depth;
2037 }
2038 assert(num_layers > 0);
2039
2040 struct v3dv_job *job =
2041 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
2042 if (!job)
2043 return true;
2044
2045 /* Handle copy to compressed format using a compatible format */
2046 const uint32_t block_w =
2047 vk_format_get_blockwidth(image->planes[plane].vk_format);
2048 const uint32_t block_h =
2049 vk_format_get_blockheight(image->planes[plane].vk_format);
2050 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
2051 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
2052
2053 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
2054 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
2055 false);
2056
2057 struct v3dv_meta_framebuffer framebuffer;
2058 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
2059 internal_type, &job->frame_tiling);
2060
2061 v3dv_X(job->device, job_emit_binning_flush)(job);
2062 v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
2063 (job, image, buffer, &framebuffer, region);
2064
2065 v3dv_cmd_buffer_finish_job(cmd_buffer);
2066
2067 return true;
2068 }
2069
2070 static bool
2071 create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
2072 struct v3dv_image *image,
2073 struct v3dv_buffer *buffer,
2074 const VkBufferImageCopy2 *region)
2075 {
2076 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
2077 return true;
2078 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
2079 return true;
2080 return false;
2081 }
2082
2083 static VkResult
2084 create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
2085 {
2086 /* If this is not the first pool we create for this command buffer,
2087 * size it based on the size of the currently exhausted pool.
2088 */
2089 uint32_t descriptor_count = 64;
2090 if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
2091 struct v3dv_descriptor_pool *exhausted_pool =
2092 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
2093 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
2094 }
2095
2096 /* Create the descriptor pool */
2097 cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
2098 VkDescriptorPoolSize pool_size = {
2099 .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2100 .descriptorCount = descriptor_count,
2101 };
2102 VkDescriptorPoolCreateInfo info = {
2103 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
2104 .maxSets = descriptor_count,
2105 .poolSizeCount = 1,
2106 .pPoolSizes = &pool_size,
2107 .flags = 0,
2108 };
2109 VkResult result =
2110 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
2111 &info,
2112 &cmd_buffer->device->vk.alloc,
2113 &cmd_buffer->meta.texel_buffer_copy.dspool);
2114
2115 if (result == VK_SUCCESS) {
2116 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
2117 const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
2118
2119 v3dv_cmd_buffer_add_private_obj(
2120 cmd_buffer, (uintptr_t) _pool,
2121 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
2122
2123 struct v3dv_descriptor_pool *pool =
2124 v3dv_descriptor_pool_from_handle(_pool);
2125 pool->is_driver_internal = true;
2126 }
2127
2128 return result;
2129 }
2130
2131 static VkResult
2132 allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
2133 VkDescriptorSet *set)
2134 {
2135 /* Make sure we have a descriptor pool */
2136 VkResult result;
2137 if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
2138 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
2139 if (result != VK_SUCCESS)
2140 return result;
2141 }
2142 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
2143
2144 /* Allocate descriptor set */
2145 struct v3dv_device *device = cmd_buffer->device;
2146 VkDevice _device = v3dv_device_to_handle(device);
2147 VkDescriptorSetAllocateInfo info = {
2148 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2149 .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
2150 .descriptorSetCount = 1,
2151 .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
2152 };
2153 result = v3dv_AllocateDescriptorSets(_device, &info, set);
2154
2155 /* If we ran out of pool space, grow the pool and try again */
2156 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
2157 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
2158 if (result == VK_SUCCESS) {
2159 info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
2160 result = v3dv_AllocateDescriptorSets(_device, &info, set);
2161 }
2162 }
2163
2164 return result;
2165 }
2166
2167 static void
2168 get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
2169 VkColorComponentFlags cmask,
2170 VkComponentMapping *cswizzle,
2171 bool is_layered,
2172 uint8_t *key)
2173 {
2174 memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2175
2176 uint32_t *p = (uint32_t *) key;
2177
2178 *p = format;
2179 p++;
2180
2181 *p = cmask;
2182 p++;
2183
2184 /* Note that we are using a single byte for this, so we could pack
2185 * more data into this 32-bit slot in the future.
2186 */
2187 *p = is_layered ? 1 : 0;
2188 p++;
2189
2190 memcpy(p, cswizzle, sizeof(VkComponentMapping));
2191 p += sizeof(VkComponentMapping) / sizeof(uint32_t);
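   /* Key layout: word 0 = format, word 1 = color write mask, word 2 =
    * layered flag, words 3-6 = component swizzle. The assert below checks
    * that this adds up to V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE.
    */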
2192
2193 assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2194 }
2195
2196 static bool
2197 create_blit_render_pass(struct v3dv_device *device,
2198 VkFormat dst_format,
2199 VkFormat src_format,
2200 VkRenderPass *pass_load,
2201 VkRenderPass *pass_no_load);
2202
2203 static bool
2204 create_pipeline(struct v3dv_device *device,
2205 struct v3dv_render_pass *pass,
2206 struct nir_shader *vs_nir,
2207 struct nir_shader *gs_nir,
2208 struct nir_shader *fs_nir,
2209 const VkPipelineVertexInputStateCreateInfo *vi_state,
2210 const VkPipelineDepthStencilStateCreateInfo *ds_state,
2211 const VkPipelineColorBlendStateCreateInfo *cb_state,
2212 const VkPipelineMultisampleStateCreateInfo *ms_state,
2213 const VkPipelineLayout layout,
2214 VkPipeline *pipeline);
2215
2216 static nir_shader *
2217 get_texel_buffer_copy_vs(const nir_shader_compiler_options *options)
2218 {
2219 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
2220 "meta texel buffer copy vs");
2221 nir_variable *vs_out_pos =
2222 nir_variable_create(b.shader, nir_var_shader_out,
2223 glsl_vec4_type(), "gl_Position");
2224 vs_out_pos->data.location = VARYING_SLOT_POS;
2225
2226 nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
2227 nir_store_var(&b, vs_out_pos, pos, 0xf);
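   /* No vertex inputs: nir_gen_rect_vertices with NULL bounds presumably
    * produces the clip-space corners of a quad covering the viewport from
    * the vertex ID, matching the 4-vertex draws issued per region below.
    */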
2228
2229 return b.shader;
2230 }
2231
2232 static nir_shader *
2233 get_texel_buffer_copy_gs(const nir_shader_compiler_options *options)
2234 {
2235 /* FIXME: this creates a geometry shader that takes the index of a single
2236 * layer to copy from push constants, so we need to emit a draw call for
2237 * each layer that we want to copy. We could actually do better and have it
2238 * take a range of layers; however, if we were to do this, we would need to
2239 * be careful not to exceed the maximum number of output vertices allowed in
2240 * a geometry shader.
2241 */
2242 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2243 "meta texel buffer copy gs");
2244 nir_shader *nir = b.shader;
2245 nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
2246 nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
2247 (1ull << VARYING_SLOT_LAYER);
2248 nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
2249 nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
2250 nir->info.gs.vertices_in = 3;
2251 nir->info.gs.vertices_out = 3;
2252 nir->info.gs.invocations = 1;
2253 nir->info.gs.active_stream_mask = 0x1;
2254
2255 /* in vec4 gl_Position[3] */
2256 nir_variable *gs_in_pos =
2257 nir_variable_create(b.shader, nir_var_shader_in,
2258 glsl_array_type(glsl_vec4_type(), 3, 0),
2259 "in_gl_Position");
2260 gs_in_pos->data.location = VARYING_SLOT_POS;
2261
2262 /* out vec4 gl_Position */
2263 nir_variable *gs_out_pos =
2264 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
2265 "out_gl_Position");
2266 gs_out_pos->data.location = VARYING_SLOT_POS;
2267
2268 /* out float gl_Layer */
2269 nir_variable *gs_out_layer =
2270 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
2271 "out_gl_Layer");
2272 gs_out_layer->data.location = VARYING_SLOT_LAYER;
2273
2274 /* Emit output triangle */
2275 for (uint32_t i = 0; i < 3; i++) {
2276 /* gl_Position from shader input */
2277 nir_deref_instr *in_pos_i =
2278 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
2279 nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
2280
2281 /* gl_Layer from push constants */
2282 nir_def *layer =
2283 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2284 .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
2285 .range = 4);
2286 nir_store_var(&b, gs_out_layer, layer, 0x1);
2287
2288 nir_emit_vertex(&b, 0);
2289 }
2290
2291 nir_end_primitive(&b, 0);
2292
2293 return nir;
2294 }
2295
2296 static nir_def *
2297 load_frag_coord(nir_builder *b)
2298 {
2299 nir_foreach_shader_in_variable(var, b->shader) {
2300 if (var->data.location == VARYING_SLOT_POS)
2301 return nir_load_var(b, var);
2302 }
2303 nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
2304 glsl_vec4_type(), NULL);
2305 pos->data.location = VARYING_SLOT_POS;
2306 return nir_load_var(b, pos);
2307 }
2308
2309 static uint32_t
2310 component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
2311 {
2312 if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
2313 swz = comp;
2314
2315 switch (swz) {
2316 case VK_COMPONENT_SWIZZLE_R:
2317 return 0;
2318 case VK_COMPONENT_SWIZZLE_G:
2319 return 1;
2320 case VK_COMPONENT_SWIZZLE_B:
2321 return 2;
2322 case VK_COMPONENT_SWIZZLE_A:
2323 return 3;
2324 default:
2325 unreachable("Invalid swizzle");
2326 };
2327 }
2328
2329 static nir_shader *
2330 get_texel_buffer_copy_fs(const nir_shader_compiler_options *options,
2331 VkFormat format, VkComponentMapping *cswizzle)
2332 {
2333 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
2334 "meta texel buffer copy fs");
2335
2336 /* We only use the copy from texel buffer shader to implement
2337 * copy_buffer_to_image_shader, which always selects a compatible integer
2338 * format for the copy.
2339 */
2340 assert(vk_format_is_int(format));
2341
2342 /* Fragment shader output color */
2343 nir_variable *fs_out_color =
2344 nir_variable_create(b.shader, nir_var_shader_out,
2345 glsl_uvec4_type(), "out_color");
2346 fs_out_color->data.location = FRAG_RESULT_DATA0;
2347
2348 /* Texel buffer input */
2349 const struct glsl_type *sampler_type =
2350 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
2351 nir_variable *sampler =
2352 nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
2353 sampler->data.descriptor_set = 0;
2354 sampler->data.binding = 0;
2355
2356 /* Load the box describing the pixel region we want to copy from the
2357 * texel buffer.
2358 */
2359 nir_def *box =
2360 nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
2361 .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
2362 .range = 16);
2363
2364 /* Load the buffer stride (this comes in texel units) */
2365 nir_def *stride =
2366 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2367 .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
2368 .range = 4);
2369
2370 /* Load the buffer offset (this comes in texel units) */
2371 nir_def *offset =
2372 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2373 .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
2374 .range = 4);
2375
2376 nir_def *coord = nir_f2i32(&b, load_frag_coord(&b));
2377
2378 /* Load pixel data from texel buffer based on the x,y offset of the pixel
2379 * within the box. Texel buffers are 1D arrays of texels.
2380 *
2381 * Notice that we already make sure that we only generate fragments that are
2382 * inside the box through the scissor/viewport state, so our offset into the
2383 * texel buffer should always be within its bounds and we don't need
2384 * to add a check for that here.
2385 */
2386 nir_def *x_offset =
2387 nir_isub(&b, nir_channel(&b, coord, 0),
2388 nir_channel(&b, box, 0));
2389 nir_def *y_offset =
2390 nir_isub(&b, nir_channel(&b, coord, 1),
2391 nir_channel(&b, box, 1));
2392 nir_def *texel_offset =
2393 nir_iadd(&b, nir_iadd(&b, offset, x_offset),
2394 nir_imul(&b, y_offset, stride));
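   /* This computes texel_offset = offset + (frag.x - box.x) +
    * (frag.y - box.y) * stride: a row-major index, in texel units, into the
    * 2D region stored in the 1D texel buffer.
    */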
2395
2396 nir_def *tex_deref = &nir_build_deref_var(&b, sampler)->def;
2397 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
2398 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
2399 tex->op = nir_texop_txf;
2400 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, texel_offset);
2401 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
2402 tex->dest_type = nir_type_uint32;
2403 tex->is_array = false;
2404 tex->coord_components = 1;
2405 nir_def_init(&tex->instr, &tex->def, 4, 32);
2406 nir_builder_instr_insert(&b, &tex->instr);
2407
2408 uint32_t swiz[4];
2409 swiz[0] =
2410 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
2411 swiz[1] =
2412 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
2413 swiz[2] =
2414 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
2415 swiz[3] =
2416 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
2417 nir_def *s = nir_swizzle(&b, &tex->def, swiz, 4);
2418 nir_store_var(&b, fs_out_color, s, 0xf);
2419
2420 return b.shader;
2421 }
2422
2423 static bool
2424 create_texel_buffer_copy_pipeline(struct v3dv_device *device,
2425 VkFormat format,
2426 VkColorComponentFlags cmask,
2427 VkComponentMapping *cswizzle,
2428 bool is_layered,
2429 VkRenderPass _pass,
2430 VkPipelineLayout pipeline_layout,
2431 VkPipeline *pipeline)
2432 {
2433 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
2434
2435 assert(vk_format_is_color(format));
2436
2437 const nir_shader_compiler_options *options =
2438 v3dv_pipeline_get_nir_options(&device->devinfo);
2439
2440 nir_shader *vs_nir = get_texel_buffer_copy_vs(options);
2441 nir_shader *fs_nir = get_texel_buffer_copy_fs(options, format, cswizzle);
2442 nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs(options) : NULL;
2443
2444 const VkPipelineVertexInputStateCreateInfo vi_state = {
2445 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
2446 .vertexBindingDescriptionCount = 0,
2447 .vertexAttributeDescriptionCount = 0,
2448 };
2449
2450 VkPipelineDepthStencilStateCreateInfo ds_state = {
2451 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
2452 };
2453
2454 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
2455 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
2456 .blendEnable = false,
2457 .colorWriteMask = cmask,
2458 };
2459
2460 const VkPipelineColorBlendStateCreateInfo cb_state = {
2461 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
2462 .logicOpEnable = false,
2463 .attachmentCount = 1,
2464 .pAttachments = blend_att_state
2465 };
2466
2467 const VkPipelineMultisampleStateCreateInfo ms_state = {
2468 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
2469 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
2470 .sampleShadingEnable = false,
2471 .pSampleMask = NULL,
2472 .alphaToCoverageEnable = false,
2473 .alphaToOneEnable = false,
2474 };
2475
2476 return create_pipeline(device,
2477 pass,
2478 vs_nir, gs_nir, fs_nir,
2479 &vi_state,
2480 &ds_state,
2481 &cb_state,
2482 &ms_state,
2483 pipeline_layout,
2484 pipeline);
2485 }
2486
2487 static bool
2488 get_copy_texel_buffer_pipeline(
2489 struct v3dv_cmd_buffer *cmd_buffer,
2490 VkFormat format,
2491 VkColorComponentFlags cmask,
2492 VkComponentMapping *cswizzle,
2493 VkImageType image_type,
2494 bool is_layered,
2495 struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
2496 {
2497 bool ok = true;
2498 struct v3dv_device *device = cmd_buffer->device;
2499
2500 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
2501 if (device->instance->meta_cache_enabled) {
2502 get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
2503 key);
2504
2505 mtx_lock(&device->meta.mtx);
2506 struct hash_entry *entry =
2507 _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
2508 key);
2509 if (entry) {
2510 mtx_unlock(&device->meta.mtx);
2511 *pipeline = entry->data;
2512 return true;
2513 }
2514 }
2515
2516 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
2517 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2518
2519 if (*pipeline == NULL)
2520 goto fail;
2521
2522 /* The blit render pass is compatible */
2523 ok = create_blit_render_pass(device, format, format,
2524 &(*pipeline)->pass,
2525 &(*pipeline)->pass_no_load);
2526 if (!ok)
2527 goto fail;
2528
2529 ok =
2530 create_texel_buffer_copy_pipeline(device,
2531 format, cmask, cswizzle, is_layered,
2532 (*pipeline)->pass,
2533 device->meta.texel_buffer_copy.p_layout,
2534 &(*pipeline)->pipeline);
2535 if (!ok)
2536 goto fail;
2537
2538 if (device->instance->meta_cache_enabled) {
2539 _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
2540 key, *pipeline);
2541 mtx_unlock(&device->meta.mtx);
2542 } else {
2543 v3dv_cmd_buffer_add_private_obj(
2544 cmd_buffer, (uintptr_t)*pipeline,
2545 (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_meta_texel_buffer_copy_pipeline);
2546 }
2547
2548 return true;
2549
2550 fail:
2551 if (device->instance->meta_cache_enabled)
2552 mtx_unlock(&device->meta.mtx);
2553
2554 VkDevice _device = v3dv_device_to_handle(device);
2555 if (*pipeline) {
2556 if ((*pipeline)->pass)
2557 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
2558 if ((*pipeline)->pipeline)
2559 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
2560 vk_free(&device->vk.alloc, *pipeline);
2561 *pipeline = NULL;
2562 }
2563
2564 return false;
2565 }
2566
2567 static bool
2568 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
2569 VkImageAspectFlags aspect,
2570 struct v3dv_image *image,
2571 VkFormat dst_format,
2572 VkFormat src_format,
2573 struct v3dv_buffer *buffer,
2574 uint32_t buffer_bpp,
2575 VkColorComponentFlags cmask,
2576 VkComponentMapping *cswizzle,
2577 uint32_t region_count,
2578 const VkBufferImageCopy2 *regions)
2579 {
2580 VkResult result;
2581 bool handled = false;
2582
2583 assert(cswizzle);
2584
2585 /* This is a copy path, so we don't handle format conversions. The only
2586 * exception is stencil to D24S8 copies, which are handled as a color
2587 * masked R8->RGBA8 copy.
2588 */
2589 assert(src_format == dst_format ||
2590 (dst_format == VK_FORMAT_R8G8B8A8_UINT &&
2591 src_format == VK_FORMAT_R8_UINT &&
2592 cmask == VK_COLOR_COMPONENT_R_BIT));
2593
2594 /* We only handle color copies. Callers can copy D/S aspects by using
2595 * a compatible color format and maybe a cmask/cswizzle for D24 formats.
2596 */
2597 if (!vk_format_is_color(dst_format) || !vk_format_is_color(src_format))
2598 return handled;
2599
2600 /* FIXME: we only handle uncompressed images for now. */
2601 if (vk_format_is_compressed(image->vk.format))
2602 return handled;
2603
2604 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
2605 VK_COLOR_COMPONENT_G_BIT |
2606 VK_COLOR_COMPONENT_B_BIT |
2607 VK_COLOR_COMPONENT_A_BIT;
2608 if (cmask == 0)
2609 cmask = full_cmask;
2610
2611 /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
2612 * so we can bind it as a texel buffer. Otherwise, the buffer view
2613 * we create below won't set up the texture state that we need for this.
2614 */
2615 if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
2616 if (v3dv_buffer_format_supports_features(
2617 cmd_buffer->device, src_format,
2618 VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT)) {
2619 buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
2620 } else {
2621 return handled;
2622 }
2623 }
2624
2625 /* At this point we should be able to handle the copy unless an unexpected
2626 * error occurs, such as an OOM.
2627 */
2628 handled = true;
2629
2630
2631 /* Compute the number of layers to copy.
2632 *
2633 * If we are batching (region_count > 1) all our regions have the same
2634 * image subresource so we can take this from the first region. For 3D
2635 * images we require the same depth extent.
2636 */
2637 const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
2638 uint32_t num_layers;
2639 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2640 num_layers = vk_image_subresource_layer_count(&image->vk, resource);
2641 } else {
2642 assert(region_count == 1);
2643 num_layers = regions[0].imageExtent.depth;
2644 }
2645 assert(num_layers > 0);
2646
2647 /* Get the texel buffer copy pipeline */
2648 struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
2649 bool ok = get_copy_texel_buffer_pipeline(cmd_buffer,
2650 dst_format, cmask, cswizzle,
2651 image->vk.image_type, num_layers > 1,
2652 &pipeline);
2653 if (!ok)
2654 return handled;
2655 assert(pipeline && pipeline->pipeline && pipeline->pass);
2656
2657 /* Setup descriptor set for the source texel buffer. We don't have to
2658 * register the descriptor as a private command buffer object since
2659 * all descriptors will be freed automatically with the descriptor
2660 * pool.
2661 */
2662 VkDescriptorSet set;
2663 result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
2664 if (result != VK_SUCCESS)
2665 return handled;
2666
2667 /* We can't pass region->bufferOffset here for the offset field because
2668 * the texture base pointer in the texture shader state must be a 64-byte
2669 * aligned value. Instead, we use 0 here and we pass the offset in texels
2670 * as a push constant to the shader.
2671 */
2672 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
2673 VkBufferViewCreateInfo buffer_view_info = {
2674 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
2675 .buffer = v3dv_buffer_to_handle(buffer),
2676 .format = src_format,
2677 .offset = 0,
2678 .range = VK_WHOLE_SIZE,
2679 };
2680
2681 VkBufferView texel_buffer_view;
2682 result = v3dv_CreateBufferView(_device, &buffer_view_info,
2683 &cmd_buffer->device->vk.alloc,
2684 &texel_buffer_view);
2685 if (result != VK_SUCCESS)
2686 return handled;
2687
2688 v3dv_cmd_buffer_add_private_obj(
2689 cmd_buffer, (uintptr_t)texel_buffer_view,
2690 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
2691
2692 VkWriteDescriptorSet write = {
2693 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2694 .dstSet = set,
2695 .dstBinding = 0,
2696 .dstArrayElement = 0,
2697 .descriptorCount = 1,
2698 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2699 .pTexelBufferView = &texel_buffer_view,
2700 };
2701 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
2702
2703 /* Push command buffer state before starting meta operation */
2704 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
2705
2706 /* Bind common state for all layers and regions */
2707 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
2708 v3dv_CmdBindPipeline(_cmd_buffer,
2709 VK_PIPELINE_BIND_POINT_GRAPHICS,
2710 pipeline->pipeline);
2711
2712 v3dv_CmdBindDescriptorSets(_cmd_buffer,
2713 VK_PIPELINE_BIND_POINT_GRAPHICS,
2714 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2715 0, 1, &set,
2716 0, NULL);
2717
2718 /* Setup framebuffer.
2719 *
2720 * For 3D images, this creates a layered framebuffer with a number of
2721 * layers matching the depth extent of the 3D image.
2722 */
2723 uint8_t plane = v3dv_plane_from_aspect(aspect);
2724 uint32_t fb_width = u_minify(image->planes[plane].width, resource->mipLevel);
2725 uint32_t fb_height = u_minify(image->planes[plane].height, resource->mipLevel);
2726
2727 VkImageViewCreateInfo image_view_info = {
2728 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2729 .image = v3dv_image_to_handle(image),
2730 .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
2731 .format = dst_format,
2732 .subresourceRange = {
2733 .aspectMask = aspect,
2734 .baseMipLevel = resource->mipLevel,
2735 .levelCount = 1,
2736 .baseArrayLayer = resource->baseArrayLayer,
2737 .layerCount = num_layers,
2738 },
2739 };
2740 VkImageView image_view;
2741 result = v3dv_create_image_view(cmd_buffer->device,
2742 &image_view_info, &image_view);
2743 if (result != VK_SUCCESS)
2744 goto fail;
2745
2746 v3dv_cmd_buffer_add_private_obj(
2747 cmd_buffer, (uintptr_t)image_view,
2748 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
2749
2750 VkFramebufferCreateInfo fb_info = {
2751 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
2752 .renderPass = pipeline->pass,
2753 .attachmentCount = 1,
2754 .pAttachments = &image_view,
2755 .width = fb_width,
2756 .height = fb_height,
2757 .layers = num_layers,
2758 };
2759
2760 VkFramebuffer fb;
2761 result = v3dv_CreateFramebuffer(_device, &fb_info,
2762 &cmd_buffer->device->vk.alloc, &fb);
2763 if (result != VK_SUCCESS)
2764 goto fail;
2765
2766 v3dv_cmd_buffer_add_private_obj(
2767 cmd_buffer, (uintptr_t)fb,
2768 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
2769
2770 /* For each layer */
2771 for (uint32_t l = 0; l < num_layers; l++) {
2772 /* Start render pass for this layer.
2773 *
2774 * If we only have one region to copy, then we might be able to
2775 * skip the TLB load if it is aligned to tile boundaries. All layers
2776 * copy the same area, so we only need to check this once.
2777 */
2778 bool can_skip_tlb_load = false;
2779 VkRect2D render_area;
2780 if (region_count == 1) {
2781 render_area.offset.x = regions[0].imageOffset.x;
2782 render_area.offset.y = regions[0].imageOffset.y;
2783 render_area.extent.width = regions[0].imageExtent.width;
2784 render_area.extent.height = regions[0].imageExtent.height;
2785
2786 if (l == 0) {
2787 struct v3dv_render_pass *pipeline_pass =
2788 v3dv_render_pass_from_handle(pipeline->pass);
2789 can_skip_tlb_load =
2790 cmask == full_cmask &&
2791 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
2792 v3dv_framebuffer_from_handle(fb),
2793 pipeline_pass, 0);
2794 }
2795 } else {
2796 render_area.offset.x = 0;
2797 render_area.offset.y = 0;
2798 render_area.extent.width = fb_width;
2799 render_area.extent.height = fb_height;
2800 }
2801
2802 VkRenderPassBeginInfo rp_info = {
2803 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
2804 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
2805 pipeline->pass,
2806 .framebuffer = fb,
2807 .renderArea = render_area,
2808 .clearValueCount = 0,
2809 };
2810
2811 VkSubpassBeginInfo sp_info = {
2812 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
2813 .contents = VK_SUBPASS_CONTENTS_INLINE,
2814 };
2815
2816 v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
2817 struct v3dv_job *job = cmd_buffer->state.job;
2818 if (!job)
2819 goto fail;
2820
2821 /* If we are using a layered copy we need to specify the layer for the
2822 * Geometry Shader.
2823 */
2824 if (num_layers > 1) {
2825 uint32_t layer = resource->baseArrayLayer + l;
2826 v3dv_CmdPushConstants(_cmd_buffer,
2827 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2828 VK_SHADER_STAGE_GEOMETRY_BIT,
2829 24, 4, &layer);
2830 }
2831
2832 /* For each region */
2833 for (uint32_t r = 0; r < region_count; r++) {
2834 const VkBufferImageCopy2 *region = &regions[r];
2835
2836 /* Obtain the 2D buffer region spec */
2837 uint32_t buf_width, buf_height;
2838 if (region->bufferRowLength == 0)
2839 buf_width = region->imageExtent.width;
2840 else
2841 buf_width = region->bufferRowLength;
2842
2843 if (region->bufferImageHeight == 0)
2844 buf_height = region->imageExtent.height;
2845 else
2846 buf_height = region->bufferImageHeight;
2847
2848 const VkViewport viewport = {
2849 .x = region->imageOffset.x,
2850 .y = region->imageOffset.y,
2851 .width = region->imageExtent.width,
2852 .height = region->imageExtent.height,
2853 .minDepth = 0.0f,
2854 .maxDepth = 1.0f
2855 };
2856 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
2857 const VkRect2D scissor = {
2858 .offset = { region->imageOffset.x, region->imageOffset.y },
2859 .extent = { region->imageExtent.width, region->imageExtent.height }
2860 };
2861 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
2862
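      /* The buffer view was created with offset 0, so the offset we hand the
       * shader is given in texels: the region's byte offset divided by the
       * texel size, plus buf_height * buf_width texels per layer already
       * copied.
       */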
2863 const VkDeviceSize buf_offset =
2864 region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
2865 uint32_t push_data[6] = {
2866 region->imageOffset.x,
2867 region->imageOffset.y,
2868 region->imageOffset.x + region->imageExtent.width - 1,
2869 region->imageOffset.y + region->imageExtent.height - 1,
2870 buf_width,
2871 buf_offset,
2872 };
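      /* push_data holds, in order, the copy box (min x/y and inclusive
       * max x/y), the buffer stride in texels and the buffer offset in
       * texels, which is the layout the copy fragment shader reads back via
       * push constants; the geometry shader layer index pushed above lives
       * at byte offset 24, right after these 24 bytes.
       */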
2873
2874 v3dv_CmdPushConstants(_cmd_buffer,
2875 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2876 VK_SHADER_STAGE_FRAGMENT_BIT,
2877 0, sizeof(push_data), &push_data);
2878
2879 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
2880 } /* For each region */
2881
2882 VkSubpassEndInfo sp_end_info = {
2883 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
2884 };
2885
2886 v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
2887 } /* For each layer */
2888
2889 fail:
2890 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);
2891 return handled;
2892 }
2893
2894 /**
2895 * Returns true if the implementation supports the requested operation (even if
2896 * it failed to process it, for example, due to an out-of-memory error).
2897 */
2898 static bool
2899 copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
2900 VkImageAspectFlags aspect,
2901 struct v3dv_image *image,
2902 VkFormat dst_format,
2903 VkFormat src_format,
2904 struct v3dv_buffer *buffer,
2905 uint32_t buffer_bpp,
2906 VkColorComponentFlags cmask,
2907 VkComponentMapping *cswizzle,
2908 uint32_t region_count,
2909 const VkBufferImageCopy2 *regions)
2910 {
2911 /* Since we can't sample linear images we need to upload the linear
2912 * buffer to a tiled image that we can use as a blit source, which
2913 * is slow.
2914 */
2915 perf_debug("Falling back to blit path for buffer to image copy.\n");
2916
2917 struct v3dv_device *device = cmd_buffer->device;
2918 VkDevice _device = v3dv_device_to_handle(device);
2919 bool handled = true;
2920
2921 /* Allocate memory for the tiled image. Since we copy layer by layer
2922 * we allocate memory to hold a full layer, which is the worst case.
2923 * For that we create a dummy image with that spec, get memory requirements
2924 * for it and use that information to create the memory allocation.
2925 * We will then reuse this memory store for all the regions we want to
2926 * copy.
2927 */
2928 VkImage dummy_image;
2929 VkImageCreateInfo dummy_info = {
2930 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2931 .imageType = VK_IMAGE_TYPE_2D,
2932 .format = src_format,
2933 .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
2934 .mipLevels = 1,
2935 .arrayLayers = 1,
2936 .samples = VK_SAMPLE_COUNT_1_BIT,
2937 .tiling = VK_IMAGE_TILING_OPTIMAL,
2938 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2939 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2940 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2941 .queueFamilyIndexCount = 0,
2942 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2943 };
2944 VkResult result =
2945 v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
2946 if (result != VK_SUCCESS)
2947 return handled;
2948
2949 VkMemoryRequirements reqs;
2950 vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
2951 v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
2952
2953 VkDeviceMemory mem;
2954 VkMemoryAllocateInfo alloc_info = {
2955 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2956 .allocationSize = reqs.size,
2957 .memoryTypeIndex = 0,
2958 };
2959 result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
2960 if (result != VK_SUCCESS)
2961 return handled;
2962
2963 v3dv_cmd_buffer_add_private_obj(
2964 cmd_buffer, (uintptr_t)mem,
2965 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
2966
2967 /* Obtain the layer count.
2968 *
2969 * If we are batching (region_count > 1) all our regions have the same
2970 * image subresource so we can take this from the first region.
2971 */
2972 uint32_t num_layers;
2973 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2974 num_layers = vk_image_subresource_layer_count(&image->vk,
2975 &regions[0].imageSubresource);
2976 } else {
2977 num_layers = regions[0].imageExtent.depth;
2978 }
2979 assert(num_layers > 0);
2980
2981 /* Sanity check: we can only batch multiple regions together if they have
2982 * the same framebuffer (so the same layer).
2983 */
2984 assert(num_layers == 1 || region_count == 1);
2985
2986 uint8_t plane = v3dv_plane_from_aspect(aspect);
2987 assert(plane < image->plane_count);
2988
2989 const uint32_t block_width =
2990 vk_format_get_blockwidth(image->planes[plane].vk_format);
2991 const uint32_t block_height =
2992 vk_format_get_blockheight(image->planes[plane].vk_format);
2993
2994 /* Copy regions by uploading each region to a temporary tiled image using
2995 * the memory we have just allocated as storage.
2996 */
2997 for (uint32_t r = 0; r < region_count; r++) {
2998 const VkBufferImageCopy2 *region = &regions[r];
2999
3000 /* Obtain the 2D buffer region spec */
3001 uint32_t buf_width, buf_height;
3002 if (region->bufferRowLength == 0)
3003 buf_width = region->imageExtent.width;
3004 else
3005 buf_width = region->bufferRowLength;
3006
3007 if (region->bufferImageHeight == 0)
3008 buf_height = region->imageExtent.height;
3009 else
3010 buf_height = region->bufferImageHeight;
3011
3012 /* If the image is compressed, the bpp refers to blocks, not pixels */
3013 buf_width = buf_width / block_width;
3014 buf_height = buf_height / block_height;
3015
3016 for (uint32_t i = 0; i < num_layers; i++) {
3017 /* Create the tiled image */
3018 VkImageCreateInfo image_info = {
3019 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
3020 .imageType = VK_IMAGE_TYPE_2D,
3021 .format = src_format,
3022 .extent = { buf_width, buf_height, 1 },
3023 .mipLevels = 1,
3024 .arrayLayers = 1,
3025 .samples = VK_SAMPLE_COUNT_1_BIT,
3026 .tiling = VK_IMAGE_TILING_OPTIMAL,
3027 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3028 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
3029 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
3030 .queueFamilyIndexCount = 0,
3031 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3032 };
3033
3034 VkImage buffer_image;
3035 VkResult result =
3036 v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
3037 &buffer_image);
3038 if (result != VK_SUCCESS)
3039 return handled;
3040
3041 v3dv_cmd_buffer_add_private_obj(
3042 cmd_buffer, (uintptr_t)buffer_image,
3043 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
3044
3045 result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
3046 if (result != VK_SUCCESS)
3047 return handled;
3048
3049 /* When copying a multi-plane image the aspect indicates the plane to
3050 * copy. For these, we only copy one plane at a time, which is always
3051 * a color plane.
3052 */
3053 VkImageAspectFlags copy_aspect =
3054 image->plane_count == 1 ? aspect : VK_IMAGE_ASPECT_COLOR_BIT;
3055
3056 /* Upload buffer contents for the selected layer */
3057 const VkDeviceSize buf_offset_bytes =
3058 region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
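      /* Unlike the texel buffer path, this offset is in bytes since it feeds
       * a regular buffer-to-image upload; each layer is
       * buf_height * buf_width * buffer_bpp bytes further into the buffer.
       */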
3059 const VkBufferImageCopy2 buffer_image_copy = {
3060 .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
3061 .bufferOffset = buf_offset_bytes,
3062 .bufferRowLength = region->bufferRowLength / block_width,
3063 .bufferImageHeight = region->bufferImageHeight / block_height,
3064 .imageSubresource = {
3065 .aspectMask = copy_aspect,
3066 .mipLevel = 0,
3067 .baseArrayLayer = 0,
3068 .layerCount = 1,
3069 },
3070 .imageOffset = { 0, 0, 0 },
3071 .imageExtent = { buf_width, buf_height, 1 }
3072 };
3073 handled =
3074 create_tiled_image_from_buffer(cmd_buffer,
3075 v3dv_image_from_handle(buffer_image),
3076 buffer, &buffer_image_copy);
3077 if (!handled) {
3078          /* This is unexpected: we should have set up the upload so that it
3079           * conforms to a TFU or TLB copy.
3080           */
3081 unreachable("Unable to copy buffer to image through TLB");
3082 return false;
3083 }
3084
3085 /* Blit-copy the requested image extent from the buffer image to the
3086 * destination image.
3087 *
3088 * Since we are copying, the blit must use the same format on the
3089 * destination and source images to avoid format conversions. The
3090 * only exception is copying stencil, which we upload to a R8UI source
3091 * image, but that we need to blit to a S8D24 destination (the only
3092 * stencil format we support).
3093 */
3094 const VkImageBlit2 blit_region = {
3095 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
3096 .srcSubresource = {
3097 .aspectMask = copy_aspect,
3098 .mipLevel = 0,
3099 .baseArrayLayer = 0,
3100 .layerCount = 1,
3101 },
3102 .srcOffsets = {
3103 { 0, 0, 0 },
3104 { region->imageExtent.width, region->imageExtent.height, 1 },
3105 },
3106 .dstSubresource = {
3107 .aspectMask = aspect,
3108 .mipLevel = region->imageSubresource.mipLevel,
3109 .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
3110 .layerCount = 1,
3111 },
3112 .dstOffsets = {
3113 {
3114 DIV_ROUND_UP(region->imageOffset.x, block_width),
3115 DIV_ROUND_UP(region->imageOffset.y, block_height),
3116 region->imageOffset.z + i,
3117 },
3118 {
3119 DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
3120 block_width),
3121 DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
3122 block_height),
3123 region->imageOffset.z + i + 1,
3124 },
3125 },
3126 };
3127
3128 handled = blit_shader(cmd_buffer,
3129 image, dst_format,
3130 v3dv_image_from_handle(buffer_image), src_format,
3131 cmask, cswizzle,
3132 &blit_region, VK_FILTER_NEAREST, true);
3133 if (!handled) {
3134          /* This is unexpected: we should have a supported blit spec */
3135 unreachable("Unable to blit buffer to destination image");
3136 return false;
3137 }
3138 }
3139 }
3140
3141 return handled;
3142 }
3143
3144 /**
3145 * Returns true if the implementation supports the requested operation (even if
3146 * it failed to process it, for example, due to an out-of-memory error).
3147 */
3148 static bool
3149 copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
3150 struct v3dv_image *image,
3151 struct v3dv_buffer *buffer,
3152 uint32_t region_count,
3153 const VkBufferImageCopy2 *regions,
3154 bool use_texel_buffer)
3155 {
3156 /* We can only call this with region_count > 1 if we can batch the regions
3157 * together, in which case they share the same image subresource, and so
3158 * the same aspect.
3159 */
3160 VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
3161 const VkImageAspectFlagBits any_plane_aspect =
3162 VK_IMAGE_ASPECT_PLANE_0_BIT |
3163 VK_IMAGE_ASPECT_PLANE_1_BIT |
3164 VK_IMAGE_ASPECT_PLANE_2_BIT;
3165
3166 bool is_plane_aspect = aspect & any_plane_aspect;
3167
3168 /* Generally, the bpp of the data in the buffer matches that of the
3169 * destination image. The exception is the case where we are uploading
3170 * stencil (8bpp) to a combined d24s8 image (32bpp).
3171 */
3172 uint8_t plane = v3dv_plane_from_aspect(aspect);
3173 assert(plane < image->plane_count);
3174 uint32_t buf_bpp = image->planes[plane].cpp;
3175
3176 /* We are about to upload the buffer data to an image so we can then
3177 * blit that to our destination region. Because we are going to implement
3178 * the copy as a blit, we want our blit source and destination formats to be
3179 * the same (to avoid any format conversions), so we choose a canonical
3180 * format that matches the destination image bpp.
3181 */
3182 VkComponentMapping ident_swizzle = {
3183 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
3184 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
3185 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
3186 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
3187 };
3188
3189 VkComponentMapping cswizzle = ident_swizzle;
3190 VkColorComponentFlags cmask = 0; /* Write all components */
3191 VkFormat src_format;
3192 VkFormat dst_format;
3193 switch (buf_bpp) {
3194 case 16:
3195 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
3196 src_format = VK_FORMAT_R32G32B32A32_UINT;
3197 dst_format = src_format;
3198 break;
3199 case 8:
3200 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
3201 src_format = VK_FORMAT_R16G16B16A16_UINT;
3202 dst_format = src_format;
3203 break;
3204 case 4:
3205 switch (aspect) {
3206 case VK_IMAGE_ASPECT_COLOR_BIT:
3207 case VK_IMAGE_ASPECT_PLANE_0_BIT:
3208 case VK_IMAGE_ASPECT_PLANE_1_BIT:
3209 case VK_IMAGE_ASPECT_PLANE_2_BIT:
3210 src_format = VK_FORMAT_R8G8B8A8_UINT;
3211 dst_format = src_format;
3212 break;
3213 case VK_IMAGE_ASPECT_DEPTH_BIT:
3214 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
3215 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
3216 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
3217 src_format = VK_FORMAT_R8G8B8A8_UINT;
3218 dst_format = src_format;
3219
3220 /* For D24 formats, the Vulkan spec states that the depth component
3221 * in the buffer is stored in the 24-LSB, but V3D wants it in the
3222 * 24-MSB.
3223 */
3224 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
3225 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
3226 cmask = VK_COLOR_COMPONENT_G_BIT |
3227 VK_COLOR_COMPONENT_B_BIT |
3228 VK_COLOR_COMPONENT_A_BIT;
3229 cswizzle.r = VK_COMPONENT_SWIZZLE_R;
3230 cswizzle.g = VK_COMPONENT_SWIZZLE_R;
3231 cswizzle.b = VK_COMPONENT_SWIZZLE_G;
3232 cswizzle.a = VK_COMPONENT_SWIZZLE_B;
3233 }
3234 break;
3235 case VK_IMAGE_ASPECT_STENCIL_BIT:
3236 /* Since we don't support separate stencil this is always a stencil
3237 * copy to a combined depth/stencil image. Because we don't support
3238 * separate stencil images, we interpret the buffer data as a
3239 * color R8UI image, and implement the blit as a compatible color
3240 * blit to an RGBA8UI destination masking out writes to components
3241 * GBA (which map to the D24 component of a S8D24 image).
3242 */
3243 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
3244 buf_bpp = 1;
3245 src_format = VK_FORMAT_R8_UINT;
3246 dst_format = VK_FORMAT_R8G8B8A8_UINT;
3247 cmask = VK_COLOR_COMPONENT_R_BIT;
3248 break;
3249 default:
3250 unreachable("unsupported aspect");
3251 return false;
3252 };
3253 break;
3254 case 2:
3255 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
3256 aspect == VK_IMAGE_ASPECT_DEPTH_BIT ||
3257 is_plane_aspect);
3258 src_format = VK_FORMAT_R16_UINT;
3259 dst_format = src_format;
3260 break;
3261 case 1:
3262 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT || is_plane_aspect);
3263 src_format = VK_FORMAT_R8_UINT;
3264 dst_format = src_format;
3265 break;
3266 default:
3267 unreachable("unsupported bit-size");
3268 return false;
3269 }
3270
3271 if (use_texel_buffer) {
3272 return texel_buffer_shader_copy(cmd_buffer, aspect, image,
3273 dst_format, src_format,
3274 buffer, buf_bpp,
3275 cmask, &cswizzle,
3276 region_count, regions);
3277 } else {
3278 return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
3279 dst_format, src_format,
3280 buffer, buf_bpp,
3281 cmask, &cswizzle,
3282 region_count, regions);
3283 }
3284 }
3285
3286 VKAPI_ATTR void VKAPI_CALL
3287 v3dv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
3288 const VkCopyBufferToImageInfo2 *info)
3289 {
3290 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
3291 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
3292 V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
3293
3294 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3295
3296 cmd_buffer->state.is_transfer = true;
3297
3298 uint32_t r = 0;
3299 while (r < info->regionCount) {
3300 /* The TFU and TLB paths can only copy one region at a time and the region
3301 * needs to start at the origin. We try these first for the common case
3302 * where we are copying full images, since they should be the fastest.
3303 */
3304 uint32_t batch_size = 1;
3305 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
3306 goto handled;
3307
3308 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
3309 goto handled;
3310
3311       /* Otherwise, we are copying subrects, so we fall back to copying
3312        * via shader and texel buffers, and we try to batch the regions
3313 * if possible. We can only batch copies if they have the same
3314 * framebuffer spec, which is mostly determined by the image
3315 * subresource of the region.
3316 */
3317 const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
3318 for (uint32_t s = r + 1; s < info->regionCount; s++) {
3319 const VkImageSubresourceLayers *rsc_s =
3320 &info->pRegions[s].imageSubresource;
3321
3322 if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
3323 break;
3324
3325 /* For 3D images we also need to check the depth extent */
3326 if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
3327 info->pRegions[s].imageExtent.depth !=
3328 info->pRegions[r].imageExtent.depth) {
3329 break;
3330 }
3331
3332 batch_size++;
3333 }
3334
3335 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
3336 batch_size, &info->pRegions[r], true)) {
3337 goto handled;
3338 }
3339
3340       /* If we still could not copy, fall back to slower paths.
3341        *
3342        * FIXME: we could try to batch these too, but since they are bound to be
3343        * slow it might not be worth it and we should instead put more effort
3344        * into handling more cases with the other paths.
3345 */
3346 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
3347 batch_size, &info->pRegions[r], false)) {
3348 goto handled;
3349 }
3350
3351 unreachable("Unsupported buffer to image copy.");
3352
3353 handled:
3354 r += batch_size;
3355 }
3356
3357 cmd_buffer->state.is_transfer = false;
3358 }
3359
3360 static void
3361 compute_blit_3d_layers(const VkOffset3D *offsets,
3362 uint32_t *min_layer, uint32_t *max_layer,
3363 bool *mirror_z);
3364
3365 /**
3366 * Returns true if the implementation supports the requested operation (even if
3367 * it failed to process it, for example, due to an out-of-memory error).
3368 *
3369 * The TFU blit path doesn't handle scaling so the blit filter parameter can
3370 * be ignored.
3371 */
3372 static bool
3373 blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
3374 struct v3dv_image *dst,
3375 struct v3dv_image *src,
3376 const VkImageBlit2 *region)
3377 {
3378 if (V3D_DBG(DISABLE_TFU)) {
3379 perf_debug("Blit: TFU disabled, fallbacks could be slower.");
3380 return false;
3381 }
3382
3383 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3384 assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3385
3386 /* From vkCmdBlitImage:
3387 * "srcImage must not use a format that requires a sampler YCBCR
3388 * conversion"
3389 * "dstImage must not use a format that requires a sampler YCBCR
3390 * conversion"
3391 */
3392 assert(dst->plane_count == 1);
3393 assert(src->plane_count == 1);
3394
3395 /* Format must match */
3396 if (src->vk.format != dst->vk.format)
3397 return false;
3398
3399 /* Destination can't be raster format */
3400 if (!dst->tiled)
3401 return false;
3402
3403 /* Source region must start at (0,0) */
3404 if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
3405 return false;
3406
3407 /* Destination image must be complete */
3408 if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
3409 return false;
3410
3411 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
3412 const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
3413 const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
3414    if (region->dstOffsets[1].x < dst_width - 1 ||
3415 region->dstOffsets[1].y < dst_height - 1) {
3416 return false;
3417 }
3418
3419 /* No XY scaling */
3420 if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
3421 region->srcOffsets[1].y != region->dstOffsets[1].y) {
3422 return false;
3423 }
3424
3425 /* If the format is D24S8 both aspects need to be copied, since the TFU
3426 * can't be programmed to copy only one aspect of the image.
3427 */
3428 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
3429 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
3430 VK_IMAGE_ASPECT_STENCIL_BIT;
3431 if (region->dstSubresource.aspectMask != ds_aspects)
3432 return false;
3433 }
3434
3435    /* Our TFU blits only handle exact copies (they require the same format
3436     * on input and output, no scaling, etc), so there are no pixel format
3437     * conversions and we can rewrite the format to use one that is TFU
3438     * compatible based on its texel size.
3439 */
3440 const struct v3dv_format *format =
3441 v3dv_get_compatible_tfu_format(cmd_buffer->device,
3442 dst->planes[0].cpp, NULL);
3443
3444 /* Emit a TFU job for each layer to blit */
3445    assert(vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource) ==
3446           vk_image_subresource_layer_count(&src->vk, &region->srcSubresource));
3447
3448 uint32_t min_dst_layer;
3449 uint32_t max_dst_layer;
3450 bool dst_mirror_z = false;
3451 if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
3452 compute_blit_3d_layers(region->dstOffsets,
3453 &min_dst_layer, &max_dst_layer,
3454 &dst_mirror_z);
3455 } else {
3456 min_dst_layer = region->dstSubresource.baseArrayLayer;
3457 max_dst_layer = min_dst_layer +
3458 vk_image_subresource_layer_count(&dst->vk,
3459                                           &region->dstSubresource);
3460 }
3461
3462 uint32_t min_src_layer;
3463 uint32_t max_src_layer;
3464 bool src_mirror_z = false;
3465 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
3466 compute_blit_3d_layers(region->srcOffsets,
3467 &min_src_layer, &max_src_layer,
3468 &src_mirror_z);
3469 } else {
3470 min_src_layer = region->srcSubresource.baseArrayLayer;
3471 max_src_layer = min_src_layer +
3472 vk_image_subresource_layer_count(&src->vk,
3473                                           &region->srcSubresource);
3474 }
3475
3476 /* No Z scaling for 3D images (for non-3D images both src and dst must
3477 * have the same layerCount).
3478 */
3479 if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
3480 return false;
3481
3482 const uint32_t layer_count = max_dst_layer - min_dst_layer;
3483 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
3484 for (uint32_t i = 0; i < layer_count; i++) {
3485 /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
3486 * only involves reversing the order of the slices.
3487 */
3488 const uint32_t dst_layer =
3489          dst_mirror_z ? max_dst_layer - i - 1 : min_dst_layer + i;
3490       const uint32_t src_layer =
3491          src_mirror_z ? max_src_layer - i - 1 : min_src_layer + i;
3492
3493 const uint32_t dst_offset =
3494 dst->planes[0].mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level,
3495 dst_layer, 0);
3496 const uint32_t src_offset =
3497 src->planes[0].mem->bo->offset + v3dv_layer_offset(src, src_mip_level,
3498 src_layer, 0);
3499
3500 const struct v3d_resource_slice *dst_slice = &dst->planes[0].slices[dst_mip_level];
3501 const struct v3d_resource_slice *src_slice = &src->planes[0].slices[src_mip_level];
3502
3503 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
3504 cmd_buffer,
3505 dst->planes[0].mem->bo->handle,
3506 dst_offset,
3507 dst_slice->tiling,
3508 dst_slice->padded_height,
3509 dst->planes[0].cpp,
3510 src->planes[0].mem->bo->handle,
3511 src_offset,
3512 src_slice->tiling,
3513 src_slice->tiling == V3D_TILING_RASTER ?
3514 src_slice->stride : src_slice->padded_height,
3515 src->planes[0].cpp,
3516 dst_width, dst_height, &format->planes[0]);
3517 }
3518
3519 return true;
3520 }
3521
3522 static bool
3523 format_needs_software_int_clamp(VkFormat format)
3524 {
3525 switch (format) {
3526 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
3527 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
3528 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
3529 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
3530 return true;
3531 default:
3532 return false;
3533 };
3534 }
3535
3536 static void
3537 get_blit_pipeline_cache_key(VkFormat dst_format,
3538 VkFormat src_format,
3539 VkColorComponentFlags cmask,
3540 VkSampleCountFlagBits dst_samples,
3541 VkSampleCountFlagBits src_samples,
3542 uint8_t *key)
3543 {
3544 memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
3545
3546 uint32_t *p = (uint32_t *) key;
3547
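   /* Key layout: four 32-bit words holding the destination format, the source
    * format (only when a software integer clamp is required), the color write
    * mask, and the packed destination/source sample counts.
    */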
3548 *p = dst_format;
3549 p++;
3550
3551 /* Generally, when blitting from a larger format to a smaller format
3552 * the hardware takes care of clamping the source to the RT range.
3553 * Specifically, for integer formats, this is done by using
3554     * V3D_RENDER_TARGET_CLAMP_INT in the render target setup; however, this
3555     * clamps to the bit-size of the render type, and some formats, such as
3556     * rgb10a2_uint, have a 16-bit type, so it won't do what we need and we
3557     * have to clamp in software. In these cases, we need to amend the blit
3558 * shader with clamp code that depends on both the src and dst formats, so
3559 * we need the src format to be part of the key.
3560 */
3561 *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
3562 p++;
3563
3564 *p = cmask;
3565 p++;
3566
3567 *p = (dst_samples << 8) | src_samples;
3568 p++;
3569
3570 assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
3571 }
3572
3573 static bool
3574 create_blit_render_pass(struct v3dv_device *device,
3575 VkFormat dst_format,
3576 VkFormat src_format,
3577 VkRenderPass *pass_load,
3578 VkRenderPass *pass_no_load)
3579 {
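   /* Two compatible passes are created below: pass_load uses
    * VK_ATTACHMENT_LOAD_OP_LOAD to preserve existing destination contents,
    * while pass_no_load uses DONT_CARE so the destination load can be skipped
    * when the blit overwrites the full render area.
    */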
3580 const bool is_color_blit = vk_format_is_color(dst_format);
3581
3582 /* Attachment load operation is specified below */
3583 VkAttachmentDescription2 att = {
3584 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
3585 .format = dst_format,
3586 .samples = VK_SAMPLE_COUNT_1_BIT,
3587 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
3588 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3589 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
3590 };
3591
3592 VkAttachmentReference2 att_ref = {
3593 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
3594 .attachment = 0,
3595 .layout = VK_IMAGE_LAYOUT_GENERAL,
3596 };
3597
3598 VkSubpassDescription2 subpass = {
3599 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
3600 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
3601 .inputAttachmentCount = 0,
3602 .colorAttachmentCount = is_color_blit ? 1 : 0,
3603 .pColorAttachments = is_color_blit ? &att_ref : NULL,
3604 .pResolveAttachments = NULL,
3605 .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
3606 .preserveAttachmentCount = 0,
3607 .pPreserveAttachments = NULL,
3608 };
3609
3610 VkRenderPassCreateInfo2 info = {
3611 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
3612 .attachmentCount = 1,
3613 .pAttachments = &att,
3614 .subpassCount = 1,
3615 .pSubpasses = &subpass,
3616 .dependencyCount = 0,
3617 .pDependencies = NULL,
3618 };
3619
3620 VkResult result;
3621 att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
3622 result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3623 &info, &device->vk.alloc, pass_load);
3624 if (result != VK_SUCCESS)
3625 return false;
3626
3627 att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
3628 result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3629 &info, &device->vk.alloc, pass_no_load);
3630 return result == VK_SUCCESS;
3631 }
3632
3633 static nir_def *
3634 gen_tex_coords(nir_builder *b)
3635 {
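   /* The blit push constants lay out the source box as four floats
    * (x0, y0, x1, y1) in bytes 0-15, followed by the source Z coordinate in
    * bytes 16-19 (used for 3D blit sources).
    */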
3636 nir_def *tex_box =
3637 nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
3638
3639 nir_def *tex_z =
3640 nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
3641
3642 nir_def *vertex_id = nir_load_vertex_id(b);
3643
3644 /* vertex 0: src0_x, src0_y
3645 * vertex 1: src0_x, src1_y
3646 * vertex 2: src1_x, src0_y
3647 * vertex 3: src1_x, src1_y
3648 *
3649 * So:
3650 *
3651 * channel 0 is vertex_id < 2 ? src0_x : src1_x
3652 * channel 1 is vertex id & 1 ? src1_y : src0_y
3653 */
3654
3655 nir_def *one = nir_imm_int(b, 1);
3656 nir_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
3657 nir_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3658
3659 nir_def *comp[4];
3660 comp[0] = nir_bcsel(b, c0cmp,
3661 nir_channel(b, tex_box, 0),
3662 nir_channel(b, tex_box, 2));
3663
3664 comp[1] = nir_bcsel(b, c1cmp,
3665 nir_channel(b, tex_box, 3),
3666 nir_channel(b, tex_box, 1));
3667 comp[2] = tex_z;
3668 comp[3] = nir_imm_float(b, 1.0f);
3669 return nir_vec(b, comp, 4);
3670 }
3671
3672 static nir_def *
3673 build_nir_tex_op_read(struct nir_builder *b,
3674 nir_def *tex_pos,
3675 enum glsl_base_type tex_type,
3676 enum glsl_sampler_dim dim)
3677 {
3678 assert(dim != GLSL_SAMPLER_DIM_MS);
3679
3680 const struct glsl_type *sampler_type =
3681 glsl_sampler_type(dim, false, false, tex_type);
3682 nir_variable *sampler =
3683 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3684 sampler->data.descriptor_set = 0;
3685 sampler->data.binding = 0;
3686
3687 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3688 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3689 tex->sampler_dim = dim;
3690 tex->op = nir_texop_tex;
3691 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
3692 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
3693 tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref, tex_deref);
3694 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3695 tex->is_array = glsl_sampler_type_is_array(sampler_type);
3696 tex->coord_components = tex_pos->num_components;
3697
3698 nir_def_init(&tex->instr, &tex->def, 4, 32);
3699 nir_builder_instr_insert(b, &tex->instr);
3700 return &tex->def;
3701 }
3702
3703 static nir_def *
3704 build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
3705 nir_variable *sampler,
3706 nir_def *tex_deref,
3707 enum glsl_base_type tex_type,
3708 nir_def *tex_pos,
3709 nir_def *sample_idx)
3710 {
3711 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3712 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
3713 tex->op = nir_texop_txf_ms;
3714 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
3715 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
3716 tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_idx);
3717 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3718 tex->is_array = false;
3719 tex->coord_components = tex_pos->num_components;
3720
3721 nir_def_init(&tex->instr, &tex->def, 4, 32);
3722 nir_builder_instr_insert(b, &tex->instr);
3723 return &tex->def;
3724 }
3725
3726 /* Fetches all samples at the given position and averages them */
3727 static nir_def *
3728 build_nir_tex_op_ms_resolve(struct nir_builder *b,
3729 nir_def *tex_pos,
3730 enum glsl_base_type tex_type,
3731 VkSampleCountFlagBits src_samples)
3732 {
3733 assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
3734 const struct glsl_type *sampler_type =
3735 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3736 nir_variable *sampler =
3737 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3738 sampler->data.descriptor_set = 0;
3739 sampler->data.binding = 0;
3740
3741 const bool is_int = glsl_base_type_is_integer(tex_type);
3742
3743 nir_def *tmp = NULL;
3744 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3745 for (uint32_t i = 0; i < src_samples; i++) {
3746 nir_def *s =
3747 build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3748 tex_type, tex_pos,
3749 nir_imm_int(b, i));
3750
3751       /* For integer formats, the multisample resolve operation is expected to
3752        * return one of the samples, so we just return the first one.
3753 */
3754 if (is_int)
3755 return s;
3756
3757 tmp = i == 0 ? s : nir_fadd(b, tmp, s);
3758 }
3759
3760 assert(!is_int);
3761 return nir_fmul_imm(b, tmp, 1.0f / src_samples);
3762 }
3763
3764 /* Fetches the current sample (gl_SampleID) at the given position */
3765 static nir_def *
3766 build_nir_tex_op_ms_read(struct nir_builder *b,
3767 nir_def *tex_pos,
3768 enum glsl_base_type tex_type)
3769 {
3770 const struct glsl_type *sampler_type =
3771 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3772 nir_variable *sampler =
3773 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3774 sampler->data.descriptor_set = 0;
3775 sampler->data.binding = 0;
3776
3777 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3778
3779 return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3780 tex_type, tex_pos,
3781 nir_load_sample_id(b));
3782 }
3783
3784 static nir_def *
3785 build_nir_tex_op(struct nir_builder *b,
3786 struct v3dv_device *device,
3787 nir_def *tex_pos,
3788 enum glsl_base_type tex_type,
3789 VkSampleCountFlagBits dst_samples,
3790 VkSampleCountFlagBits src_samples,
3791 enum glsl_sampler_dim dim)
3792 {
3793 switch (dim) {
3794 case GLSL_SAMPLER_DIM_MS:
3795 assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
3796 /* For multisampled texture sources we need to use fetching instead of
3797 * normalized texture coordinates. We already configured our blit
3798 * coordinates to be in texel units, but here we still need to convert
3799 * them from floating point to integer.
3800 */
3801 tex_pos = nir_f2i32(b, tex_pos);
3802
3803 if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
3804 return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
3805 else
3806 return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
3807 default:
3808 assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
3809 return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
3810 }
3811 }
3812
3813 static nir_shader *
3814 get_blit_vs(const nir_shader_compiler_options *options)
3815 {
3816 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
3817 "meta blit vs");
3818
3819 const struct glsl_type *vec4 = glsl_vec4_type();
3820
3821 nir_variable *vs_out_pos =
3822 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
3823 vs_out_pos->data.location = VARYING_SLOT_POS;
3824
3825 nir_variable *vs_out_tex_coord =
3826 nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
3827 vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
3828 vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
3829
3830 nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
3831 nir_store_var(&b, vs_out_pos, pos, 0xf);
3832
3833 nir_def *tex_coord = gen_tex_coords(&b);
3834 nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
3835
3836 return b.shader;
3837 }
3838
3839 static uint32_t
3840 get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
3841 {
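   /* Mask of texture coordinate components consumed for each sampler
    * dimension: x for 1D, xy for 2D/MS, xyz for 3D.
    */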
3842 switch (sampler_dim) {
3843 case GLSL_SAMPLER_DIM_1D: return 0x1;
3844 case GLSL_SAMPLER_DIM_2D: return 0x3;
3845 case GLSL_SAMPLER_DIM_MS: return 0x3;
3846 case GLSL_SAMPLER_DIM_3D: return 0x7;
3847 default:
3848 unreachable("invalid sampler dim");
3849 };
3850 }
3851
3852 static nir_shader *
3853 get_color_blit_fs(const nir_shader_compiler_options *options,
3854 struct v3dv_device *device,
3855 VkFormat dst_format,
3856 VkFormat src_format,
3857 VkSampleCountFlagBits dst_samples,
3858 VkSampleCountFlagBits src_samples,
3859 enum glsl_sampler_dim sampler_dim)
3860 {
3861 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
3862 "meta blit fs");
3863
3864 const struct glsl_type *vec4 = glsl_vec4_type();
3865
3866 nir_variable *fs_in_tex_coord =
3867 nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
3868 fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
3869
3870 const struct glsl_type *fs_out_type =
3871 vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
3872 vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
3873 glsl_vec4_type();
3874
3875 enum glsl_base_type src_base_type =
3876 vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
3877 vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
3878 GLSL_TYPE_FLOAT;
3879
3880 nir_variable *fs_out_color =
3881 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
3882 fs_out_color->data.location = FRAG_RESULT_DATA0;
3883
3884 nir_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
3885 const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
3886 tex_coord = nir_channels(&b, tex_coord, channel_mask);
3887
3888 nir_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
3889 dst_samples, src_samples, sampler_dim);
3890
3891 /* For integer textures, if the bit-size of the destination is too small to
3892 * hold source value, Vulkan (CTS) expects the implementation to clamp to the
3893 * maximum value the destination can hold. The hardware can clamp to the
3894 * render target type, which usually matches the component bit-size, but
3895 * there are some cases that won't match, such as rgb10a2, which has a 16-bit
3896 * render target type, so in these cases we need to clamp manually.
3897 */
3898 if (format_needs_software_int_clamp(dst_format)) {
3899 assert(vk_format_is_int(dst_format));
3900 enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
3901 enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
3902
3903 nir_def *c[4];
3904 for (uint32_t i = 0; i < 4; i++) {
3905 c[i] = nir_channel(&b, color, i);
3906
3907 const uint32_t src_bit_size =
3908 util_format_get_component_bits(src_pformat,
3909 UTIL_FORMAT_COLORSPACE_RGB,
3910 i);
3911 const uint32_t dst_bit_size =
3912 util_format_get_component_bits(dst_pformat,
3913 UTIL_FORMAT_COLORSPACE_RGB,
3914 i);
3915
3916 if (dst_bit_size >= src_bit_size)
3917 continue;
3918
3919 assert(dst_bit_size > 0);
3920 if (util_format_is_pure_uint(dst_pformat)) {
3921 nir_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
3922 c[i] = nir_umin(&b, c[i], max);
3923 } else {
3924 nir_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
3925 nir_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
3926 c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
3927 }
3928 }
3929
3930 color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
3931 }
3932
3933 nir_store_var(&b, fs_out_color, color, 0xf);
3934
3935 return b.shader;
3936 }
3937
3938 static bool
3939 create_pipeline(struct v3dv_device *device,
3940 struct v3dv_render_pass *pass,
3941 struct nir_shader *vs_nir,
3942 struct nir_shader *gs_nir,
3943 struct nir_shader *fs_nir,
3944 const VkPipelineVertexInputStateCreateInfo *vi_state,
3945 const VkPipelineDepthStencilStateCreateInfo *ds_state,
3946 const VkPipelineColorBlendStateCreateInfo *cb_state,
3947 const VkPipelineMultisampleStateCreateInfo *ms_state,
3948 const VkPipelineLayout layout,
3949 VkPipeline *pipeline)
3950 {
3951 struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
3952 struct vk_shader_module fs_m = vk_shader_module_from_nir(fs_nir);
3953 struct vk_shader_module gs_m;
3954
3955 uint32_t num_stages = gs_nir ? 3 : 2;
3956
3957
3958 VkPipelineShaderStageCreateInfo stages[3] = {
3959 {
3960 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3961 .stage = VK_SHADER_STAGE_VERTEX_BIT,
3962 .module = vk_shader_module_to_handle(&vs_m),
3963 .pName = "main",
3964 },
3965 {
3966 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3967 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3968 .module = vk_shader_module_to_handle(&fs_m),
3969 .pName = "main",
3970 },
3971 {
3972 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3973 .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
3974 .module = VK_NULL_HANDLE,
3975 .pName = "main",
3976 },
3977 };
3978
3979 if (gs_nir) {
3980 gs_m = vk_shader_module_from_nir(gs_nir);
3981 stages[2].module = vk_shader_module_to_handle(&gs_m);
3982 }
3983
3984 VkGraphicsPipelineCreateInfo info = {
3985 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3986
3987 .stageCount = num_stages,
3988 .pStages = stages,
3989
3990 .pVertexInputState = vi_state,
3991
3992 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
3993 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
3994 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3995 .primitiveRestartEnable = false,
3996 },
3997
3998 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
3999 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
4000 .viewportCount = 1,
4001 .scissorCount = 1,
4002 },
4003
4004 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
4005 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
4006 .rasterizerDiscardEnable = false,
4007 .polygonMode = VK_POLYGON_MODE_FILL,
4008 .cullMode = VK_CULL_MODE_NONE,
4009 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
4010 .depthBiasEnable = false,
4011 },
4012
4013 .pMultisampleState = ms_state,
4014
4015 .pDepthStencilState = ds_state,
4016
4017 .pColorBlendState = cb_state,
4018
4019 /* The meta clear pipeline declares all state as dynamic.
4020 * As a consequence, vkCmdBindPipeline writes no dynamic state
4021 * to the cmd buffer. Therefore, at the end of the meta clear,
4022 * we need only restore dynamic state that was vkCmdSet.
4023 */
4024 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
4025 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
4026 .dynamicStateCount = 6,
4027 .pDynamicStates = (VkDynamicState[]) {
4028 VK_DYNAMIC_STATE_VIEWPORT,
4029 VK_DYNAMIC_STATE_SCISSOR,
4030 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
4031 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
4032 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
4033 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
4034 VK_DYNAMIC_STATE_DEPTH_BIAS,
4035 VK_DYNAMIC_STATE_LINE_WIDTH,
4036 },
4037 },
4038
4039 .flags = 0,
4040 .layout = layout,
4041 .renderPass = v3dv_render_pass_to_handle(pass),
4042 .subpass = 0,
4043 };
4044
4045 VkResult result =
4046 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
4047 VK_NULL_HANDLE,
4048 1, &info,
4049 &device->vk.alloc,
4050 pipeline);
4051
4052 ralloc_free(vs_nir);
4053 ralloc_free(gs_nir);
4054 ralloc_free(fs_nir);
4055
4056 return result == VK_SUCCESS;
4057 }
4058
4059 static enum glsl_sampler_dim
4060 get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
4061 {
4062    /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
4063 *
4064 * "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
4065 * VK_IMAGE_TYPE_2D, ..."
4066 */
4067 assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
4068
4069 switch (type) {
4070 case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
4071 case VK_IMAGE_TYPE_2D:
4072 return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
4073 GLSL_SAMPLER_DIM_MS;
4074 case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
4075 default:
4076 unreachable("Invalid image type");
4077 }
4078 }
4079
4080 static bool
4081 create_blit_pipeline(struct v3dv_device *device,
4082 VkFormat dst_format,
4083 VkFormat src_format,
4084 VkColorComponentFlags cmask,
4085 VkImageType src_type,
4086 VkSampleCountFlagBits dst_samples,
4087 VkSampleCountFlagBits src_samples,
4088 VkRenderPass _pass,
4089 VkPipelineLayout pipeline_layout,
4090 VkPipeline *pipeline)
4091 {
4092 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
4093
4094 /* We always rewrite depth/stencil blits to compatible color blits */
4095 assert(vk_format_is_color(dst_format));
4096 assert(vk_format_is_color(src_format));
4097
4098 const nir_shader_compiler_options *options =
4099 v3dv_pipeline_get_nir_options(&device->devinfo);
4100
4101 const enum glsl_sampler_dim sampler_dim =
4102 get_sampler_dim(src_type, src_samples);
4103
4104 nir_shader *vs_nir = get_blit_vs(options);
4105 nir_shader *fs_nir =
4106 get_color_blit_fs(options, device, dst_format, src_format,
4107 dst_samples, src_samples, sampler_dim);
4108
4109 const VkPipelineVertexInputStateCreateInfo vi_state = {
4110 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
4111 .vertexBindingDescriptionCount = 0,
4112 .vertexAttributeDescriptionCount = 0,
4113 };
4114
4115 VkPipelineDepthStencilStateCreateInfo ds_state = {
4116 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
4117 };
4118
4119 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
4120 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
4121 .blendEnable = false,
4122 .colorWriteMask = cmask,
4123 };
4124
4125 const VkPipelineColorBlendStateCreateInfo cb_state = {
4126 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
4127 .logicOpEnable = false,
4128 .attachmentCount = 1,
4129 .pAttachments = blend_att_state
4130 };
4131
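   /* For multisampled destinations, enable sample shading so the fragment
    * shader runs once per sample; MS-to-MS blits rely on gl_SampleID to fetch
    * the matching source sample.
    */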
4132 const VkPipelineMultisampleStateCreateInfo ms_state = {
4133 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
4134 .rasterizationSamples = dst_samples,
4135 .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
4136 .pSampleMask = NULL,
4137 .alphaToCoverageEnable = false,
4138 .alphaToOneEnable = false,
4139 };
4140
4141 return create_pipeline(device,
4142 pass,
4143 vs_nir, NULL, fs_nir,
4144 &vi_state,
4145 &ds_state,
4146 &cb_state,
4147 &ms_state,
4148 pipeline_layout,
4149 pipeline);
4150 }
4151
4152 /**
4153 * Return a pipeline suitable for blitting the requested aspect given the
4154 * destination and source formats.
4155 */
4156 static bool
4157 get_blit_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
4158 VkFormat dst_format,
4159 VkFormat src_format,
4160 VkColorComponentFlags cmask,
4161 VkImageType src_type,
4162 VkSampleCountFlagBits dst_samples,
4163 VkSampleCountFlagBits src_samples,
4164 struct v3dv_meta_blit_pipeline **pipeline)
4165 {
4166 bool ok = true;
4167 struct v3dv_device *device = cmd_buffer->device;
4168
4169 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
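   /* When the meta cache is enabled, blit pipelines are shared across command
    * buffers and looked up by the key computed below; otherwise the pipeline
    * is created for this call and destroyed with the command buffer.
    */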
4170 if (device->instance->meta_cache_enabled) {
4171 get_blit_pipeline_cache_key(dst_format, src_format, cmask,
4172 dst_samples, src_samples, key);
4173 mtx_lock(&device->meta.mtx);
4174 struct hash_entry *entry =
4175 _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
4176 if (entry) {
4177 mtx_unlock(&device->meta.mtx);
4178 *pipeline = entry->data;
4179 return true;
4180 }
4181 }
4182
4183 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
4184 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
4185
4186 if (*pipeline == NULL)
4187 goto fail;
4188
4189 ok = create_blit_render_pass(device, dst_format, src_format,
4190 &(*pipeline)->pass,
4191 &(*pipeline)->pass_no_load);
4192 if (!ok)
4193 goto fail;
4194
4195 /* Create the pipeline using one of the render passes, they are both
4196 * compatible, so we don't care which one we use here.
4197 */
4198 ok = create_blit_pipeline(device,
4199 dst_format,
4200 src_format,
4201 cmask,
4202 src_type,
4203 dst_samples,
4204 src_samples,
4205 (*pipeline)->pass,
4206 device->meta.blit.p_layout,
4207 &(*pipeline)->pipeline);
4208 if (!ok)
4209 goto fail;
4210
4211 if (device->instance->meta_cache_enabled) {
4212 memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
4213 _mesa_hash_table_insert(device->meta.blit.cache[src_type],
4214 &(*pipeline)->key, *pipeline);
4215 mtx_unlock(&device->meta.mtx);
4216 } else {
4217 v3dv_cmd_buffer_add_private_obj(
4218 cmd_buffer, (uintptr_t)*pipeline,
4219 (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_meta_blit_pipeline);
4220 }
4221
4222 return true;
4223
4224 fail:
4225 if (device->instance->meta_cache_enabled)
4226 mtx_unlock(&device->meta.mtx);
4227
4228 VkDevice _device = v3dv_device_to_handle(device);
4229 if (*pipeline) {
4230 if ((*pipeline)->pass)
4231 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
4232 if ((*pipeline)->pass_no_load)
4233 v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
4234 if ((*pipeline)->pipeline)
4235 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
4236 vk_free(&device->vk.alloc, *pipeline);
4237 *pipeline = NULL;
4238 }
4239
4240 return false;
4241 }
4242
4243 static void
4244 compute_blit_box(const VkOffset3D *offsets,
4245 uint32_t image_w, uint32_t image_h,
4246 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
4247 bool *mirror_x, bool *mirror_y)
4248 {
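   /* Convert a pair of blit offsets into a clamped origin and size plus a
    * mirror flag for each axis; offsets given in decreasing order mean the
    * blit is mirrored along that axis.
    */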
4249 if (offsets[1].x >= offsets[0].x) {
4250 *mirror_x = false;
4251 *x = MIN2(offsets[0].x, image_w - 1);
4252 *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
4253 } else {
4254 *mirror_x = true;
4255 *x = MIN2(offsets[1].x, image_w - 1);
4256 *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
4257 }
4258 if (offsets[1].y >= offsets[0].y) {
4259 *mirror_y = false;
4260 *y = MIN2(offsets[0].y, image_h - 1);
4261 *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
4262 } else {
4263 *mirror_y = true;
4264 *y = MIN2(offsets[1].y, image_h - 1);
4265 *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
4266 }
4267 }
4268
4269 static void
4270 compute_blit_3d_layers(const VkOffset3D *offsets,
4271 uint32_t *min_layer, uint32_t *max_layer,
4272 bool *mirror_z)
4273 {
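   /* The Z offsets of a 3D blit select the range of slices to process; a
    * decreasing range means the blit is mirrored along Z.
    */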
4274 if (offsets[1].z >= offsets[0].z) {
4275 *mirror_z = false;
4276 *min_layer = offsets[0].z;
4277 *max_layer = offsets[1].z;
4278 } else {
4279 *mirror_z = true;
4280 *min_layer = offsets[1].z;
4281 *max_layer = offsets[0].z;
4282 }
4283 }
4284
4285 static VkResult
4286 create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
4287 {
4288    /* If this is not the first pool we create for this command buffer,
4289 * size it based on the size of the currently exhausted pool.
4290 */
4291 uint32_t descriptor_count = 64;
4292 if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
4293 struct v3dv_descriptor_pool *exhausted_pool =
4294 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
4295 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
4296 }
4297
4298 /* Create the descriptor pool */
4299 cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
4300 VkDescriptorPoolSize pool_size = {
4301 .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4302 .descriptorCount = descriptor_count,
4303 };
4304 VkDescriptorPoolCreateInfo info = {
4305 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
4306 .maxSets = descriptor_count,
4307 .poolSizeCount = 1,
4308 .pPoolSizes = &pool_size,
4309 .flags = 0,
4310 };
4311 VkResult result =
4312 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
4313 &info,
4314 &cmd_buffer->device->vk.alloc,
4315 &cmd_buffer->meta.blit.dspool);
4316
4317 if (result == VK_SUCCESS) {
4318 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4319 const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
4320
4321 v3dv_cmd_buffer_add_private_obj(
4322 cmd_buffer, (uintptr_t) _pool,
4323 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
4324
4325 struct v3dv_descriptor_pool *pool =
4326 v3dv_descriptor_pool_from_handle(_pool);
4327 pool->is_driver_internal = true;
4328 }
4329
4330 return result;
4331 }
4332
4333 static VkResult
4334 allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
4335 VkDescriptorSet *set)
4336 {
4337 /* Make sure we have a descriptor pool */
4338 VkResult result;
4339 if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
4340 result = create_blit_descriptor_pool(cmd_buffer);
4341 if (result != VK_SUCCESS)
4342 return result;
4343 }
4344 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4345
4346 /* Allocate descriptor set */
4347 struct v3dv_device *device = cmd_buffer->device;
4348 VkDevice _device = v3dv_device_to_handle(device);
4349 VkDescriptorSetAllocateInfo info = {
4350 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
4351 .descriptorPool = cmd_buffer->meta.blit.dspool,
4352 .descriptorSetCount = 1,
4353 .pSetLayouts = &device->meta.blit.ds_layout,
4354 };
4355 result = v3dv_AllocateDescriptorSets(_device, &info, set);
4356
4357 /* If we ran out of pool space, grow the pool and try again */
4358 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
4359 result = create_blit_descriptor_pool(cmd_buffer);
4360 if (result == VK_SUCCESS) {
4361 info.descriptorPool = cmd_buffer->meta.blit.dspool;
4362 result = v3dv_AllocateDescriptorSets(_device, &info, set);
4363 }
4364 }
4365
4366 return result;
4367 }
4368
4369 /**
4370 * Returns true if the implementation supports the requested operation (even if
4371 * it failed to process it, for example, due to an out-of-memory error).
4372 *
4373 * The caller can specify the channels on the destination to be written via the
4374 * cmask parameter (which can be 0 to default to all channels), as well as a
4375 * swizzle to apply to the source via the cswizzle parameter (which can be NULL
4376 * to use the default identity swizzle).
4377 *
4378 * Supports multi-plane formats too.
4379 */
4380 static bool
4381 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
4382 struct v3dv_image *dst,
4383 VkFormat dst_format,
4384 struct v3dv_image *src,
4385 VkFormat src_format,
4386 VkColorComponentFlags cmask,
4387 VkComponentMapping *cswizzle,
4388 const VkImageBlit2 *region,
4389 VkFilter filter,
4390 bool dst_is_padded_image)
4391 {
4392 bool handled = true;
4393 VkResult result;
4394
4395    /* Can't sample from linear images, except for 1D images */
4396 if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D) {
4397 return false;
4398 }
4399
4400 /* Rewrite combined D/S blits to compatible color blits */
4401 if (vk_format_is_depth_or_stencil(dst_format)) {
4402 assert(src_format == dst_format);
4403 assert(cmask == 0);
4404 switch(dst_format) {
4405 case VK_FORMAT_D16_UNORM:
4406 dst_format = VK_FORMAT_R16_UINT;
4407 break;
4408 case VK_FORMAT_D32_SFLOAT:
4409 dst_format = VK_FORMAT_R32_UINT;
4410 break;
4411 case VK_FORMAT_X8_D24_UNORM_PACK32:
4412 case VK_FORMAT_D24_UNORM_S8_UINT:
4413 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
4414 cmask |= VK_COLOR_COMPONENT_G_BIT |
4415 VK_COLOR_COMPONENT_B_BIT |
4416 VK_COLOR_COMPONENT_A_BIT;
4417 }
4418 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
4419 assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
4420 cmask |= VK_COLOR_COMPONENT_R_BIT;
4421 }
4422 dst_format = VK_FORMAT_R8G8B8A8_UINT;
4423 break;
4424 default:
4425 unreachable("Unsupported depth/stencil format");
4426 };
4427 src_format = dst_format;
4428 }
4429
4430 uint8_t src_plane =
4431 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
4432 assert(src_plane < src->plane_count);
4433 uint8_t dst_plane =
4434 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
4435 assert(dst_plane < dst->plane_count);
4436
4437 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
4438 VK_COLOR_COMPONENT_G_BIT |
4439 VK_COLOR_COMPONENT_B_BIT |
4440 VK_COLOR_COMPONENT_A_BIT;
4441 if (cmask == 0)
4442 cmask = full_cmask;
4443
4444 VkComponentMapping ident_swizzle = {
4445 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
4446 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
4447 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
4448 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
4449 };
4450 if (!cswizzle)
4451 cswizzle = &ident_swizzle;
4452
4453 /* When we get here from a copy between compressed / uncompressed images
4454 * we choose to specify the destination blit region based on the size
4455 * semantics of the source image of the copy (see copy_image_blit), so we
4456 * need to apply those same semantics here when we compute the size of the
4457 * destination image level.
4458 */
4459 const uint32_t dst_block_w =
4460 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
4461 const uint32_t dst_block_h =
4462 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
4463 const uint32_t src_block_w =
4464 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
4465 const uint32_t src_block_h =
4466 vk_format_get_blockheight(src->planes[src_plane].vk_format);
4467 const uint32_t dst_level_w =
4468 u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
4469 region->dstSubresource.mipLevel);
4470 const uint32_t dst_level_h =
4471 u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
4472 region->dstSubresource.mipLevel);
4473
4474 const uint32_t src_level_w =
4475 u_minify(src->planes[src_plane].width, region->srcSubresource.mipLevel);
4476 const uint32_t src_level_h =
4477 u_minify(src->planes[src_plane].height, region->srcSubresource.mipLevel);
4478
4479 assert(src->plane_count == 1 || src->vk.image_type != VK_IMAGE_TYPE_3D);
4480 const uint32_t src_level_d =
4481 u_minify(src->vk.extent.depth, region->srcSubresource.mipLevel);
4482
4483 uint32_t dst_x, dst_y, dst_w, dst_h;
4484 bool dst_mirror_x, dst_mirror_y;
4485 compute_blit_box(region->dstOffsets,
4486 dst_level_w, dst_level_h,
4487 &dst_x, &dst_y, &dst_w, &dst_h,
4488 &dst_mirror_x, &dst_mirror_y);
4489
4490 uint32_t src_x, src_y, src_w, src_h;
4491 bool src_mirror_x, src_mirror_y;
4492 compute_blit_box(region->srcOffsets,
4493 src_level_w, src_level_h,
4494 &src_x, &src_y, &src_w, &src_h,
4495 &src_mirror_x, &src_mirror_y);
4496
4497 uint32_t min_dst_layer;
4498 uint32_t max_dst_layer;
4499 bool dst_mirror_z = false;
4500 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
4501 min_dst_layer = region->dstSubresource.baseArrayLayer;
4502 max_dst_layer = min_dst_layer +
4503 vk_image_subresource_layer_count(&dst->vk,
4504 ®ion->dstSubresource);
4505 } else {
4506 compute_blit_3d_layers(region->dstOffsets,
4507 &min_dst_layer, &max_dst_layer,
4508 &dst_mirror_z);
4509 }
4510
4511 uint32_t min_src_layer;
4512 uint32_t max_src_layer;
4513 bool src_mirror_z = false;
4514 if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
4515 min_src_layer = region->srcSubresource.baseArrayLayer;
4516 max_src_layer = min_src_layer +
4517 vk_image_subresource_layer_count(&src->vk,
4518 ®ion->srcSubresource);
4519 } else {
4520 compute_blit_3d_layers(region->srcOffsets,
4521 &min_src_layer, &max_src_layer,
4522 &src_mirror_z);
4523 }
4524
4525 uint32_t layer_count = max_dst_layer - min_dst_layer;
4526
4527 /* Translate source blit coordinates to normalized texture coordinates for
4528 * single sampled textures. For multisampled textures we require
4529 * unnormalized coordinates, since we can only do texelFetch on them.
4530 */
4531 float coords[4] = {
4532 (float)src_x,
4533 (float)src_y,
4534 (float)(src_x + src_w),
4535 (float)(src_y + src_h),
4536 };
4537
4538 if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
4539 coords[0] /= (float)src_level_w;
4540 coords[1] /= (float)src_level_h;
4541 coords[2] /= (float)src_level_w;
4542 coords[3] /= (float)src_level_h;
4543 }
4544
4545 /* Handle mirroring */
4546 const bool mirror_x = dst_mirror_x != src_mirror_x;
4547 const bool mirror_y = dst_mirror_y != src_mirror_y;
4548 const bool mirror_z = dst_mirror_z != src_mirror_z;
4549 float tex_coords[5] = {
4550 !mirror_x ? coords[0] : coords[2],
4551 !mirror_y ? coords[1] : coords[3],
4552 !mirror_x ? coords[2] : coords[0],
4553 !mirror_y ? coords[3] : coords[1],
4554 /* Z coordinate for 3D blit sources, to be filled for each
4555 * destination layer
4556 */
4557 0.0f
4558 };
4559
4560 /* For blits from 3D images we also need to compute the slice coordinate to
4561 * sample from, which will change for each layer in the destination.
4562 * Compute the step we should increase for each iteration.
4563 */
4564 const float src_z_step =
4565 (float)(max_src_layer - min_src_layer) / (float)layer_count;
4566
4567 /* Get the blit pipeline */
4568 struct v3dv_meta_blit_pipeline *pipeline = NULL;
4569 bool ok = get_blit_pipeline(cmd_buffer,
4570 dst_format, src_format, cmask, src->vk.image_type,
4571 dst->vk.samples, src->vk.samples,
4572 &pipeline);
4573 if (!ok)
4574 return handled;
4575 assert(pipeline && pipeline->pipeline &&
4576 pipeline->pass && pipeline->pass_no_load);
4577
4578 struct v3dv_device *device = cmd_buffer->device;
4579 assert(device->meta.blit.ds_layout);
4580
4581 VkDevice _device = v3dv_device_to_handle(device);
4582 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
4583
4584 /* Create sampler for blit source image */
4585 VkSamplerCreateInfo sampler_info = {
4586 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
4587 .magFilter = filter,
4588 .minFilter = filter,
4589 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4590 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4591 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4592 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
4593 };
4594 VkSampler sampler;
4595 result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
4596 &sampler);
4597 if (result != VK_SUCCESS)
4598 goto fail;
4599
4600 v3dv_cmd_buffer_add_private_obj(
4601 cmd_buffer, (uintptr_t)sampler,
4602 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
4603
4604 /* Push command buffer state before starting meta operation */
4605 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
4606
4607 /* Push state that is common for all layers */
4608 v3dv_CmdBindPipeline(_cmd_buffer,
4609 VK_PIPELINE_BIND_POINT_GRAPHICS,
4610 pipeline->pipeline);
4611
4612 const VkViewport viewport = {
4613 .x = dst_x,
4614 .y = dst_y,
4615 .width = dst_w,
4616 .height = dst_h,
4617 .minDepth = 0.0f,
4618 .maxDepth = 1.0f
4619 };
4620 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
4621
4622 const VkRect2D scissor = {
4623 .offset = { dst_x, dst_y },
4624 .extent = { dst_w, dst_h }
4625 };
4626 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
4627
4628 bool can_skip_tlb_load = false;
4629 const VkRect2D render_area = {
4630 .offset = { dst_x, dst_y },
4631 .extent = { dst_w, dst_h },
4632 };
4633
4634 /* Record per-layer commands */
4635 for (uint32_t i = 0; i < layer_count; i++) {
4636 /* Setup framebuffer */
4637 VkImageViewCreateInfo dst_image_view_info = {
4638 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4639 .image = v3dv_image_to_handle(dst),
4640 .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
4641 .format = dst_format,
4642 .subresourceRange = {
4643 .aspectMask = region->dstSubresource.aspectMask,
4644 .baseMipLevel = region->dstSubresource.mipLevel,
4645 .levelCount = 1,
4646 .baseArrayLayer = min_dst_layer + i,
4647 .layerCount = 1
4648 },
4649 };
4650 VkImageView dst_image_view;
4651 result = v3dv_create_image_view(device, &dst_image_view_info,
4652 &dst_image_view);
4653 if (result != VK_SUCCESS)
4654 goto fail;
4655
4656 v3dv_cmd_buffer_add_private_obj(
4657 cmd_buffer, (uintptr_t)dst_image_view,
4658 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4659
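      /* The framebuffer only needs to be large enough to contain the
       * destination rectangle (offset plus extent); it does not have to
       * cover the whole destination mip level.
       */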
4660 VkFramebufferCreateInfo fb_info = {
4661 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
4662 .renderPass = pipeline->pass,
4663 .attachmentCount = 1,
4664 .pAttachments = &dst_image_view,
4665 .width = dst_x + dst_w,
4666 .height = dst_y + dst_h,
4667 .layers = 1,
4668 };
4669
4670 VkFramebuffer fb;
4671 result = v3dv_CreateFramebuffer(_device, &fb_info,
4672 &cmd_buffer->device->vk.alloc, &fb);
4673 if (result != VK_SUCCESS)
4674 goto fail;
4675
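      /* Writing edge padding is only safe when the framebuffer covers the
       * entire destination mip level of an image that actually has a padded
       * layout.
       */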
4676 struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
4677 framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
4678 fb_info.height == dst_level_h &&
4679 dst_is_padded_image;
4680
4681 v3dv_cmd_buffer_add_private_obj(
4682 cmd_buffer, (uintptr_t)fb,
4683 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
4684
4685 /* Setup descriptor set for blit source texture. We don't have to
4686 * register the descriptor as a private command buffer object since
4687 * all descriptors will be freed automatically with the descriptor
4688 * pool.
4689 */
4690 VkDescriptorSet set;
4691 result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
4692 if (result != VK_SUCCESS)
4693 goto fail;
4694
4695 VkImageViewCreateInfo src_image_view_info = {
4696 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4697 .image = v3dv_image_to_handle(src),
4698 .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
4699 .format = src_format,
4700 .components = *cswizzle,
4701 .subresourceRange = {
4702 .aspectMask = region->srcSubresource.aspectMask,
4703 .baseMipLevel = region->srcSubresource.mipLevel,
4704 .levelCount = 1,
4705 .baseArrayLayer =
4706 src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
4707 .layerCount = 1
4708 },
4709 };
4710 VkImageView src_image_view;
4711 result = v3dv_create_image_view(device, &src_image_view_info,
4712 &src_image_view);
4713 if (result != VK_SUCCESS)
4714 goto fail;
4715
4716 v3dv_cmd_buffer_add_private_obj(
4717 cmd_buffer, (uintptr_t)src_image_view,
4718 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4719
4720 VkDescriptorImageInfo image_info = {
4721 .sampler = sampler,
4722 .imageView = src_image_view,
4723 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4724 };
4725 VkWriteDescriptorSet write = {
4726 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
4727 .dstSet = set,
4728 .dstBinding = 0,
4729 .dstArrayElement = 0,
4730 .descriptorCount = 1,
4731 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4732 .pImageInfo = &image_info,
4733 };
4734 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
4735
4736 v3dv_CmdBindDescriptorSets(_cmd_buffer,
4737 VK_PIPELINE_BIND_POINT_GRAPHICS,
4738 device->meta.blit.p_layout,
4739 0, 1, &set,
4740 0, NULL);
4741
4742 /* If the region we are about to blit is tile-aligned, then we can
4743 * use the render pass version that won't pre-load the tile buffer
4744 * with the dst image contents before the blit. The exception is when we
4745 * don't have a full color mask, since in that case we need to preserve
4746 * the original value of some of the color components.
4747 *
4748 * Since all layers have the same area, we only need to compute this for
4749 * the first.
4750 */
4751 if (i == 0) {
4752 struct v3dv_render_pass *pipeline_pass =
4753 v3dv_render_pass_from_handle(pipeline->pass);
4754 can_skip_tlb_load =
4755 cmask == full_cmask &&
4756 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
4757 framebuffer, pipeline_pass, 0);
4758 }
4759
4760 /* Record blit */
4761 VkRenderPassBeginInfo rp_info = {
4762 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
4763 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
4764 pipeline->pass,
4765 .framebuffer = fb,
4766 .renderArea = render_area,
4767 .clearValueCount = 0,
4768 };
4769
4770 VkSubpassBeginInfo sp_info = {
4771 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
4772 .contents = VK_SUBPASS_CONTENTS_INLINE,
4773 };
4774
4775 v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
4776 struct v3dv_job *job = cmd_buffer->state.job;
4777 if (!job)
4778 goto fail;
4779
4780 /* For 3D blits we need to compute the source slice to blit from (the Z
4781 * coordinate of the source sample operation). We want to choose this
4782 * based on the ratio of the depth of the source and the destination
4783 * images, picking the coordinate in the middle of each step.
4784 */
4785 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
4786 tex_coords[4] =
4787 !mirror_z ?
4788 (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
4789 (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
4790 }
4791
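      /* Upload the blit coordinates: 5 floats (x0, y0, x1, y1, z), i.e.
       * 20 bytes, consumed by the vertex stage of the blit pipeline.
       */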
4792 v3dv_CmdPushConstants(_cmd_buffer,
4793 device->meta.blit.p_layout,
4794 VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
4795 &tex_coords);
4796
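      /* A single 4-vertex draw covers the destination rectangle for this
       * layer.
       */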
4797 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
4798
4799 VkSubpassEndInfo sp_end_info = {
4800 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
4801 };
4802
4803 v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
4804 }
4805
4806 fail:
4807 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);
4808
4809 return handled;
4810 }
4811
4812 VKAPI_ATTR void VKAPI_CALL
4813 v3dv_CmdBlitImage2(VkCommandBuffer commandBuffer,
4814 const VkBlitImageInfo2 *pBlitImageInfo)
4815 {
4816 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4817 V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
4818 V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);
4819
4820 /* From vkCmdBlitImage:
4821 * "srcImage must not use a format that requires a sampler YCBCR
4822 * conversion"
4823 * "dstImage must not use a format that requires a sampler YCBCR
4824 * conversion"
4825 */
4826 assert(src->plane_count == 1);
4827 assert(dst->plane_count == 1);
4828
4829 /* This command can only happen outside a render pass */
4830 assert(cmd_buffer->state.pass == NULL);
4831 assert(cmd_buffer->state.job == NULL);
4832
4833 /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
4834 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
4835 src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4836
4837 /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
4838 assert(!vk_format_is_compressed(dst->vk.format));
4839
4840 cmd_buffer->state.is_transfer = true;
4841
4842 for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
4843 const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];
4844
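      /* Try the TFU (Texture Formatting Unit) path first; if it cannot
       * handle this region, fall back to the shader-based blit.
       */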
4845 if (blit_tfu(cmd_buffer, dst, src, region))
4846 continue;
4847 if (blit_shader(cmd_buffer,
4848 dst, dst->vk.format,
4849 src, src->vk.format,
4850 0, NULL,
4851 region,
4852 pBlitImageInfo->filter, true)) {
4853 continue;
4854 }
4855 unreachable("Unsupported blit operation");
4856 }
4857
4858 cmd_buffer->state.is_transfer = false;
4859 }
4860
4861 static bool
4862 resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
4863 struct v3dv_image *dst,
4864 struct v3dv_image *src,
4865 const VkImageResolve2 *region)
4866 {
4867 /* Multi-planar images cannot be resolved, so we always work with plane 0 */
4868 assert(dst->plane_count == 1);
4869 assert(src->plane_count == 1);
4870
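   /* If either subresource cannot be accessed through the TLB for this
    * region, bail out so the caller falls back to the shader-based blit
    * resolve.
    */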
4871 if (!v3dv_meta_can_use_tlb(src, 0, region->srcSubresource.mipLevel,
4872                               &region->srcOffset, NULL, NULL) ||
4873 !v3dv_meta_can_use_tlb(dst, 0, region->dstSubresource.mipLevel,
4874                               &region->dstOffset, &region->extent, NULL)) {
4875 return false;
4876 }
4877
4878 if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
4879 return false;
4880
4881 const VkFormat fb_format = src->vk.format;
4882
4883 uint32_t num_layers;
4884 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
4885 num_layers = vk_image_subresource_layer_count(&dst->vk,
4886                                                   &region->dstSubresource);
4887 } else {
4888 num_layers = region->extent.depth;
4889 }
4890 assert(num_layers > 0);
4891
4892 struct v3dv_job *job =
4893 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
4894 if (!job)
4895 return true;
4896
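   /* Express the resolve dimensions in format blocks (the block size is
    * 1x1 for uncompressed formats).
    */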
4897 const uint32_t block_w =
4898 vk_format_get_blockwidth(dst->planes[0].vk_format);
4899 const uint32_t block_h =
4900 vk_format_get_blockheight(dst->planes[0].vk_format);
4901 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
4902 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
4903
4904 uint32_t internal_type, internal_bpp;
4905 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
4906 (fb_format, region->srcSubresource.aspectMask,
4907 &internal_type, &internal_bpp);
4908
4909 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
4910 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
4911 true);
4912
4913 struct v3dv_meta_framebuffer framebuffer;
4914 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
4915 internal_type, &job->frame_tiling);
4916
4917 v3dv_X(job->device, job_emit_binning_flush)(job);
4918 v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
4919 &framebuffer, region);
4920
4921 v3dv_cmd_buffer_finish_job(cmd_buffer);
4922 return true;
4923 }
4924
4925 static bool
4926 resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
4927 struct v3dv_image *dst,
4928 struct v3dv_image *src,
4929 const VkImageResolve2 *region)
4930 {
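   /* Express the resolve as a same-size blit: the caller guarantees a
    * multisampled source and a single-sampled destination, which the shader
    * blit path handles.
    */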
4931 const VkImageBlit2 blit_region = {
4932 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
4933 .srcSubresource = region->srcSubresource,
4934 .srcOffsets = {
4935 region->srcOffset,
4936 {
4937 region->srcOffset.x + region->extent.width,
4938 region->srcOffset.y + region->extent.height,
4939 }
4940 },
4941 .dstSubresource = region->dstSubresource,
4942 .dstOffsets = {
4943 region->dstOffset,
4944 {
4945 region->dstOffset.x + region->extent.width,
4946 region->dstOffset.y + region->extent.height,
4947 }
4948 },
4949 };
4950 return blit_shader(cmd_buffer,
4951 dst, dst->vk.format,
4952 src, src->vk.format,
4953 0, NULL,
4954 &blit_region, VK_FILTER_NEAREST, true);
4955 }
4956
4957 VKAPI_ATTR void VKAPI_CALL
4958 v3dv_CmdResolveImage2(VkCommandBuffer commandBuffer,
4959 const VkResolveImageInfo2 *info)
4960
4961 {
4962 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4963 V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
4964 V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
4965
4966 /* This command can only happen outside a render pass */
4967 assert(cmd_buffer->state.pass == NULL);
4968 assert(cmd_buffer->state.job == NULL);
4969
4970 assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
4971 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4972
4973 /* We don't support multi-sampled multi-plane images */
4974 assert(src->plane_count == 1);
4975 assert(dst->plane_count == 1);
4976
4977 cmd_buffer->state.is_transfer = true;
4978
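   /* Try a direct TLB resolve first and fall back to a shader blit when the
    * TLB path cannot handle the region.
    */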
4979 for (uint32_t i = 0; i < info->regionCount; i++) {
4980 if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
4981 continue;
4982 if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
4983 continue;
4984 unreachable("Unsupported multisample resolve operation");
4985 }
4986
4987 cmd_buffer->state.is_transfer = false;
4988 }
4989