1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <vulkan/vulkan.h>
29
30 #include "pvr_blit.h"
31 #include "pvr_clear.h"
32 #include "pvr_csb.h"
33 #include "pvr_formats.h"
34 #include "pvr_job_transfer.h"
35 #include "pvr_private.h"
36 #include "pvr_shader_factory.h"
37 #include "pvr_static_shaders.h"
38 #include "pvr_types.h"
39 #include "util/bitscan.h"
40 #include "util/list.h"
41 #include "util/macros.h"
42 #include "util/u_math.h"
43 #include "vk_alloc.h"
44 #include "vk_command_buffer.h"
45 #include "vk_command_pool.h"
46 #include "vk_format.h"
47 #include "vk_log.h"
48
49 /* TODO: Investigate where this limit comes from. */
50 #define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U
51
52 static struct pvr_transfer_cmd *
53 pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer)
54 {
55 struct pvr_transfer_cmd *transfer_cmd;
56
57 transfer_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
58 sizeof(*transfer_cmd),
59 8U,
60 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
61 if (!transfer_cmd) {
62 vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
63 return NULL;
64 }
65
66 /* transfer_cmd->mapping_count is already set to zero. */
67 transfer_cmd->sources[0].filter = PVR_FILTER_POINT;
68 transfer_cmd->sources[0].resolve_op = PVR_RESOLVE_BLEND;
69 transfer_cmd->sources[0].addr_mode = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
70 transfer_cmd->cmd_buffer = cmd_buffer;
71
72 return transfer_cmd;
73 }
74
75 static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface,
76 VkRect2D *rect,
77 pvr_dev_addr_t dev_addr,
78 VkDeviceSize offset,
79 VkFormat vk_format,
80 VkFormat image_format,
81 uint32_t width,
82 uint32_t height,
83 uint32_t stride)
84 {
85 enum pipe_format pformat = vk_format_to_pipe_format(image_format);
86
87 surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset);
88 surface->width = width;
89 surface->height = height;
90 surface->stride = stride;
91 surface->vk_format = vk_format;
92 surface->mem_layout = PVR_MEMLAYOUT_LINEAR;
93 surface->sample_count = 1;
94
95 /* Initialize the rectangle extent. rect->offset should be zero, as the
96 * offset has already been folded into the device address above; we don't
97 * set it explicitly because transfer_cmd is zero-allocated.
98 */
99 rect->extent.width = width;
100 rect->extent.height = height;
101
102 if (util_format_is_compressed(pformat)) {
103 uint32_t block_width = util_format_get_blockwidth(pformat);
104 uint32_t block_height = util_format_get_blockheight(pformat);
105
106 surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width));
107 surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height));
108 surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width));
109
110 rect->offset.x /= block_width;
111 rect->offset.y /= block_height;
112 rect->extent.width =
113 MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width));
114 rect->extent.height =
115 MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height));
116 }
117 }
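/* Example of the compressed-format adjustment above (hypothetical numbers):
 * for a format with 4x4 blocks, a surface described as 10x10 texels becomes
 * DIV_ROUND_UP(10, 4) = 3 blocks in each dimension, and the stride is
 * likewise converted from texels to blocks.
 */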
118
119 VkFormat pvr_get_raw_copy_format(VkFormat format)
120 {
121 switch (vk_format_get_blocksize(format)) {
122 case 1:
123 return VK_FORMAT_R8_UINT;
124 case 2:
125 return VK_FORMAT_R8G8_UINT;
126 case 3:
127 return VK_FORMAT_R8G8B8_UINT;
128 case 4:
129 return VK_FORMAT_R32_UINT;
130 case 6:
131 return VK_FORMAT_R16G16B16_UINT;
132 case 8:
133 return VK_FORMAT_R32G32_UINT;
134 case 12:
135 return VK_FORMAT_R32G32B32_UINT;
136 case 16:
137 return VK_FORMAT_R32G32B32A32_UINT;
138 default:
139 unreachable("Unhandled copy block size.");
140 }
141 }
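/* The raw copy format only preserves the texel size; for example, a 4-byte
 * format such as VK_FORMAT_B8G8R8A8_UNORM maps to VK_FORMAT_R32_UINT above,
 * so copies move raw bits with no format conversion.
 */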
142
143 static void pvr_setup_transfer_surface(struct pvr_device *device,
144 struct pvr_transfer_cmd_surface *surface,
145 VkRect2D *rect,
146 const struct pvr_image *image,
147 uint32_t array_layer,
148 uint32_t mip_level,
149 const VkOffset3D *offset,
150 const VkExtent3D *extent,
151 float fdepth,
152 VkFormat format,
153 VkImageAspectFlags aspect_mask)
154 {
155 const uint32_t height = MAX2(image->vk.extent.height >> mip_level, 1U);
156 const uint32_t width = MAX2(image->vk.extent.width >> mip_level, 1U);
157 enum pipe_format image_pformat = vk_format_to_pipe_format(image->vk.format);
158 enum pipe_format pformat = vk_format_to_pipe_format(format);
159 const VkImageSubresource sub_resource = {
160 .aspectMask = aspect_mask,
161 .mipLevel = mip_level,
162 .arrayLayer = array_layer,
163 };
164 VkSubresourceLayout info;
165 uint32_t depth;
166
167 if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
168 depth = MAX2(image->vk.extent.depth >> mip_level, 1U);
169 else
170 depth = 1U;
171
172 pvr_get_image_subresource_layout(image, &sub_resource, &info);
173
174 surface->dev_addr = PVR_DEV_ADDR_OFFSET(image->dev_addr, info.offset);
175 surface->width = width;
176 surface->height = height;
177 surface->depth = depth;
178
179 assert(info.rowPitch % vk_format_get_blocksize(format) == 0);
180 surface->stride = info.rowPitch / vk_format_get_blocksize(format);
181
182 surface->vk_format = format;
183 surface->mem_layout = image->memlayout;
184 surface->sample_count = image->vk.samples;
185
186 if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
187 surface->z_position = fdepth;
188 else
189 surface->dev_addr.addr += info.depthPitch * ((uint32_t)fdepth);
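/* That is, for 3D twiddled surfaces the z slice is selected through
 * z_position, while for the other layouts each depth slice is a separate 2D
 * plane in memory, so the device address is advanced by whole depth pitches
 * instead.
 */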
190
191 rect->offset.x = offset->x;
192 rect->offset.y = offset->y;
193 rect->extent.width = extent->width;
194 rect->extent.height = extent->height;
195
196 if (util_format_is_compressed(image_pformat) &&
197 !util_format_is_compressed(pformat)) {
198 uint32_t block_width = util_format_get_blockwidth(image_pformat);
199 uint32_t block_height = util_format_get_blockheight(image_pformat);
200
201 surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width));
202 surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height));
203 surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width));
204
205 rect->offset.x /= block_width;
206 rect->offset.y /= block_height;
207 rect->extent.width =
208 MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width));
209 rect->extent.height =
210 MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height));
211 }
212 }
213
214 void pvr_CmdBlitImage2(VkCommandBuffer commandBuffer,
215 const VkBlitImageInfo2 *pBlitImageInfo)
216 {
217 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
218 PVR_FROM_HANDLE(pvr_image, src, pBlitImageInfo->srcImage);
219 PVR_FROM_HANDLE(pvr_image, dst, pBlitImageInfo->dstImage);
220 struct pvr_device *device = cmd_buffer->device;
221 enum pvr_filter filter = PVR_FILTER_DONTCARE;
222
223 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
224
225 if (pBlitImageInfo->filter == VK_FILTER_LINEAR)
226 filter = PVR_FILTER_LINEAR;
227
228 for (uint32_t i = 0U; i < pBlitImageInfo->regionCount; i++) {
229 const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];
230
231 assert(region->srcSubresource.layerCount ==
232 region->dstSubresource.layerCount);
233 const bool inverted_dst_z =
234 (region->dstOffsets[1].z < region->dstOffsets[0].z);
235 const bool inverted_src_z =
236 (region->srcOffsets[1].z < region->srcOffsets[0].z);
237 const uint32_t min_src_z = inverted_src_z ? region->srcOffsets[1].z
238 : region->srcOffsets[0].z;
239 const uint32_t max_src_z = inverted_src_z ? region->srcOffsets[0].z
240 : region->srcOffsets[1].z;
241 const uint32_t min_dst_z = inverted_dst_z ? region->dstOffsets[1].z
242 : region->dstOffsets[0].z;
243 const uint32_t max_dst_z = inverted_dst_z ? region->dstOffsets[0].z
244 : region->dstOffsets[1].z;
245
246 const uint32_t src_width =
247 region->srcOffsets[1].x - region->srcOffsets[0].x;
248 const uint32_t src_height =
249 region->srcOffsets[1].y - region->srcOffsets[0].y;
250 uint32_t dst_width;
251 uint32_t dst_height;
252
253 float initial_depth_offset;
254 VkExtent3D src_extent;
255 VkExtent3D dst_extent;
256 VkOffset3D dst_offset = region->dstOffsets[0];
257 float z_slice_stride;
258 bool flip_x;
259 bool flip_y;
260
261 if (region->dstOffsets[1].x > region->dstOffsets[0].x) {
262 dst_width = region->dstOffsets[1].x - region->dstOffsets[0].x;
263 flip_x = false;
264 } else {
265 dst_width = region->dstOffsets[0].x - region->dstOffsets[1].x;
266 flip_x = true;
267 dst_offset.x = region->dstOffsets[1].x;
268 }
269
270 if (region->dstOffsets[1].y > region->dstOffsets[0].y) {
271 dst_height = region->dstOffsets[1].y - region->dstOffsets[0].y;
272 flip_y = false;
273 } else {
274 dst_height = region->dstOffsets[0].y - region->dstOffsets[1].y;
275 flip_y = true;
276 dst_offset.y = region->dstOffsets[1].y;
277 }
278
279 /* If any of the region extents is zero, reject the blit for this region
280 * and continue with the next one.
281 */
282 if (!src_width || !src_height || !dst_width || !dst_height ||
283 !(max_dst_z - min_dst_z) || !(max_src_z - min_src_z)) {
284 mesa_loge("BlitImage: Region %i has an area of zero", i);
285 continue;
286 }
287
288 src_extent = (VkExtent3D){
289 .width = src_width,
290 .height = src_height,
291 .depth = 0U,
292 };
293
294 dst_extent = (VkExtent3D){
295 .width = dst_width,
296 .height = dst_height,
297 .depth = 0U,
298 };
299
300 /* The z_position of a transfer surface is intended to be in the range
301 * of 0.0f <= z_position <= depth. It will be used as a texture coordinate
302 * in the source surface for cases where linear filtering is enabled, so
303 * the fractional part will need to represent the exact midpoint of a z
304 * slice range in the source texture, as it maps to each destination
305 * slice.
306 *
307 * For destination surfaces, the fractional part is discarded, so
308 * we can safely pass the slice index.
309 */
310
311 /* Calculate the ratio of z slices in our source region to that of our
312 * destination region, to get the number of z slices in our source region
313 * to iterate over for each destination slice.
314 *
315 * If our destination region is inverted, we iterate backwards.
316 */
317 z_slice_stride =
318 (inverted_dst_z ? -1.0f : 1.0f) *
319 ((float)(max_src_z - min_src_z) / (float)(max_dst_z - min_dst_z));
320
321 /* Bias the initial depth by half of the z slice stride so it starts at the
322 * midpoint of the first source slice range within the blit region.
323 */
324 initial_depth_offset =
325 (inverted_dst_z ? max_src_z : min_src_z) + (0.5f * z_slice_stride);
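/* Worked example (hypothetical numbers): blitting src slices [0, 4) onto
 * dst slices [0, 2) with no inversion gives z_slice_stride = 4 / 2 = 2.0 and
 * initial_depth_offset = 0 + 0.5 * 2.0 = 1.0, so dst slice 0 samples the
 * source at z = 1.0 (the midpoint of src slices 0 and 1) and dst slice 1
 * samples at z = 3.0.
 */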
326
327 for (uint32_t j = 0U; j < region->srcSubresource.layerCount; j++) {
328 struct pvr_transfer_cmd_surface src_surface = { 0 };
329 struct pvr_transfer_cmd_surface dst_surface = { 0 };
330 VkRect2D src_rect;
331 VkRect2D dst_rect;
332
333 /* Get the subresource info for the src and dst images; this is
334 * required when incrementing the address of the depth slice used by
335 * the transfer surface.
336 */
337 VkSubresourceLayout src_info, dst_info;
338 const VkImageSubresource src_sub_resource = {
339 .aspectMask = region->srcSubresource.aspectMask,
340 .mipLevel = region->srcSubresource.mipLevel,
341 .arrayLayer = region->srcSubresource.baseArrayLayer + j,
342 };
343 const VkImageSubresource dst_sub_resource = {
344 .aspectMask = region->dstSubresource.aspectMask,
345 .mipLevel = region->dstSubresource.mipLevel,
346 .arrayLayer = region->dstSubresource.baseArrayLayer + j,
347 };
348
349 pvr_get_image_subresource_layout(src, &src_sub_resource, &src_info);
350 pvr_get_image_subresource_layout(dst, &dst_sub_resource, &dst_info);
351
352 /* Set up the transfer surfaces once per image layer; the depth slices
353 * are then advanced manually below, which saves us from repeating the
354 * subresource queries.
355 */
356 pvr_setup_transfer_surface(device,
357 &src_surface,
358 &src_rect,
359 src,
360 region->srcSubresource.baseArrayLayer + j,
361 region->srcSubresource.mipLevel,
362 &region->srcOffsets[0],
363 &src_extent,
364 initial_depth_offset,
365 src->vk.format,
366 region->srcSubresource.aspectMask);
367
368 pvr_setup_transfer_surface(device,
369 &dst_surface,
370 &dst_rect,
371 dst,
372 region->dstSubresource.baseArrayLayer + j,
373 region->dstSubresource.mipLevel,
374 &dst_offset,
375 &dst_extent,
376 min_dst_z,
377 dst->vk.format,
378 region->dstSubresource.aspectMask);
379
380 for (uint32_t dst_z = min_dst_z; dst_z < max_dst_z; dst_z++) {
381 struct pvr_transfer_cmd *transfer_cmd;
382 VkResult result;
383
384 /* TODO: See if we can allocate all the transfer cmds in one go. */
385 transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
386 if (!transfer_cmd)
387 return;
388
389 transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
390 transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
391 transfer_cmd->sources[0].mappings[0].flip_x = flip_x;
392 transfer_cmd->sources[0].mappings[0].flip_y = flip_y;
393 transfer_cmd->sources[0].mapping_count++;
394
395 transfer_cmd->sources[0].surface = src_surface;
396 transfer_cmd->sources[0].filter = filter;
397 transfer_cmd->source_count = 1;
398
399 transfer_cmd->dst = dst_surface;
400 transfer_cmd->scissor = dst_rect;
401
402 result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
403 if (result != VK_SUCCESS) {
404 vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
405 return;
406 }
407
408 if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
409 src_surface.z_position += z_slice_stride;
410 } else {
411 src_surface.dev_addr.addr +=
412 src_info.depthPitch * ((uint32_t)z_slice_stride);
413 }
414
415 if (dst_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
416 dst_surface.z_position += 1.0f;
417 else
418 dst_surface.dev_addr.addr += dst_info.depthPitch;
419 }
420 }
421 }
422 }
423
424 static VkFormat pvr_get_copy_format(VkFormat format)
425 {
426 switch (format) {
427 case VK_FORMAT_R8_SNORM:
428 return VK_FORMAT_R8_SINT;
429 case VK_FORMAT_R8G8_SNORM:
430 return VK_FORMAT_R8G8_SINT;
431 case VK_FORMAT_R8G8B8_SNORM:
432 return VK_FORMAT_R8G8B8_SINT;
433 case VK_FORMAT_R8G8B8A8_SNORM:
434 return VK_FORMAT_R8G8B8A8_SINT;
435 case VK_FORMAT_B8G8R8A8_SNORM:
436 return VK_FORMAT_B8G8R8A8_SINT;
437 default:
438 return format;
439 }
440 }
441
442 static void
443 pvr_setup_surface_for_image(struct pvr_device *device,
444 struct pvr_transfer_cmd_surface *surface,
445 VkRect2D *rect,
446 const struct pvr_image *image,
447 uint32_t array_layer,
448 uint32_t array_offset,
449 uint32_t mip_level,
450 const VkOffset3D *offset,
451 const VkExtent3D *extent,
452 uint32_t depth,
453 VkFormat format,
454 const VkImageAspectFlags aspect_mask)
455 {
456 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
457 pvr_setup_transfer_surface(device,
458 surface,
459 rect,
460 image,
461 array_layer + array_offset,
462 mip_level,
463 offset,
464 extent,
465 0.0f,
466 format,
467 aspect_mask);
468 } else {
469 pvr_setup_transfer_surface(device,
470 surface,
471 rect,
472 image,
473 array_layer,
474 mip_level,
475 offset,
476 extent,
477 (float)depth,
478 format,
479 aspect_mask);
480 }
481 }
482
483 static VkResult
484 pvr_copy_or_resolve_image_region(struct pvr_cmd_buffer *cmd_buffer,
485 enum pvr_resolve_op resolve_op,
486 const struct pvr_image *src,
487 const struct pvr_image *dst,
488 const VkImageCopy2 *region)
489 {
490 enum pipe_format src_pformat = vk_format_to_pipe_format(src->vk.format);
491 enum pipe_format dst_pformat = vk_format_to_pipe_format(dst->vk.format);
492 bool src_block_compressed = util_format_is_compressed(src_pformat);
493 bool dst_block_compressed = util_format_is_compressed(dst_pformat);
494 VkExtent3D src_extent;
495 VkExtent3D dst_extent;
496 VkFormat dst_format;
497 VkFormat src_format;
498 uint32_t dst_layers;
499 uint32_t src_layers;
500 uint32_t max_slices;
501 uint32_t flags = 0U;
502
503 if (src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
504 region->srcSubresource.aspectMask !=
505 (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
506 /* Takes the stencil of the source and the depth of the destination and
507 * combines the two interleaved.
508 */
509 flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
510
511 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
512 /* Takes the depth of the source and the stencil of the destination and
513 * combines the two interleaved.
514 */
515 flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
516 }
517 }
518
519 src_extent = region->extent;
520 dst_extent = region->extent;
521
522 if (src_block_compressed && !dst_block_compressed) {
523 uint32_t block_width = util_format_get_blockwidth(src_pformat);
524 uint32_t block_height = util_format_get_blockheight(src_pformat);
525
526 dst_extent.width = MAX2(1U, DIV_ROUND_UP(src_extent.width, block_width));
527 dst_extent.height =
528 MAX2(1U, DIV_ROUND_UP(src_extent.height, block_height));
529 } else if (!src_block_compressed && dst_block_compressed) {
530 uint32_t block_width = util_format_get_blockwidth(dst_pformat);
531 uint32_t block_height = util_format_get_blockheight(dst_pformat);
532
533 dst_extent.width = MAX2(1U, src_extent.width * block_width);
534 dst_extent.height = MAX2(1U, src_extent.height * block_height);
535 }
536
537 /* We don't care what format dst is as it's guaranteed to be size compatible
538 * with src.
539 */
540 dst_format = pvr_get_raw_copy_format(src->vk.format);
541 src_format = dst_format;
542
543 src_layers =
544 vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
545 dst_layers =
546 vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource);
547
548 /* srcSubresource.layerCount must match the layerCount of dstSubresource in
549 * copies not involving 3D images. In copies involving a 3D image and a 2D
550 * array image, the layerCount of the 2D image is used instead.
551 */
552 max_slices = MAX3(src_layers, dst_layers, region->extent.depth);
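/* Example (hypothetical case): copying a 6-layer 2D array image into a 3D
 * image with extent.depth = 6 gives src_layers = 6, dst_layers = 1 and
 * max_slices = 6, so one transfer command is emitted per slice below.
 */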
553
554 for (uint32_t i = 0U; i < max_slices; i++) {
555 struct pvr_transfer_cmd *transfer_cmd;
556 VkResult result;
557
558 transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
559 if (!transfer_cmd)
560 return VK_ERROR_OUT_OF_HOST_MEMORY;
561
562 transfer_cmd->flags |= flags;
563 transfer_cmd->sources[0].resolve_op = resolve_op;
564
565 pvr_setup_surface_for_image(
566 cmd_buffer->device,
567 &transfer_cmd->sources[0].surface,
568 &transfer_cmd->sources[0].mappings[0U].src_rect,
569 src,
570 region->srcSubresource.baseArrayLayer,
571 i,
572 region->srcSubresource.mipLevel,
573 &region->srcOffset,
574 &src_extent,
575 region->srcOffset.z + i,
576 src_format,
577 region->srcSubresource.aspectMask);
578
579 pvr_setup_surface_for_image(cmd_buffer->device,
580 &transfer_cmd->dst,
581 &transfer_cmd->scissor,
582 dst,
583 region->dstSubresource.baseArrayLayer,
584 i,
585 region->dstSubresource.mipLevel,
586 &region->dstOffset,
587 &dst_extent,
588 region->dstOffset.z + i,
589 dst_format,
590 region->dstSubresource.aspectMask);
591
592 transfer_cmd->sources[0].mappings[0U].dst_rect = transfer_cmd->scissor;
593 transfer_cmd->sources[0].mapping_count++;
594 transfer_cmd->source_count = 1;
595
596 result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
597 if (result != VK_SUCCESS) {
598 vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
599 return result;
600 }
601 }
602
603 return VK_SUCCESS;
604 }
605
606 VkResult
607 pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer *cmd_buffer,
608 const struct pvr_image *src,
609 const struct pvr_image *dst,
610 const VkImageCopy2 *region)
611 {
612 enum pvr_resolve_op resolve_op = PVR_RESOLVE_BLEND;
613
614 if (src->vk.samples > 1U && dst->vk.samples < 2U) {
615 /* Integer resolve picks a single sample. */
616 if (vk_format_is_int(src->vk.format))
617 resolve_op = PVR_RESOLVE_SAMPLE0;
618 }
619
620 return pvr_copy_or_resolve_image_region(cmd_buffer,
621 resolve_op,
622 src,
623 dst,
624 region);
625 }
626
627 static bool pvr_can_merge_ds_regions(const VkImageCopy2 *pRegionA,
628 const VkImageCopy2 *pRegionB)
629 {
630 assert(pRegionA->srcSubresource.aspectMask != 0U);
631 assert(pRegionB->srcSubresource.aspectMask != 0U);
632
633 if (!((pRegionA->srcSubresource.aspectMask ^
634 pRegionB->srcSubresource.aspectMask) &
635 (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
636 return false;
637 }
638
639 /* Assert that the aspectMask matches between src and dst; this is a depth/
640 * stencil image, hence not multi-planar, and from the Vulkan 1.0.223 spec:
641 *
642 * If neither srcImage nor dstImage has a multi-planar image format then
643 * for each element of pRegions, srcSubresource.aspectMask and
644 * dstSubresource.aspectMask must match.
645 */
646 assert(pRegionA->srcSubresource.aspectMask ==
647 pRegionA->dstSubresource.aspectMask);
648 assert(pRegionB->srcSubresource.aspectMask ==
649 pRegionB->dstSubresource.aspectMask);
650
651 if (!(pRegionA->srcSubresource.mipLevel ==
652 pRegionB->srcSubresource.mipLevel &&
653 pRegionA->srcSubresource.baseArrayLayer ==
654 pRegionB->srcSubresource.baseArrayLayer &&
655 pRegionA->srcSubresource.layerCount ==
656 pRegionB->srcSubresource.layerCount)) {
657 return false;
658 }
659
660 if (!(pRegionA->dstSubresource.mipLevel ==
661 pRegionB->dstSubresource.mipLevel &&
662 pRegionA->dstSubresource.baseArrayLayer ==
663 pRegionB->dstSubresource.baseArrayLayer &&
664 pRegionA->dstSubresource.layerCount ==
665 pRegionB->dstSubresource.layerCount)) {
666 return false;
667 }
668
669 if (!(pRegionA->srcOffset.x == pRegionB->srcOffset.x &&
670 pRegionA->srcOffset.y == pRegionB->srcOffset.y &&
671 pRegionA->srcOffset.z == pRegionB->srcOffset.z)) {
672 return false;
673 }
674
675 if (!(pRegionA->dstOffset.x == pRegionB->dstOffset.x &&
676 pRegionA->dstOffset.y == pRegionB->dstOffset.y &&
677 pRegionA->dstOffset.z == pRegionB->dstOffset.z)) {
678 return false;
679 }
680
681 if (!(pRegionA->extent.width == pRegionB->extent.width &&
682 pRegionA->extent.height == pRegionB->extent.height &&
683 pRegionA->extent.depth == pRegionB->extent.depth)) {
684 return false;
685 }
686
687 return true;
688 }
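/* Illustrative mergeable pair (hypothetical case): two consecutive regions
 * that are identical except that one has aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT
 * and the other VK_IMAGE_ASPECT_STENCIL_BIT pass the checks above, and
 * pvr_CmdCopyImage2() below then merges them into a single depth+stencil copy.
 */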
689
690 void pvr_CmdCopyImage2(VkCommandBuffer commandBuffer,
691 const VkCopyImageInfo2 *pCopyImageInfo)
692 {
693 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
694 PVR_FROM_HANDLE(pvr_image, src, pCopyImageInfo->srcImage);
695 PVR_FROM_HANDLE(pvr_image, dst, pCopyImageInfo->dstImage);
696
697 const bool can_merge_ds = src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
698 dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT;
699
700 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
701
702 for (uint32_t i = 0U; i < pCopyImageInfo->regionCount; i++) {
703 VkResult result;
704
705 /* If an application has split a copy between D24S8 images into two
706 * separate copy regions (one for the depth aspect and one for the
707 * stencil aspect), attempt to merge the two regions back into one blit.
708 *
709 * The regions can only be merged if they are identical apart from the
710 * aspectMask, where one has to be depth and the other has to be
711 * stencil.
712 *
713 * Only consecutive regions are considered for merging; non-consecutive
714 * regions are not merged.
715 */
716 if (can_merge_ds && i != (pCopyImageInfo->regionCount - 1)) {
717 const bool ret =
718 pvr_can_merge_ds_regions(&pCopyImageInfo->pRegions[i],
719 &pCopyImageInfo->pRegions[i + 1]);
720 if (ret) {
721 VkImageCopy2 region = pCopyImageInfo->pRegions[i];
722
723 region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
724 VK_IMAGE_ASPECT_STENCIL_BIT;
725 region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
726 VK_IMAGE_ASPECT_STENCIL_BIT;
727
728 result = pvr_copy_or_resolve_color_image_region(cmd_buffer,
729 src,
730 dst,
731 &region);
732 if (result != VK_SUCCESS)
733 return;
734
735 /* Skip the next region since it has just been processed together with
736 * the current region.
737 */
738 i++;
739
740 continue;
741 }
742 }
743
744 result =
745 pvr_copy_or_resolve_color_image_region(cmd_buffer,
746 src,
747 dst,
748 &pCopyImageInfo->pRegions[i]);
749 if (result != VK_SUCCESS)
750 return;
751 }
752 }
753
754 VkResult
755 pvr_copy_buffer_to_image_region_format(struct pvr_cmd_buffer *const cmd_buffer,
756 const pvr_dev_addr_t buffer_dev_addr,
757 const struct pvr_image *const image,
758 const VkBufferImageCopy2 *const region,
759 const VkFormat src_format,
760 const VkFormat dst_format,
761 const uint32_t flags)
762 {
763 enum pipe_format pformat = vk_format_to_pipe_format(dst_format);
764 uint32_t row_length_in_texels;
765 uint32_t buffer_slice_size;
766 uint32_t buffer_layer_size;
767 uint32_t height_in_blks;
768 uint32_t row_length;
769
770 if (region->bufferRowLength == 0)
771 row_length_in_texels = region->imageExtent.width;
772 else
773 row_length_in_texels = region->bufferRowLength;
774
775 if (region->bufferImageHeight == 0)
776 height_in_blks = region->imageExtent.height;
777 else
778 height_in_blks = region->bufferImageHeight;
779
780 if (util_format_is_compressed(pformat)) {
781 uint32_t block_width = util_format_get_blockwidth(pformat);
782 uint32_t block_height = util_format_get_blockheight(pformat);
783 uint32_t block_size = util_format_get_blocksize(pformat);
784
785 height_in_blks = DIV_ROUND_UP(height_in_blks, block_height);
786 row_length_in_texels =
787 DIV_ROUND_UP(row_length_in_texels, block_width) * block_size;
788 }
789
790 row_length = row_length_in_texels * vk_format_get_blocksize(src_format);
791
792 buffer_slice_size = height_in_blks * row_length;
793 buffer_layer_size = buffer_slice_size * region->imageExtent.depth;
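/* Illustrative example (hypothetical numbers): a 64x64 R8G8B8A8 region with
 * bufferRowLength = 0 and bufferImageHeight = 0 gives row_length_in_texels =
 * 64, row_length = 64 * 4 = 256 bytes, buffer_slice_size = 64 * 256 = 16384
 * bytes and, with imageExtent.depth = 1, buffer_layer_size = 16384 bytes too.
 */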
794
795 for (uint32_t i = 0; i < region->imageExtent.depth; i++) {
796 const uint32_t depth = i + (uint32_t)region->imageOffset.z;
797
798 for (uint32_t j = 0; j < region->imageSubresource.layerCount; j++) {
799 const VkDeviceSize buffer_offset = region->bufferOffset +
800 (j * buffer_layer_size) +
801 (i * buffer_slice_size);
802 struct pvr_transfer_cmd *transfer_cmd;
803 VkResult result;
804
805 transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
806 if (!transfer_cmd)
807 return VK_ERROR_OUT_OF_HOST_MEMORY;
808
809 transfer_cmd->flags = flags;
810
811 pvr_setup_buffer_surface(
812 &transfer_cmd->sources[0].surface,
813 &transfer_cmd->sources[0].mappings[0].src_rect,
814 buffer_dev_addr,
815 buffer_offset,
816 src_format,
817 image->vk.format,
818 region->imageExtent.width,
819 region->imageExtent.height,
820 row_length_in_texels);
821
822 transfer_cmd->sources[0].surface.depth = 1;
823 transfer_cmd->source_count = 1;
824
825 pvr_setup_transfer_surface(cmd_buffer->device,
826 &transfer_cmd->dst,
827 &transfer_cmd->scissor,
828 image,
829 region->imageSubresource.baseArrayLayer + j,
830 region->imageSubresource.mipLevel,
831 &region->imageOffset,
832 &region->imageExtent,
833 depth,
834 dst_format,
835 region->imageSubresource.aspectMask);
836
837 transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
838 transfer_cmd->sources[0].mapping_count++;
839
840 result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
841 if (result != VK_SUCCESS) {
842 vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
843 return result;
844 }
845 }
846 }
847
848 return VK_SUCCESS;
849 }
850
851 VkResult
852 pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer *const cmd_buffer,
853 const pvr_dev_addr_t buffer_dev_addr,
854 const struct pvr_image *const image,
855 const VkBufferImageCopy2 *const region)
856 {
857 const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
858 VkFormat src_format;
859 VkFormat dst_format;
860 uint32_t flags = 0;
861
862 if (vk_format_has_depth(image->vk.format) &&
863 vk_format_has_stencil(image->vk.format)) {
864 flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
865
866 if ((aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) {
867 src_format = vk_format_stencil_only(image->vk.format);
868 } else {
869 src_format = vk_format_depth_only(image->vk.format);
870 flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
871 }
872
873 dst_format = image->vk.format;
874 } else {
875 src_format = pvr_get_raw_copy_format(image->vk.format);
876 dst_format = src_format;
877 }
878
879 return pvr_copy_buffer_to_image_region_format(cmd_buffer,
880 buffer_dev_addr,
881 image,
882 region,
883 src_format,
884 dst_format,
885 flags);
886 }
887
888 void pvr_CmdCopyBufferToImage2(
889 VkCommandBuffer commandBuffer,
890 const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
891 {
892 PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferToImageInfo->srcBuffer);
893 PVR_FROM_HANDLE(pvr_image, dst, pCopyBufferToImageInfo->dstImage);
894 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
895
896 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
897
898 for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
899 const VkResult result =
900 pvr_copy_buffer_to_image_region(cmd_buffer,
901 src->dev_addr,
902 dst,
903 &pCopyBufferToImageInfo->pRegions[i]);
904 if (result != VK_SUCCESS)
905 return;
906 }
907 }
908
909 VkResult
910 pvr_copy_image_to_buffer_region_format(struct pvr_cmd_buffer *const cmd_buffer,
911 const struct pvr_image *const image,
912 const pvr_dev_addr_t buffer_dev_addr,
913 const VkBufferImageCopy2 *const region,
914 const VkFormat src_format,
915 const VkFormat dst_format)
916 {
917 enum pipe_format pformat = vk_format_to_pipe_format(image->vk.format);
918 struct pvr_transfer_cmd_surface dst_surface = { 0 };
919 VkImageSubresource sub_resource;
920 uint32_t buffer_image_height;
921 uint32_t buffer_row_length;
922 uint32_t buffer_slice_size;
923 uint32_t max_array_layers;
924 VkRect2D dst_rect = { 0 };
925 uint32_t max_depth_slice;
926 VkSubresourceLayout info;
927
928 /* Only images with VK_SAMPLE_COUNT_1_BIT can be copied to a buffer. */
929 assert(image->vk.samples == 1);
930
931 if (region->bufferRowLength == 0)
932 buffer_row_length = region->imageExtent.width;
933 else
934 buffer_row_length = region->bufferRowLength;
935
936 if (region->bufferImageHeight == 0)
937 buffer_image_height = region->imageExtent.height;
938 else
939 buffer_image_height = region->bufferImageHeight;
940
941 max_array_layers =
942 region->imageSubresource.baseArrayLayer +
943 vk_image_subresource_layer_count(&image->vk, &region->imageSubresource);
944
945 buffer_slice_size = buffer_image_height * buffer_row_length *
946 vk_format_get_blocksize(dst_format);
947
948 max_depth_slice = region->imageExtent.depth + region->imageOffset.z;
949
950 pvr_setup_buffer_surface(&dst_surface,
951 &dst_rect,
952 buffer_dev_addr,
953 region->bufferOffset,
954 dst_format,
955 image->vk.format,
956 buffer_row_length,
957 buffer_image_height,
958 buffer_row_length);
959
960 dst_rect.extent.width = region->imageExtent.width;
961 dst_rect.extent.height = region->imageExtent.height;
962
963 if (util_format_is_compressed(pformat)) {
964 uint32_t block_width = util_format_get_blockwidth(pformat);
965 uint32_t block_height = util_format_get_blockheight(pformat);
966
967 dst_rect.extent.width =
968 MAX2(1U, DIV_ROUND_UP(dst_rect.extent.width, block_width));
969 dst_rect.extent.height =
970 MAX2(1U, DIV_ROUND_UP(dst_rect.extent.height, block_height));
971 }
972
973 sub_resource = (VkImageSubresource){
974 .aspectMask = region->imageSubresource.aspectMask,
975 .mipLevel = region->imageSubresource.mipLevel,
976 .arrayLayer = region->imageSubresource.baseArrayLayer,
977 };
978
979 pvr_get_image_subresource_layout(image, &sub_resource, &info);
980
981 for (uint32_t i = region->imageSubresource.baseArrayLayer;
982 i < max_array_layers;
983 i++) {
984 struct pvr_transfer_cmd_surface src_surface = { 0 };
985 VkRect2D src_rect = { 0 };
986
987 /* Note: Set the depth to the initial depth offset; the memory address (or
988 * the z_position) for the depth slice is incremented manually in the
989 * loop below.
990 */
991 pvr_setup_transfer_surface(cmd_buffer->device,
992 &src_surface,
993 &src_rect,
994 image,
995 i,
996 region->imageSubresource.mipLevel,
997 &region->imageOffset,
998 &region->imageExtent,
999 region->imageOffset.z,
1000 src_format,
1001 region->imageSubresource.aspectMask);
1002
1003 for (uint32_t j = region->imageOffset.z; j < max_depth_slice; j++) {
1004 struct pvr_transfer_cmd *transfer_cmd;
1005 VkResult result;
1006
1007 /* TODO: See if we can allocate all the transfer cmds in one go. */
1008 transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
1009 if (!transfer_cmd)
1010 return vk_error(cmd_buffer->device, VK_ERROR_OUT_OF_HOST_MEMORY);
1011
1012 transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
1013 transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
1014 transfer_cmd->sources[0].mapping_count++;
1015
1016 transfer_cmd->sources[0].surface = src_surface;
1017 transfer_cmd->source_count = 1;
1018
1019 transfer_cmd->dst = dst_surface;
1020 transfer_cmd->scissor = dst_rect;
1021
1022 result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
1023 if (result != VK_SUCCESS) {
1024 vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
1025 return result;
1026 }
1027
1028 dst_surface.dev_addr.addr += buffer_slice_size;
1029
1030 if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
1031 src_surface.z_position += 1.0f;
1032 else
1033 src_surface.dev_addr.addr += info.depthPitch;
1034 }
1035 }
1036
1037 return VK_SUCCESS;
1038 }
1039
1040 VkResult
1041 pvr_copy_image_to_buffer_region(struct pvr_cmd_buffer *const cmd_buffer,
1042 const struct pvr_image *const image,
1043 const pvr_dev_addr_t buffer_dev_addr,
1044 const VkBufferImageCopy2 *const region)
1045 {
1046 const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
1047
1048 VkFormat src_format = pvr_get_copy_format(image->vk.format);
1049 VkFormat dst_format;
1050
1051 /* Color and depth aspect copies can be done using an appropriate raw format.
1052 */
1053 if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) {
1054 src_format = pvr_get_raw_copy_format(src_format);
1055 dst_format = src_format;
1056 } else if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
1057 /* From the Vulkan spec:
1058 *
1059 * Data copied to or from the stencil aspect of any depth/stencil
1060 * format is tightly packed with one VK_FORMAT_S8_UINT value per texel.
1061 */
1062 dst_format = VK_FORMAT_S8_UINT;
1063 } else {
1064 /* YUV Planes require specific formats. */
1065 dst_format = src_format;
1066 }
1067
1068 return pvr_copy_image_to_buffer_region_format(cmd_buffer,
1069 image,
1070 buffer_dev_addr,
1071 region,
1072 src_format,
1073 dst_format);
1074 }
1075
1076 void pvr_CmdCopyImageToBuffer2(
1077 VkCommandBuffer commandBuffer,
1078 const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
1079 {
1080 PVR_FROM_HANDLE(pvr_buffer, dst, pCopyImageToBufferInfo->dstBuffer);
1081 PVR_FROM_HANDLE(pvr_image, src, pCopyImageToBufferInfo->srcImage);
1082 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1083
1084 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
1085
1086 for (uint32_t i = 0U; i < pCopyImageToBufferInfo->regionCount; i++) {
1087 const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[i];
1088
1089 const VkResult result = pvr_copy_image_to_buffer_region(cmd_buffer,
1090 src,
1091 dst->dev_addr,
1092 region);
1093 if (result != VK_SUCCESS)
1094 return;
1095 }
1096 }
1097
1098 static void pvr_calc_mip_level_extents(const struct pvr_image *image,
1099 uint16_t mip_level,
1100 VkExtent3D *extent_out)
1101 {
1102 /* 3D textures are clamped to 4x4x4. */
1103 const uint32_t clamp = (image->vk.image_type == VK_IMAGE_TYPE_3D) ? 4 : 1;
1104 const VkExtent3D *extent = &image->vk.extent;
1105
1106 extent_out->width = MAX2(extent->width >> mip_level, clamp);
1107 extent_out->height = MAX2(extent->height >> mip_level, clamp);
1108 extent_out->depth = MAX2(extent->depth >> mip_level, clamp);
1109 }
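/* Example (hypothetical numbers): a 32x16x8 3D image at mip level 3 gives
 * 32 >> 3 = 4, 16 >> 3 = 2 and 8 >> 3 = 1, which the 4x4x4 clamp above turns
 * into a 4x4x4 mip extent.
 */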
1110
1111 static VkResult pvr_clear_image_range(struct pvr_cmd_buffer *cmd_buffer,
1112 const struct pvr_image *image,
1113 const VkClearColorValue *pColor,
1114 const VkImageSubresourceRange *psRange,
1115 uint32_t flags)
1116 {
1117 const uint32_t layer_count =
1118 vk_image_subresource_layer_count(&image->vk, psRange);
1119 const uint32_t max_layers = psRange->baseArrayLayer + layer_count;
1120 VkFormat format = image->vk.format;
1121 const VkOffset3D offset = { 0 };
1122 VkExtent3D mip_extent;
1123
1124 assert((psRange->baseArrayLayer + layer_count) <= image->vk.array_layers);
1125
1126 for (uint32_t layer = psRange->baseArrayLayer; layer < max_layers; layer++) {
1127 const uint32_t level_count =
1128 vk_image_subresource_level_count(&image->vk, psRange);
1129 const uint32_t max_level = psRange->baseMipLevel + level_count;
1130
1131 assert((psRange->baseMipLevel + level_count) <= image->vk.mip_levels);
1132
1133 for (uint32_t level = psRange->baseMipLevel; level < max_level; level++) {
1134 pvr_calc_mip_level_extents(image, level, &mip_extent);
1135
1136 for (uint32_t depth = 0; depth < mip_extent.depth; depth++) {
1137 struct pvr_transfer_cmd *transfer_cmd;
1138 VkResult result;
1139
1140 transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
1141 if (!transfer_cmd)
1142 return VK_ERROR_OUT_OF_HOST_MEMORY;
1143
1144 transfer_cmd->flags |= flags;
1145 transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;
1146
1147 for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
1148 transfer_cmd->clear_color[i].ui = pColor->uint32[i];
1149
1150 pvr_setup_transfer_surface(cmd_buffer->device,
1151 &transfer_cmd->dst,
1152 &transfer_cmd->scissor,
1153 image,
1154 layer,
1155 level,
1156 &offset,
1157 &mip_extent,
1158 depth,
1159 format,
1160 psRange->aspectMask);
1161
1162 result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
1163 if (result != VK_SUCCESS) {
1164 vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
1165 return result;
1166 }
1167 }
1168 }
1169 }
1170
1171 return VK_SUCCESS;
1172 }
1173
1174 void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
1175 VkImage _image,
1176 VkImageLayout imageLayout,
1177 const VkClearColorValue *pColor,
1178 uint32_t rangeCount,
1179 const VkImageSubresourceRange *pRanges)
1180 {
1181 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1182 PVR_FROM_HANDLE(pvr_image, image, _image);
1183
1184 for (uint32_t i = 0; i < rangeCount; i++) {
1185 const VkResult result =
1186 pvr_clear_image_range(cmd_buffer, image, pColor, &pRanges[i], 0);
1187 if (result != VK_SUCCESS)
1188 return;
1189 }
1190 }
1191
1192 void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
1193 VkImage _image,
1194 VkImageLayout imageLayout,
1195 const VkClearDepthStencilValue *pDepthStencil,
1196 uint32_t rangeCount,
1197 const VkImageSubresourceRange *pRanges)
1198 {
1199 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1200 PVR_FROM_HANDLE(pvr_image, image, _image);
1201
1202 for (uint32_t i = 0; i < rangeCount; i++) {
1203 const VkImageAspectFlags ds_aspect = VK_IMAGE_ASPECT_DEPTH_BIT |
1204 VK_IMAGE_ASPECT_STENCIL_BIT;
1205 VkClearColorValue clear_ds = { 0 };
1206 uint32_t flags = 0U;
1207 VkResult result;
1208
1209 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
1210 pRanges[i].aspectMask != ds_aspect) {
1211 /* A depth or stencil blit to a packed_depth_stencil requires a merge
1212 * operation.
1213 */
1214 flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
1215
1216 if (pRanges[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
1217 flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
1218 }
1219
1220 clear_ds.float32[0] = pDepthStencil->depth;
1221 clear_ds.uint32[1] = pDepthStencil->stencil;
1222
1223 result =
1224 pvr_clear_image_range(cmd_buffer, image, &clear_ds, pRanges + i, flags);
1225 if (result != VK_SUCCESS)
1226 return;
1227 }
1228 }
1229
1230 static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer,
1231 pvr_dev_addr_t src_addr,
1232 VkDeviceSize src_offset,
1233 pvr_dev_addr_t dst_addr,
1234 VkDeviceSize dst_offset,
1235 VkDeviceSize size,
1236 uint32_t fill_data,
1237 bool is_fill)
1238 {
1239 VkDeviceSize offset = 0;
1240
1241 while (offset < size) {
1242 const VkDeviceSize remaining_size = size - offset;
1243 struct pvr_transfer_cmd *transfer_cmd;
1244 uint32_t texel_width;
1245 VkDeviceSize texels;
1246 VkFormat vk_format;
1247 VkResult result;
1248 uint32_t height;
1249 uint32_t width;
1250
1251 if (is_fill) {
1252 vk_format = VK_FORMAT_R32_UINT;
1253 texel_width = 4U;
1254 } else if (remaining_size >= 16U) {
1255 vk_format = VK_FORMAT_R32G32B32A32_UINT;
1256 texel_width = 16U;
1257 } else if (remaining_size >= 4U) {
1258 vk_format = VK_FORMAT_R32_UINT;
1259 texel_width = 4U;
1260 } else {
1261 vk_format = VK_FORMAT_R8_UINT;
1262 texel_width = 1U;
1263 }
1264
1265 texels = remaining_size / texel_width;
1266
1267 /* Try to do max-width rects, fall back to a 1-height rect for the
1268 * remainder.
1269 */
1270 if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) {
1271 width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
1272 height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
1273 height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS);
1274 } else {
1275 width = texels;
1276 height = 1;
1277 }
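/* Worked example (hypothetical numbers): a 1,000,000 byte copy is treated as
 * 62,500 16-byte texels on the first pass, giving a 2048x30 rect (983,040
 * bytes); the remaining 16,960 bytes fit in a single 1060x1 rect, so the
 * whole copy is covered by two transfer commands.
 */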
1278
1279 transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
1280 if (!transfer_cmd)
1281 return VK_ERROR_OUT_OF_HOST_MEMORY;
1282
1283 if (!is_fill) {
1284 pvr_setup_buffer_surface(
1285 &transfer_cmd->sources[0].surface,
1286 &transfer_cmd->sources[0].mappings[0].src_rect,
1287 src_addr,
1288 offset + src_offset,
1289 vk_format,
1290 vk_format,
1291 width,
1292 height,
1293 width);
1294 transfer_cmd->source_count = 1;
1295 } else {
1296 transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;
1297
1298 for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
1299 transfer_cmd->clear_color[i].ui = fill_data;
1300 }
1301
1302 pvr_setup_buffer_surface(&transfer_cmd->dst,
1303 &transfer_cmd->scissor,
1304 dst_addr,
1305 offset + dst_offset,
1306 vk_format,
1307 vk_format,
1308 width,
1309 height,
1310 width);
1311
1312 if (transfer_cmd->source_count > 0) {
1313 transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
1314
1315 transfer_cmd->sources[0].mapping_count++;
1316 }
1317
1318 result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
1319 if (result != VK_SUCCESS) {
1320 vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
1321 return result;
1322 }
1323
1324 offset += width * height * texel_width;
1325 }
1326
1327 return VK_SUCCESS;
1328 }
1329
1330 void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
1331 VkBuffer dstBuffer,
1332 VkDeviceSize dstOffset,
1333 VkDeviceSize dataSize,
1334 const void *pData)
1335 {
1336 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1337 PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
1338 struct pvr_suballoc_bo *pvr_bo;
1339 VkResult result;
1340
1341 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
1342
1343 result = pvr_cmd_buffer_upload_general(cmd_buffer, pData, dataSize, &pvr_bo);
1344 if (result != VK_SUCCESS)
1345 return;
1346
1347 pvr_cmd_copy_buffer_region(cmd_buffer,
1348 pvr_bo->dev_addr,
1349 0,
1350 dst->dev_addr,
1351 dstOffset,
1352 dataSize,
1353 0U,
1354 false);
1355 }
1356
1357 void pvr_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
1358 const VkCopyBufferInfo2 *pCopyBufferInfo)
1359 {
1360 PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
1361 PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
1362 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1363
1364 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
1365
1366 for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
1367 const VkResult result =
1368 pvr_cmd_copy_buffer_region(cmd_buffer,
1369 src->dev_addr,
1370 pCopyBufferInfo->pRegions[i].srcOffset,
1371 dst->dev_addr,
1372 pCopyBufferInfo->pRegions[i].dstOffset,
1373 pCopyBufferInfo->pRegions[i].size,
1374 0U,
1375 false);
1376 if (result != VK_SUCCESS)
1377 return;
1378 }
1379 }
1380
1381 void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
1382 VkBuffer dstBuffer,
1383 VkDeviceSize dstOffset,
1384 VkDeviceSize fillSize,
1385 uint32_t data)
1386 {
1387 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1388 PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
1389
1390 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
1391
1392 fillSize = vk_buffer_range(&dst->vk, dstOffset, fillSize);
1393
1394 /* From the Vulkan spec:
1395 *
1396 * "size is the number of bytes to fill, and must be either a multiple
1397 * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
1398 * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
1399 * buffer is not a multiple of 4, then the nearest smaller multiple is
1400 * used."
1401 */
1402 fillSize &= ~3ULL;
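/* E.g. (hypothetical numbers): with VK_WHOLE_SIZE and 10 bytes remaining in
 * the buffer, the fill is rounded down to 8 bytes here.
 */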
1403
1404 pvr_cmd_copy_buffer_region(cmd_buffer,
1405 PVR_DEV_ADDR_INVALID,
1406 0,
1407 dst->dev_addr,
1408 dstOffset,
1409 fillSize,
1410 data,
1411 true);
1412 }
1413
1414 /**
1415 * \brief Returns the maximum number of consecutive layers, starting from
1416 * base_layer, whose clear rects contain or match the target rectangle.
1417 *
1418 * \param[in] target_rect The region which the clear should contain or
1419 * match.
1420 * \param[in] base_layer The layer index to start at.
1421 * \param[in] clear_rect_count Number of elements in clear_rects.
1422 * \param[in] clear_rects Array of clear rects.
1423 *
1424 * \return Max number of layers that cover or match the target region.
1425 */
1426 static uint32_t
1427 pvr_get_max_layers_covering_target(VkRect2D target_rect,
1428 uint32_t base_layer,
1429 uint32_t clear_rect_count,
1430 const VkClearRect *clear_rects)
1431 {
1432 const int32_t target_x0 = target_rect.offset.x;
1433 const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
1434 const int32_t target_y0 = target_rect.offset.y;
1435 const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;
1436
1437 uint32_t layer_count = 0;
1438
1439 assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
1440 assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);
1441
1442 for (uint32_t i = 0; i < clear_rect_count; i++) {
1443 const VkClearRect *clear_rect = &clear_rects[i];
1444 const uint32_t max_layer =
1445 clear_rect->baseArrayLayer + clear_rect->layerCount;
1446 bool target_is_covered;
1447 int32_t x0, x1;
1448 int32_t y0, y1;
1449
1450 if (clear_rect->baseArrayLayer == 0)
1451 continue;
1452
1453 assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
1454 UINT32_MAX);
1455
1456 /* Check for layer intersection. */
1457 if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
1458 continue;
1459
1460 x0 = clear_rect->rect.offset.x;
1461 x1 = x0 + (int32_t)clear_rect->rect.extent.width;
1462 y0 = clear_rect->rect.offset.y;
1463 y1 = y0 + (int32_t)clear_rect->rect.extent.height;
1464
1465 assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
1466 assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
1467 INT32_MAX);
1468
1469 target_is_covered = x0 <= target_x0 && x1 >= target_x1;
1470 target_is_covered &= y0 <= target_y0 && y1 >= target_y1;
1471
1472 if (target_is_covered)
1473 layer_count = MAX2(layer_count, max_layer - base_layer);
1474 }
1475
1476 return layer_count;
1477 }
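/* Example (hypothetical numbers): with base_layer = 2 and a single clear rect
 * that covers the whole target with baseArrayLayer = 1 and layerCount = 4,
 * the rect spans layers 1..4, so 5 - 2 = 3 layers starting from base_layer
 * are returned.
 */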
1478
1479 /* Return true if the vertex shader is required to output the render target
1480 * ID used to pick the texture array layer.
1481 */
1482 static inline bool
1483 pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
1484 uint32_t rect_count,
1485 const VkClearRect *rects)
1486 {
1487 if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
1488 return false;
1489
1490 for (uint32_t i = 0; i < rect_count; i++) {
1491 if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
1492 return true;
1493 }
1494
1495 return false;
1496 }
1497
1498 static VkResult pvr_clear_color_attachment_static_create_consts_buffer(
1499 struct pvr_cmd_buffer *cmd_buffer,
1500 const struct pvr_shader_factory_info *shader_info,
1501 const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
1502 ASSERTED bool uses_tile_buffer,
1503 uint32_t tile_buffer_idx,
1504 struct pvr_suballoc_bo **const const_shareds_buffer_out)
1505 {
1506 struct pvr_device *device = cmd_buffer->device;
1507 struct pvr_suballoc_bo *const_shareds_buffer;
1508 struct pvr_bo *tile_buffer;
1509 uint64_t tile_dev_addr;
1510 uint32_t *buffer;
1511 VkResult result;
1512
1513 /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
1514 * Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
1515 */
1516 result =
1517 pvr_cmd_buffer_alloc_mem(cmd_buffer,
1518 device->heaps.general_heap,
1519 PVR_DW_TO_BYTES(shader_info->const_shared_regs),
1520 &const_shareds_buffer);
1521 if (result != VK_SUCCESS)
1522 return result;
1523
1524 buffer = pvr_bo_suballoc_get_map_addr(const_shareds_buffer);
1525
1526 for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) {
1527 uint32_t dest_idx = shader_info->driver_const_location_map[i];
1528
1529 if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED)
1530 continue;
1531
1532 assert(dest_idx < shader_info->const_shared_regs);
1533
1534 switch (i) {
1535 case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0:
1536 case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1:
1537 case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2:
1538 case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3:
1539 buffer[dest_idx] = clear_color[i];
1540 break;
1541
1542 case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER:
1543 assert(uses_tile_buffer);
1544 tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
1545 tile_dev_addr = tile_buffer->vma->dev_addr.addr;
1546 buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32);
1547 break;
1548
1549 case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER:
1550 assert(uses_tile_buffer);
1551 tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
1552 tile_dev_addr = tile_buffer->vma->dev_addr.addr;
1553 buffer[dest_idx] = (uint32_t)tile_dev_addr;
1554 break;
1555
1556 default:
1557 unreachable("Unsupported clear attachment const type.");
1558 }
1559 }
1560
1561 for (uint32_t i = 0; i < shader_info->num_static_const; i++) {
1562 const struct pvr_static_buffer *static_buff =
1563 &shader_info->static_const_buffer[i];
1564
1565 assert(static_buff->dst_idx < shader_info->const_shared_regs);
1566
1567 buffer[static_buff->dst_idx] = static_buff->value;
1568 }
1569
1570 *const_shareds_buffer_out = const_shareds_buffer;
1571
1572 return VK_SUCCESS;
1573 }
1574
1575 static VkResult pvr_clear_color_attachment_static(
1576 struct pvr_cmd_buffer *cmd_buffer,
1577 const struct usc_mrt_resource *mrt_resource,
1578 VkFormat format,
1579 uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
1580 uint32_t template_idx,
1581 uint32_t stencil,
1582 bool vs_has_rt_id_output)
1583 {
1584 struct pvr_device *device = cmd_buffer->device;
1585 ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
1586 ASSERTED const bool has_eight_output_registers =
1587 PVR_HAS_FEATURE(dev_info, eight_output_registers);
1588 const struct pvr_device_static_clear_state *dev_clear_state =
1589 &device->static_clear_state;
1590 const bool uses_tile_buffer = mrt_resource->type ==
1591 USC_MRT_RESOURCE_TYPE_MEMORY;
1592 const struct pvr_pds_clear_attachment_program_info *clear_attachment_program;
1593 struct pvr_pds_pixel_shader_sa_program texture_program;
1594 uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT];
1595 const struct pvr_shader_factory_info *shader_info;
1596 struct pvr_suballoc_bo *pds_texture_program_bo;
1597 struct pvr_static_clear_ppp_template template;
1598 struct pvr_suballoc_bo *const_shareds_buffer;
1599 uint64_t pds_texture_program_addr;
1600 struct pvr_suballoc_bo *pvr_bo;
1601 uint32_t tile_buffer_idx = 0;
1602 uint32_t out_reg_count;
1603 uint32_t output_offset;
1604 uint32_t program_idx;
1605 uint32_t *buffer;
1606 VkResult result;
1607
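   /* Number of 32-bit registers the clear value occupies in the PBE
    * accumulation format for this attachment format.
    */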
1608 out_reg_count =
1609 DIV_ROUND_UP(pvr_get_pbe_accum_format_size_in_bytes(format), 4U);
1610
1611 if (uses_tile_buffer) {
1612 tile_buffer_idx = mrt_resource->mem.tile_buffer;
1613 output_offset = mrt_resource->mem.offset_dw;
1614 } else {
1615 output_offset = mrt_resource->reg.output_reg;
1616 }
1617
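   /* Without the eight_output_registers feature only four USC output
    * registers are available, so the clear value must fit within them.
    */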
1618 assert(has_eight_output_registers || out_reg_count + output_offset <= 4);
1619
1620 program_idx = pvr_get_clear_attachment_program_index(out_reg_count,
1621 output_offset,
1622 uses_tile_buffer);
1623
1624 shader_info = clear_attachment_collection[program_idx].info;
1625
1626 result = pvr_clear_color_attachment_static_create_consts_buffer(
1627 cmd_buffer,
1628 shader_info,
1629 clear_color,
1630 uses_tile_buffer,
1631 tile_buffer_idx,
1632 &const_shareds_buffer);
1633 if (result != VK_SUCCESS)
1634 return result;
1635
1636 /* clang-format off */
1637 texture_program = (struct pvr_pds_pixel_shader_sa_program){
1638 .num_texture_dma_kicks = 1,
1639 .texture_dma_address = {
1640 [0] = const_shareds_buffer->dev_addr.addr,
1641 }
1642 };
1643 /* clang-format on */
1644
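   /* DMA the constants buffer into the USC common store (the shared
    * registers); bsize is the number of shared registers to fill.
    */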
1645 pvr_csb_pack (&texture_program.texture_dma_control[0],
1646 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
1647 doutd_src1) {
1648 doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
1649 doutd_src1.bsize = shader_info->const_shared_regs;
1650 }
1651
1652 clear_attachment_program =
1653 &dev_clear_state->pds_clear_attachment_program_info[program_idx];
1654
1655 /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
1656 * Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
1657 */
1658 result = pvr_cmd_buffer_alloc_mem(
1659 cmd_buffer,
1660 device->heaps.pds_heap,
1661 clear_attachment_program->texture_program_data_size,
1662 &pds_texture_program_bo);
1663 if (result != VK_SUCCESS) {
1664 list_del(&const_shareds_buffer->link);
1665 pvr_bo_suballoc_free(const_shareds_buffer);
1666
1667 return result;
1668 }
1669
1670 buffer = pvr_bo_suballoc_get_map_addr(pds_texture_program_bo);
1671 pds_texture_program_addr = pds_texture_program_bo->dev_addr.addr -
1672 device->heaps.pds_heap->base_addr.addr;
1673
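   /* Generate the PDS texture state data segment straight into the new
    * allocation. The program address above is made relative to the PDS heap
    * base before being packed into the TEXTUREDATABASE state word below.
    */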
1674 pvr_pds_generate_pixel_shader_sa_texture_state_data(
1675 &texture_program,
1676 buffer,
1677 &device->pdevice->dev_info);
1678
1679 pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SHADERBASE],
1680 TA_STATE_PDS_SHADERBASE,
1681 shaderbase) {
1682 shaderbase.addr = clear_attachment_program->pixel_program_offset;
1683 }
1684
1685 pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXUNICODEBASE],
1686 TA_STATE_PDS_TEXUNICODEBASE,
1687 texunicodebase) {
1688 texunicodebase.addr = clear_attachment_program->texture_program_offset;
1689 }
1690
1691 pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO1],
1692 TA_STATE_PDS_SIZEINFO1,
1693 sizeinfo1) {
1694 sizeinfo1.pds_texturestatesize = DIV_ROUND_UP(
1695 clear_attachment_program->texture_program_data_size,
1696 PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE));
1697
1698 sizeinfo1.pds_tempsize =
1699 DIV_ROUND_UP(clear_attachment_program->texture_program_pds_temps_count,
1700 PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE));
1701 }
1702
1703 pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO2],
1704 TA_STATE_PDS_SIZEINFO2,
1705 sizeinfo2) {
1706 sizeinfo2.usc_sharedsize =
1707 DIV_ROUND_UP(shader_info->const_shared_regs,
1708 PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
1709 }
1710
1711 /* Dummy coefficient loading program. */
1712 pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_VARYINGBASE] = 0;
1713
1714 pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXTUREDATABASE],
1715 TA_STATE_PDS_TEXTUREDATABASE,
1716 texturedatabase) {
1717 texturedatabase.addr = PVR_DEV_ADDR(pds_texture_program_addr);
1718 }
1719
1720 assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
1721 template =
1722 cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
1723
1724 template.config.pds_state = &pds_state;
1725
1726 template.config.ispctl.upass =
1727 cmd_buffer->state.render_pass_info.isp_userpass;
1728
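   /* The template index doubles as an aspect mask, so for stencil-clearing
    * variants also program the stencil reference value.
    */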
1729 if (template_idx & VK_IMAGE_ASPECT_STENCIL_BIT)
1730 template.config.ispa.sref = stencil;
1731
1732 if (vs_has_rt_id_output) {
1733 template.config.output_sel.rhw_pres = true;
1734 template.config.output_sel.render_tgt_pres = true;
1735 template.config.output_sel.vtxsize = 4 + 1;
1736 }
1737
1738 result = pvr_emit_ppp_from_template(
1739 &cmd_buffer->state.current_sub_cmd->gfx.control_stream,
1740 &template,
1741 &pvr_bo);
1742 if (result != VK_SUCCESS) {
1743 list_del(&pds_texture_program_bo->link);
1744 pvr_bo_suballoc_free(pds_texture_program_bo);
1745
1746 list_del(&const_shareds_buffer->link);
1747 pvr_bo_suballoc_free(const_shareds_buffer);
1748
1749 return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
1750 }
1751
1752 list_add(&pvr_bo->link, &cmd_buffer->bo_list);
1753
1754 return VK_SUCCESS;
1755 }
1756
1757 /**
1758 * \brief Record a deferred clear operation into the command buffer.
1759 *
1760 * Devices which don't have gs_rta_support require extra handling for RTA
1761 * (render target array) clears. We set up a list of deferred clear
1762 * transfer commands which will be processed at the end of the graphics
1763 * sub command to account for the missing feature.
1764 */
1765 static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer,
1766 const VkClearAttachment *attachment,
1767 const VkClearRect *rect,
1768 bool is_render_init)
1769 {
1770 struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
1771 struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
1772 const struct pvr_renderpass_hwsetup_render *hw_render =
1773 &pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx];
1774 struct pvr_transfer_cmd *transfer_cmd_list;
1775 const struct pvr_image_view *image_view;
1776 const struct pvr_image *image;
1777 uint32_t base_layer;
1778
1779 const VkOffset3D offset = {
1780 .x = rect->rect.offset.x,
1781 .y = rect->rect.offset.y,
1782 .z = 1,
1783 };
1784 const VkExtent3D extent = {
1785 .width = rect->rect.extent.width,
1786 .height = rect->rect.extent.height,
1787 .depth = 1,
1788 };
1789
1790 assert(
1791 !PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, gs_rta_support));
1792
1793 transfer_cmd_list = util_dynarray_grow(&cmd_buffer->deferred_clears,
1794 struct pvr_transfer_cmd,
1795 rect->layerCount);
1796 if (!transfer_cmd_list) {
1797 return vk_command_buffer_set_error(&cmd_buffer->vk,
1798 VK_ERROR_OUT_OF_HOST_MEMORY);
1799 }
1800
1801 /* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
1802 *
1803 * "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
1804 * include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
1805 *
1806 */
1807 if (attachment->aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) {
1808 assert(attachment->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
1809 attachment->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT ||
1810 attachment->aspectMask ==
1811 (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
1812
1813 image_view = pass_info->attachments[hw_render->ds_attach_idx];
1814 } else if (is_render_init) {
1815 uint32_t index;
1816
1817 assert(attachment->colorAttachment < hw_render->color_init_count);
1818 index = hw_render->color_init[attachment->colorAttachment].index;
1819
1820 image_view = pass_info->attachments[index];
1821 } else {
1822 const struct pvr_renderpass_hwsetup_subpass *hw_pass =
1823 pvr_get_hw_subpass(pass_info->pass, pass_info->subpass_idx);
1824 const struct pvr_render_subpass *sub_pass =
1825 &pass_info->pass->subpasses[hw_pass->index];
1826 const uint32_t attachment_idx =
1827 sub_pass->color_attachments[attachment->colorAttachment];
1828
1829 assert(attachment->colorAttachment < sub_pass->color_count);
1830
1831 image_view = pass_info->attachments[attachment_idx];
1832 }
1833
1834 base_layer = image_view->vk.base_array_layer + rect->baseArrayLayer;
1835 image = vk_to_pvr_image(image_view->vk.image);
1836
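   /* Record one fill transfer command per layer. These are flagged as
    * deferred clears and get processed when the graphics sub command ends.
    */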
1837 for (uint32_t i = 0; i < rect->layerCount; i++) {
1838 struct pvr_transfer_cmd *transfer_cmd = &transfer_cmd_list[i];
1839
1840 /* TODO: Add an init function for cases where we don't want to use
1841 * pvr_transfer_cmd_alloc(), and use it here.
1842 */
1843 *transfer_cmd = (struct pvr_transfer_cmd){
1844 .flags = PVR_TRANSFER_CMD_FLAGS_FILL,
1845 .cmd_buffer = cmd_buffer,
1846 .is_deferred_clear = true,
1847 };
1848
1849 if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
1850 for (uint32_t j = 0; j < ARRAY_SIZE(transfer_cmd->clear_color); j++) {
1851 transfer_cmd->clear_color[j].ui =
1852 attachment->clearValue.color.uint32[j];
1853 }
1854 } else {
1855 transfer_cmd->clear_color[0].f =
1856 attachment->clearValue.depthStencil.depth;
1857 transfer_cmd->clear_color[1].ui =
1858 attachment->clearValue.depthStencil.stencil;
1859 }
1860
1861 pvr_setup_transfer_surface(cmd_buffer->device,
1862 &transfer_cmd->dst,
1863 &transfer_cmd->scissor,
1864 image,
1865 base_layer + i,
1866 0,
1867 &offset,
1868 &extent,
1869 0.0f,
1870 image->vk.format,
1871 attachment->aspectMask);
1872 }
1873
1874 return VK_SUCCESS;
1875 }
1876
1877 static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
1878 uint32_t attachment_count,
1879 const VkClearAttachment *attachments,
1880 uint32_t rect_count,
1881 const VkClearRect *rects,
1882 bool is_render_init)
1883 {
1884 const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
1885 struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
1886 const struct pvr_renderpass_hwsetup_subpass *hw_pass =
1887 pvr_get_hw_subpass(pass, pass_info->subpass_idx);
1888 struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
1889 struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
1890 struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index];
1891 uint32_t vs_output_size_in_bytes;
1892 bool vs_has_rt_id_output;
1893
1894 /* TODO: This function can be optimized so that most of the device memory
1895 * gets allocated together in one go and then filled as needed. There might
1896 * also be opportunities to reuse pds code and data segments.
1897 */
1898
1899 assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
1900
1901 pvr_reset_graphics_dirty_state(cmd_buffer, false);
1902
1903 /* We'll be emitting to the control stream. */
1904 sub_cmd->empty_cmd = false;
1905
1906 vs_has_rt_id_output =
1907 pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);
1908
1909 /* 4 because we're expecting the USC to output X, Y, Z, and W. */
1910 vs_output_size_in_bytes = PVR_DW_TO_BYTES(4);
1911 if (vs_has_rt_id_output)
1912 vs_output_size_in_bytes += PVR_DW_TO_BYTES(1);
1913
1914 for (uint32_t i = 0; i < attachment_count; i++) {
1915 const VkClearAttachment *attachment = &attachments[i];
1916 struct pvr_pds_vertex_shader_program pds_program;
1917 struct pvr_pds_upload pds_program_upload = { 0 };
1918 uint64_t current_base_array_layer = ~0;
1919 VkResult result;
1920 float depth;
1921
1922 if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
1923 uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
1924 const struct usc_mrt_resource *mrt_resource;
1925 uint32_t global_attachment_idx;
1926 uint32_t local_attachment_idx;
1927 VkFormat format;
1928
1929 local_attachment_idx = attachment->colorAttachment;
1930
1931 if (is_render_init) {
1932 struct pvr_renderpass_hwsetup_render *hw_render;
1933
1934 assert(pass->hw_setup->render_count > 0);
1935 hw_render = &pass->hw_setup->renders[0];
1936
1937 mrt_resource =
1938 &hw_render->init_setup.mrt_resources[local_attachment_idx];
1939
1940 assert(local_attachment_idx < hw_render->color_init_count);
1941 global_attachment_idx =
1942 hw_render->color_init[local_attachment_idx].index;
1943 } else {
1944 mrt_resource = &hw_pass->setup.mrt_resources[local_attachment_idx];
1945
1946 assert(local_attachment_idx < sub_pass->color_count);
1947 global_attachment_idx =
1948 sub_pass->color_attachments[local_attachment_idx];
1949 }
1950
1951 if (global_attachment_idx == VK_ATTACHMENT_UNUSED)
1952 continue;
1953
1954 assert(global_attachment_idx < pass->attachment_count);
1955 format = pass->attachments[global_attachment_idx].vk_format;
1956
1957 assert(format != VK_FORMAT_UNDEFINED);
1958
1959 pvr_get_hw_clear_color(format,
1960 attachment->clearValue.color,
1961 packed_clear_color);
1962
1963 result = pvr_clear_color_attachment_static(cmd_buffer,
1964 mrt_resource,
1965 format,
1966 packed_clear_color,
1967 VK_IMAGE_ASPECT_COLOR_BIT,
1968 0,
1969 vs_has_rt_id_output);
1970 if (result != VK_SUCCESS)
1971 return;
1972 } else if (hw_pass->z_replicate != -1 &&
1973 attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
1974 const VkClearColorValue clear_color = {
1975 .float32 = { [0] = attachment->clearValue.depthStencil.depth, },
1976 };
1977 const uint32_t template_idx = attachment->aspectMask |
1978 VK_IMAGE_ASPECT_COLOR_BIT;
1979 const uint32_t stencil = attachment->clearValue.depthStencil.stencil;
1980 uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
1981 const struct usc_mrt_resource *mrt_resource;
1982
1983 mrt_resource = &hw_pass->setup.mrt_resources[hw_pass->z_replicate];
1984
1985 pvr_get_hw_clear_color(VK_FORMAT_R32_SFLOAT,
1986 clear_color,
1987 packed_clear_color);
1988
1989 result = pvr_clear_color_attachment_static(cmd_buffer,
1990 mrt_resource,
1991 VK_FORMAT_R32_SFLOAT,
1992 packed_clear_color,
1993 template_idx,
1994 stencil,
1995 vs_has_rt_id_output);
1996 if (result != VK_SUCCESS)
1997 return;
1998 } else {
1999 const uint32_t template_idx = attachment->aspectMask;
2000 struct pvr_static_clear_ppp_template template;
2001 struct pvr_suballoc_bo *pvr_bo;
2002
2003 assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
2004 template =
2005 cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
2006
2007 if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
2008 template.config.ispa.sref =
2009 attachment->clearValue.depthStencil.stencil;
2010 }
2011
2012 if (vs_has_rt_id_output) {
2013 template.config.output_sel.rhw_pres = true;
2014 template.config.output_sel.render_tgt_pres = true;
2015 template.config.output_sel.vtxsize = 4 + 1;
2016 }
2017
2018 result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
2019 &template,
2020 &pvr_bo);
2021 if (result != VK_SUCCESS) {
2022 pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2023 return;
2024 }
2025
2026 list_add(&pvr_bo->link, &cmd_buffer->bo_list);
2027 }
2028
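      /* The clear geometry always needs a Z value for its vertices; default
       * to 1.0 when this clear doesn't include the depth aspect.
       */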
2029 if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
2030 depth = attachment->clearValue.depthStencil.depth;
2031 else
2032 depth = 1.0f;
2033
2034 if (vs_has_rt_id_output) {
2035 const struct pvr_device_static_clear_state *dev_clear_state =
2036 &cmd_buffer->device->static_clear_state;
2037 const struct pvr_suballoc_bo *multi_layer_vert_bo =
2038 dev_clear_state->usc_multi_layer_vertex_shader_bo;
2039
2040 /* We can't use the device's passthrough pds program since it doesn't
2041 * have iterate_instance_id enabled. We'll be uploading code sections
2042 * for each clear rect.
2043 */
2044
2045 /* TODO: See if we can allocate all the code section memory in one go.
2046 * We'd need to make sure that changing instance_id_modifier doesn't
2047 * change the code section size.
2048 * Also check if we can reuse the same code segment for each rect.
2049 * It seems the instance_id_modifier is written into the data section and
2050 * consumed from there by the pds ADD instruction, rather than being
2051 * embedded into the code section.
2052 */
2053
2054 pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
2055 multi_layer_vert_bo);
2056 } else {
2057 /* We can reuse the device's code section, but we'll need to upload data
2058 * sections, so initialize the program.
2059 */
2060 pvr_pds_clear_vertex_shader_program_init_base(
2061 &pds_program,
2062 cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);
2063
2064 pds_program_upload.code_offset =
2065 cmd_buffer->device->static_clear_state.pds.code_offset;
2066 /* TODO: The code size isn't used by pvr_clear_vdm_state(); maybe change
2067 * its interface to make that clear so this doesn't need to be set.
2068 */
2069 pds_program_upload.code_size =
2070 cmd_buffer->device->static_clear_state.pds.code_size;
2071 }
2072
2073 for (uint32_t j = 0; j < rect_count; j++) {
2074 struct pvr_pds_upload pds_program_data_upload;
2075 const VkClearRect *clear_rect = &rects[j];
2076 struct pvr_suballoc_bo *vertices_bo;
2077 uint32_t vdm_cs_size_in_dw;
2078 uint32_t *vdm_cs_buffer;
2079 VkResult result;
2080
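         /* Without gs_rta_support, RTA clears of layers other than 0 are
          * handled with deferred transfer fills (see
          * pvr_add_deferred_rta_clear()); only rects starting at layer 0
          * carry on to the geometry clear below.
          */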
2081 if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
2082 (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
2083 result = pvr_add_deferred_rta_clear(cmd_buffer,
2084 attachment,
2085 clear_rect,
2086 is_render_init);
2087 if (result != VK_SUCCESS)
2088 return;
2089
2090 if (clear_rect->baseArrayLayer != 0)
2091 continue;
2092 }
2093
2094 /* TODO: Allocate all the buffers in one go before the loop, and add
2095 * support for multi-alloc BOs.
2096 */
2097 result = pvr_clear_vertices_upload(cmd_buffer->device,
2098 &clear_rect->rect,
2099 depth,
2100 &vertices_bo);
2101 if (result != VK_SUCCESS) {
2102 pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2103 return;
2104 }
2105
2106 list_add(&vertices_bo->link, &cmd_buffer->bo_list);
2107
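         /* The RTA clear PDS code section depends on the base array layer, so
          * only re-upload it when the base layer changes between rects. The
          * data section is uploaded per rect as it references the new vertex
          * buffer.
          */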
2108 if (vs_has_rt_id_output) {
2109 if (current_base_array_layer != clear_rect->baseArrayLayer) {
2110 const uint32_t base_array_layer = clear_rect->baseArrayLayer;
2111 struct pvr_pds_upload pds_program_code_upload;
2112
2113 result =
2114 pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
2115 &pds_program,
2116 cmd_buffer,
2117 base_array_layer,
2118 &pds_program_code_upload);
2119 if (result != VK_SUCCESS) {
2120 pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2121 return;
2122 }
2123
2124 pds_program_upload.code_offset =
2125 pds_program_code_upload.code_offset;
2126 /* TODO: The code size isn't used by pvr_clear_vdm_state(); maybe
2127 * change its interface to make that clear so this doesn't need
2128 * to be set.
2129 */
2130 pds_program_upload.code_size = pds_program_code_upload.code_size;
2131
2132 current_base_array_layer = base_array_layer;
2133 }
2134
2135 result =
2136 pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
2137 &pds_program,
2138 cmd_buffer,
2139 vertices_bo,
2140 &pds_program_data_upload);
2141 if (result != VK_SUCCESS)
2142 return;
2143 } else {
2144 result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
2145 &pds_program,
2146 cmd_buffer,
2147 vertices_bo,
2148 &pds_program_data_upload);
2149 if (result != VK_SUCCESS)
2150 return;
2151 }
2152
2153 pds_program_upload.data_offset = pds_program_data_upload.data_offset;
2154 pds_program_upload.data_size = pds_program_data_upload.data_size;
2155
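         /* Emit the VDM state words for this clear draw directly into the sub
          * command's control stream, between relocation marks.
          */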
2156 vdm_cs_size_in_dw =
2157 pvr_clear_vdm_state_get_size_in_dw(dev_info,
2158 clear_rect->layerCount);
2159
2160 pvr_csb_set_relocation_mark(&sub_cmd->control_stream);
2161
2162 vdm_cs_buffer =
2163 pvr_csb_alloc_dwords(&sub_cmd->control_stream, vdm_cs_size_in_dw);
2164 if (!vdm_cs_buffer) {
2165 pvr_cmd_buffer_set_error_unwarned(cmd_buffer,
2166 sub_cmd->control_stream.status);
2167 return;
2168 }
2169
2170 pvr_pack_clear_vdm_state(dev_info,
2171 &pds_program_upload,
2172 pds_program.temps_used,
2173 4,
2174 vs_output_size_in_bytes,
2175 clear_rect->layerCount,
2176 vdm_cs_buffer);
2177
2178 pvr_csb_clear_relocation_mark(&sub_cmd->control_stream);
2179 }
2180 }
2181 }
2182
2183 void pvr_clear_attachments_render_init(struct pvr_cmd_buffer *cmd_buffer,
2184 const VkClearAttachment *attachment,
2185 const VkClearRect *rect)
2186 {
2187 pvr_clear_attachments(cmd_buffer, 1, attachment, 1, rect, true);
2188 }
2189
2190 void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
2191 uint32_t attachmentCount,
2192 const VkClearAttachment *pAttachments,
2193 uint32_t rectCount,
2194 const VkClearRect *pRects)
2195 {
2196 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
2197 struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
2198 struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;
2199
2200 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
2201 assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
2202
2203 /* TODO: There are some optimizations that can be made here:
2204 * - For a full screen clear, update the clear values for the corresponding
2205 * attachment index.
2206 * - For a full screen color attachment clear, add its index to a load op
2207 * override to add it to the background shader. This will elide any load
2208 * op loads currently in the background shader as well as the usual
2209 * frag kick for geometry clear.
2210 */
2211
2212 /* If we have any depth/stencil clears, update the sub command depth/stencil
2213 * modification and usage flags.
2214 */
2215 if (state->depth_format != VK_FORMAT_UNDEFINED) {
2216 uint32_t full_screen_clear_count;
2217 bool has_stencil_clear = false;
2218 bool has_depth_clear = false;
2219
2220 for (uint32_t i = 0; i < attachmentCount; i++) {
2221 const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask;
2222
2223 if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
2224 has_stencil_clear = true;
2225
2226 if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
2227 has_depth_clear = true;
2228
2229 if (has_stencil_clear && has_depth_clear)
2230 break;
2231 }
2232
2233 sub_cmd->modifies_stencil |= has_stencil_clear;
2234 sub_cmd->modifies_depth |= has_depth_clear;
2235
2236 /* We only care about clears that have a baseArrayLayer of 0 as any
2237 * attachment clears we move to the background shader must apply to all of
2238 * the attachment's subresources.
2239 */
2240 full_screen_clear_count =
2241 pvr_get_max_layers_covering_target(state->render_pass_info.render_area,
2242 0,
2243 rectCount,
2244 pRects);
2245
2246 if (full_screen_clear_count > 0) {
2247 if (has_stencil_clear &&
2248 sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
2249 sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
2250 }
2251
2252 if (has_depth_clear &&
2253 sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
2254 sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
2255 }
2256 }
2257 }
2258
2259 pvr_clear_attachments(cmd_buffer,
2260 attachmentCount,
2261 pAttachments,
2262 rectCount,
2263 pRects,
2264 false);
2265 }
2266
2267 void pvr_CmdResolveImage2(VkCommandBuffer commandBuffer,
2268 const VkResolveImageInfo2 *pResolveImageInfo)
2269 {
2270 PVR_FROM_HANDLE(pvr_image, src, pResolveImageInfo->srcImage);
2271 PVR_FROM_HANDLE(pvr_image, dst, pResolveImageInfo->dstImage);
2272 PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
2273
2274 PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
2275
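   /* Resolves reuse the copy path: each resolve region is repacked as a
    * VkImageCopy2 and passed to pvr_copy_or_resolve_color_image_region().
    */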
2276 for (uint32_t i = 0U; i < pResolveImageInfo->regionCount; i++) {
2277 VkImageCopy2 region = {
2278 .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
2279 .srcSubresource = pResolveImageInfo->pRegions[i].srcSubresource,
2280 .srcOffset = pResolveImageInfo->pRegions[i].srcOffset,
2281 .dstSubresource = pResolveImageInfo->pRegions[i].dstSubresource,
2282 .dstOffset = pResolveImageInfo->pRegions[i].dstOffset,
2283 .extent = pResolveImageInfo->pRegions[i].extent,
2284 };
2285
2286 VkResult result =
2287 pvr_copy_or_resolve_color_image_region(cmd_buffer, src, dst, &region);
2288 if (result != VK_SUCCESS)
2289 return;
2290 }
2291 }
2292