xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pvr_blit.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <vulkan/vulkan.h>
29 
30 #include "pvr_blit.h"
31 #include "pvr_clear.h"
32 #include "pvr_csb.h"
33 #include "pvr_formats.h"
34 #include "pvr_job_transfer.h"
35 #include "pvr_private.h"
36 #include "pvr_shader_factory.h"
37 #include "pvr_static_shaders.h"
38 #include "pvr_types.h"
39 #include "util/bitscan.h"
40 #include "util/list.h"
41 #include "util/macros.h"
42 #include "util/u_math.h"
43 #include "vk_alloc.h"
44 #include "vk_command_buffer.h"
45 #include "vk_command_pool.h"
46 #include "vk_format.h"
47 #include "vk_log.h"
48 
49 /* TODO: Investigate where this limit comes from. */
50 #define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U
51 
52 static struct pvr_transfer_cmd *
pvr_transfer_cmd_alloc(struct pvr_cmd_buffer * cmd_buffer)53 pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer)
54 {
55    struct pvr_transfer_cmd *transfer_cmd;
56 
57    transfer_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
58                             sizeof(*transfer_cmd),
59                             8U,
60                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
61    if (!transfer_cmd) {
62       vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
63       return NULL;
64    }
65 
66    /* transfer_cmd->mapping_count is already set to zero. */
67    transfer_cmd->sources[0].filter = PVR_FILTER_POINT;
68    transfer_cmd->sources[0].resolve_op = PVR_RESOLVE_BLEND;
69    transfer_cmd->sources[0].addr_mode = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
70    transfer_cmd->cmd_buffer = cmd_buffer;
71 
72    return transfer_cmd;
73 }
74 
/* Describe a linear buffer as a transfer surface of @width x @height texels
 * with the given @stride (in texels).
 *
 * @vk_format is the format the transfer operates in; @image_format is the
 * format of the image the buffer is copied to/from and decides whether the
 * dimensions must be converted from texels to compressed blocks.
 */
static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface,
                                     VkRect2D *rect,
                                     pvr_dev_addr_t dev_addr,
                                     VkDeviceSize offset,
                                     VkFormat vk_format,
                                     VkFormat image_format,
                                     uint32_t width,
                                     uint32_t height,
                                     uint32_t stride)
{
   const enum pipe_format pipe_fmt = vk_format_to_pipe_format(image_format);

   surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset);
   surface->vk_format = vk_format;
   surface->mem_layout = PVR_MEMLAYOUT_LINEAR;
   surface->sample_count = 1;
   surface->width = width;
   surface->height = height;
   surface->stride = stride;

   /* rect->offset is intentionally left at zero: the buffer offset has
    * already been folded into the device address above, and the transfer
    * command is zero-allocated.
    */
   rect->extent.width = width;
   rect->extent.height = height;

   if (util_format_is_compressed(pipe_fmt)) {
      const uint32_t bw = util_format_get_blockwidth(pipe_fmt);
      const uint32_t bh = util_format_get_blockheight(pipe_fmt);

      /* Convert every texel dimension to whole compressed blocks. */
      surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, bw));
      surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, bh));
      surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, bw));

      rect->offset.x /= bw;
      rect->offset.y /= bh;
      rect->extent.width = MAX2(1U, DIV_ROUND_UP(rect->extent.width, bw));
      rect->extent.height = MAX2(1U, DIV_ROUND_UP(rect->extent.height, bh));
   }
}
118 
/* Map @format to a bit-exact UINT format with the same block size, used for
 * raw copies where only the texel footprint matters, not the encoding.
 */
VkFormat pvr_get_raw_copy_format(VkFormat format)
{
   static const struct {
      uint32_t block_size;
      VkFormat raw_format;
   } map[] = {
      { 1U, VK_FORMAT_R8_UINT },
      { 2U, VK_FORMAT_R8G8_UINT },
      { 3U, VK_FORMAT_R8G8B8_UINT },
      { 4U, VK_FORMAT_R32_UINT },
      { 6U, VK_FORMAT_R16G16B16_UINT },
      { 8U, VK_FORMAT_R32G32_UINT },
      { 12U, VK_FORMAT_R32G32B32_UINT },
      { 16U, VK_FORMAT_R32G32B32A32_UINT },
   };
   const uint32_t block_size = vk_format_get_blocksize(format);

   for (uint32_t i = 0U; i < sizeof(map) / sizeof(map[0]); i++) {
      if (map[i].block_size == block_size)
         return map[i].raw_format;
   }

   unreachable("Unhandled copy block size.");
}
142 
/* Fill out @surface and @rect to address one subresource (mip level + array
 * layer) of @image for the transfer path.
 *
 * @fdepth selects the depth slice: for 3D twiddled images it is stored as a
 * fractional z_position (later consumed as a texture coordinate, so linear
 * filtering can sample between slices); for every other layout the integer
 * part is folded into the device address via the subresource depth pitch
 * and the fractional part is discarded.
 *
 * @format may differ from the image's own format (e.g. raw copy formats).
 * If the image is block-compressed but @format is not, all dimensions are
 * converted from texels to compressed blocks.
 *
 * NOTE(review): @device is currently unused in this function.
 */
static void pvr_setup_transfer_surface(struct pvr_device *device,
                                       struct pvr_transfer_cmd_surface *surface,
                                       VkRect2D *rect,
                                       const struct pvr_image *image,
                                       uint32_t array_layer,
                                       uint32_t mip_level,
                                       const VkOffset3D *offset,
                                       const VkExtent3D *extent,
                                       float fdepth,
                                       VkFormat format,
                                       VkImageAspectFlags aspect_mask)
{
   /* Mip-level dimensions, clamped so they never reach zero. */
   const uint32_t height = MAX2(image->vk.extent.height >> mip_level, 1U);
   const uint32_t width = MAX2(image->vk.extent.width >> mip_level, 1U);
   enum pipe_format image_pformat = vk_format_to_pipe_format(image->vk.format);
   enum pipe_format pformat = vk_format_to_pipe_format(format);
   const VkImageSubresource sub_resource = {
      .aspectMask = aspect_mask,
      .mipLevel = mip_level,
      .arrayLayer = array_layer,
   };
   VkSubresourceLayout info;
   uint32_t depth;

   /* Only 3D twiddled images address depth through the surface itself;
    * all other layouts treat each slice as an independent 2D surface.
    */
   if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
      depth = MAX2(image->vk.extent.depth >> mip_level, 1U);
   else
      depth = 1U;

   pvr_get_image_subresource_layout(image, &sub_resource, &info);

   surface->dev_addr = PVR_DEV_ADDR_OFFSET(image->dev_addr, info.offset);
   surface->width = width;
   surface->height = height;
   surface->depth = depth;

   /* The transfer stride is measured in texels, not bytes. */
   assert(info.rowPitch % vk_format_get_blocksize(format) == 0);
   surface->stride = info.rowPitch / vk_format_get_blocksize(format);

   surface->vk_format = format;
   surface->mem_layout = image->memlayout;
   surface->sample_count = image->vk.samples;

   /* See the function comment: fractional slice selection for 3D twiddled,
    * byte offset (fraction dropped) for everything else.
    */
   if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
      surface->z_position = fdepth;
   else
      surface->dev_addr.addr += info.depthPitch * ((uint32_t)fdepth);

   rect->offset.x = offset->x;
   rect->offset.y = offset->y;
   rect->extent.width = extent->width;
   rect->extent.height = extent->height;

   /* Compressed image accessed through an uncompressed transfer format:
    * convert the texel dimensions to whole compressed blocks.
    */
   if (util_format_is_compressed(image_pformat) &&
       !util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(image_pformat);
      uint32_t block_height = util_format_get_blockheight(image_pformat);

      surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width));
      surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height));
      surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width));

      rect->offset.x /= block_width;
      rect->offset.y /= block_height;
      rect->extent.width =
         MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width));
      rect->extent.height =
         MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height));
   }
}
213 
/* Implements vkCmdBlitImage2: for each region, for each array layer, for
 * each destination z slice, emit one transfer command that maps the source
 * rect onto the destination rect (optionally mirrored and filtered).
 */
void pvr_CmdBlitImage2(VkCommandBuffer commandBuffer,
                       const VkBlitImageInfo2 *pBlitImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pBlitImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pBlitImageInfo->dstImage);
   struct pvr_device *device = cmd_buffer->device;
   enum pvr_filter filter = PVR_FILTER_DONTCARE;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   if (pBlitImageInfo->filter == VK_FILTER_LINEAR)
      filter = PVR_FILTER_LINEAR;

   for (uint32_t i = 0U; i < pBlitImageInfo->regionCount; i++) {
      const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];

      assert(region->srcSubresource.layerCount ==
             region->dstSubresource.layerCount);
      /* Blit offsets may be given in reverse order to mirror the image;
       * normalize the z ranges to min/max and remember the inversion.
       * NOTE(review): the offsets are signed in the API but stored in
       * uint32_t here — relies on valid (non-negative, in-bounds) offsets
       * as required by the Vulkan spec.
       */
      const bool inverted_dst_z =
         (region->dstOffsets[1].z < region->dstOffsets[0].z);
      const bool inverted_src_z =
         (region->srcOffsets[1].z < region->srcOffsets[0].z);
      const uint32_t min_src_z = inverted_src_z ? region->srcOffsets[1].z
                                                : region->srcOffsets[0].z;
      const uint32_t max_src_z = inverted_src_z ? region->srcOffsets[0].z
                                                : region->srcOffsets[1].z;
      const uint32_t min_dst_z = inverted_dst_z ? region->dstOffsets[1].z
                                                : region->dstOffsets[0].z;
      const uint32_t max_dst_z = inverted_dst_z ? region->dstOffsets[0].z
                                                : region->dstOffsets[1].z;

      const uint32_t src_width =
         region->srcOffsets[1].x - region->srcOffsets[0].x;
      const uint32_t src_height =
         region->srcOffsets[1].y - region->srcOffsets[0].y;
      uint32_t dst_width;
      uint32_t dst_height;

      float initial_depth_offset;
      VkExtent3D src_extent;
      VkExtent3D dst_extent;
      VkOffset3D dst_offset = region->dstOffsets[0];
      float z_slice_stride;
      bool flip_x;
      bool flip_y;

      /* Normalize the destination x/y ranges; a reversed range becomes a
       * positive extent plus a flip flag handled by the transfer mapping.
       */
      if (region->dstOffsets[1].x > region->dstOffsets[0].x) {
         dst_width = region->dstOffsets[1].x - region->dstOffsets[0].x;
         flip_x = false;
      } else {
         dst_width = region->dstOffsets[0].x - region->dstOffsets[1].x;
         flip_x = true;
         dst_offset.x = region->dstOffsets[1].x;
      }

      if (region->dstOffsets[1].y > region->dstOffsets[0].y) {
         dst_height = region->dstOffsets[1].y - region->dstOffsets[0].y;
         flip_y = false;
      } else {
         dst_height = region->dstOffsets[0].y - region->dstOffsets[1].y;
         flip_y = true;
         dst_offset.y = region->dstOffsets[1].y;
      }

      /* If any of the extent regions is zero, then reject the blit and
       * continue.
       */
      if (!src_width || !src_height || !dst_width || !dst_height ||
          !(max_dst_z - min_dst_z) || !(max_src_z - min_src_z)) {
         mesa_loge("BlitImage: Region %i has an area of zero", i);
         continue;
      }

      src_extent = (VkExtent3D){
         .width = src_width,
         .height = src_height,
         .depth = 0U,
      };

      dst_extent = (VkExtent3D){
         .width = dst_width,
         .height = dst_height,
         .depth = 0U,
      };

      /* The z_position of a transfer surface is intended to be in the range
       * of 0.0f <= z_position <= depth. It will be used as a texture coordinate
       * in the source surface for cases where linear filtering is enabled, so
       * the fractional part will need to represent the exact midpoint of a z
       * slice range in the source texture, as it maps to each destination
       * slice.
       *
       * For destination surfaces, the fractional part is discarded, so
       * we can safely pass the slice index.
       */

      /* Calculate the ratio of z slices in our source region to that of our
       * destination region, to get the number of z slices in our source region
       * to iterate over for each destination slice.
       *
       * If our destination region is inverted, we iterate backwards.
       */
      z_slice_stride =
         (inverted_dst_z ? -1.0f : 1.0f) *
         ((float)(max_src_z - min_src_z) / (float)(max_dst_z - min_dst_z));

      /* Offset the initial depth offset by half of the z slice stride, into the
       * blit region's z range.
       */
      initial_depth_offset =
         (inverted_dst_z ? max_src_z : min_src_z) + (0.5f * z_slice_stride);

      for (uint32_t j = 0U; j < region->srcSubresource.layerCount; j++) {
         struct pvr_transfer_cmd_surface src_surface = { 0 };
         struct pvr_transfer_cmd_surface dst_surface = { 0 };
         VkRect2D src_rect;
         VkRect2D dst_rect;

         /* Get the subresource info for the src and dst images, this is
          * required when incrementing the address of the depth slice used by
          * the transfer surface.
          */
         VkSubresourceLayout src_info, dst_info;
         const VkImageSubresource src_sub_resource = {
            .aspectMask = region->srcSubresource.aspectMask,
            .mipLevel = region->srcSubresource.mipLevel,
            .arrayLayer = region->srcSubresource.baseArrayLayer + j,
         };
         const VkImageSubresource dst_sub_resource = {
            .aspectMask = region->dstSubresource.aspectMask,
            .mipLevel = region->dstSubresource.mipLevel,
            .arrayLayer = region->dstSubresource.baseArrayLayer + j,
         };

         pvr_get_image_subresource_layout(src, &src_sub_resource, &src_info);
         pvr_get_image_subresource_layout(dst, &dst_sub_resource, &dst_info);

         /* Setup the transfer surfaces once per image layer, which saves us
          * from repeating subresource queries by manually incrementing the
          * depth slices.
          */
         pvr_setup_transfer_surface(device,
                                    &src_surface,
                                    &src_rect,
                                    src,
                                    region->srcSubresource.baseArrayLayer + j,
                                    region->srcSubresource.mipLevel,
                                    &region->srcOffsets[0],
                                    &src_extent,
                                    initial_depth_offset,
                                    src->vk.format,
                                    region->srcSubresource.aspectMask);

         pvr_setup_transfer_surface(device,
                                    &dst_surface,
                                    &dst_rect,
                                    dst,
                                    region->dstSubresource.baseArrayLayer + j,
                                    region->dstSubresource.mipLevel,
                                    &dst_offset,
                                    &dst_extent,
                                    min_dst_z,
                                    dst->vk.format,
                                    region->dstSubresource.aspectMask);

         /* One transfer command per destination slice; the surfaces are
          * advanced in place between iterations.
          */
         for (uint32_t dst_z = min_dst_z; dst_z < max_dst_z; dst_z++) {
            struct pvr_transfer_cmd *transfer_cmd;
            VkResult result;

            /* TODO: See if we can allocate all the transfer cmds in one go. */
            transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
            if (!transfer_cmd)
               return;

            transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
            transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
            transfer_cmd->sources[0].mappings[0].flip_x = flip_x;
            transfer_cmd->sources[0].mappings[0].flip_y = flip_y;
            transfer_cmd->sources[0].mapping_count++;

            transfer_cmd->sources[0].surface = src_surface;
            transfer_cmd->sources[0].filter = filter;
            transfer_cmd->source_count = 1;

            transfer_cmd->dst = dst_surface;
            transfer_cmd->scissor = dst_rect;

            /* On success the command buffer owns transfer_cmd. */
            result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
            if (result != VK_SUCCESS) {
               vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
               return;
            }

            /* Advance the source by one destination slice's worth of source
             * slices. NOTE(review): for non-3D-twiddled layouts the float
             * stride is truncated to an integer each iteration, so a
             * fractional z scale accumulates no sub-slice progress here —
             * confirm this is the intended behavior for scaled 3D blits.
             */
            if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
               src_surface.z_position += z_slice_stride;
            } else {
               src_surface.dev_addr.addr +=
                  src_info.depthPitch * ((uint32_t)z_slice_stride);
            }

            /* The destination always advances exactly one slice. */
            if (dst_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
               dst_surface.z_position += 1.0f;
            else
               dst_surface.dev_addr.addr += dst_info.depthPitch;
         }
      }
   }
}
423 
/* Return the format copies of @format should be performed in: SNORM formats
 * are remapped to their bit-equivalent SINT counterparts, everything else
 * is passed through unchanged.
 */
static VkFormat pvr_get_copy_format(VkFormat format)
{
   static const struct {
      VkFormat snorm;
      VkFormat sint;
   } remap[] = {
      { VK_FORMAT_R8_SNORM, VK_FORMAT_R8_SINT },
      { VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8_SINT },
      { VK_FORMAT_R8G8B8_SNORM, VK_FORMAT_R8G8B8_SINT },
      { VK_FORMAT_R8G8B8A8_SNORM, VK_FORMAT_R8G8B8A8_SINT },
      { VK_FORMAT_B8G8R8A8_SNORM, VK_FORMAT_B8G8R8A8_SINT },
   };

   for (uint32_t i = 0U; i < sizeof(remap) / sizeof(remap[0]); i++) {
      if (remap[i].snorm == format)
         return remap[i].sint;
   }

   return format;
}
441 
442 static void
pvr_setup_surface_for_image(struct pvr_device * device,struct pvr_transfer_cmd_surface * surface,VkRect2D * rect,const struct pvr_image * image,uint32_t array_layer,uint32_t array_offset,uint32_t mip_level,const VkOffset3D * offset,const VkExtent3D * extent,uint32_t depth,VkFormat format,const VkImageAspectFlags aspect_mask)443 pvr_setup_surface_for_image(struct pvr_device *device,
444                             struct pvr_transfer_cmd_surface *surface,
445                             VkRect2D *rect,
446                             const struct pvr_image *image,
447                             uint32_t array_layer,
448                             uint32_t array_offset,
449                             uint32_t mip_level,
450                             const VkOffset3D *offset,
451                             const VkExtent3D *extent,
452                             uint32_t depth,
453                             VkFormat format,
454                             const VkImageAspectFlags aspect_mask)
455 {
456    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
457       pvr_setup_transfer_surface(device,
458                                  surface,
459                                  rect,
460                                  image,
461                                  array_layer + array_offset,
462                                  mip_level,
463                                  offset,
464                                  extent,
465                                  0.0f,
466                                  format,
467                                  aspect_mask);
468    } else {
469       pvr_setup_transfer_surface(device,
470                                  surface,
471                                  rect,
472                                  image,
473                                  array_layer,
474                                  mip_level,
475                                  offset,
476                                  extent,
477                                  (float)depth,
478                                  format,
479                                  aspect_mask);
480    }
481 }
482 
/* Emit one transfer command per slice copying @region from @src to @dst.
 *
 * Used for plain copies and for multisample resolves (selected by
 * @resolve_op). The copy is raw: both surfaces use a size-compatible UINT
 * format derived from the source block size, so no format conversion takes
 * place.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if a transfer command
 * could not be allocated (the error is also recorded on the command
 * buffer), or whatever pvr_cmd_buffer_add_transfer_cmd returns on failure.
 */
static VkResult
pvr_copy_or_resolve_image_region(struct pvr_cmd_buffer *cmd_buffer,
                                 enum pvr_resolve_op resolve_op,
                                 const struct pvr_image *src,
                                 const struct pvr_image *dst,
                                 const VkImageCopy2 *region)
{
   enum pipe_format src_pformat = vk_format_to_pipe_format(src->vk.format);
   enum pipe_format dst_pformat = vk_format_to_pipe_format(dst->vk.format);
   bool src_block_compressed = util_format_is_compressed(src_pformat);
   bool dst_block_compressed = util_format_is_compressed(dst_pformat);
   VkExtent3D src_extent;
   VkExtent3D dst_extent;
   VkFormat dst_format;
   VkFormat src_format;
   uint32_t dst_layers;
   uint32_t src_layers;
   uint32_t max_slices;
   uint32_t flags = 0U;

   /* Single-aspect copy of an interleaved D24S8 image: merge the copied
    * aspect from the source with the other aspect already present in the
    * destination.
    */
   if (src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
       region->srcSubresource.aspectMask !=
          (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      /* Takes the stencil of the source and the depth of the destination and
       * combines the two interleaved.
       */
      flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

      if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         /* Takes the depth of the source and the stencil of the destination and
          * combines the two interleaved.
          */
         flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }
   }

   src_extent = region->extent;
   dst_extent = region->extent;

   /* Copies between compressed and uncompressed formats: the region extent
    * is in texels of the compressed side, so scale the uncompressed side's
    * extent by the block dimensions to cover the same data.
    */
   if (src_block_compressed && !dst_block_compressed) {
      uint32_t block_width = util_format_get_blockwidth(src_pformat);
      uint32_t block_height = util_format_get_blockheight(src_pformat);

      dst_extent.width = MAX2(1U, DIV_ROUND_UP(src_extent.width, block_width));
      dst_extent.height =
         MAX2(1U, DIV_ROUND_UP(src_extent.height, block_height));
   } else if (!src_block_compressed && dst_block_compressed) {
      uint32_t block_width = util_format_get_blockwidth(dst_pformat);
      uint32_t block_height = util_format_get_blockheight(dst_pformat);

      dst_extent.width = MAX2(1U, src_extent.width * block_width);
      dst_extent.height = MAX2(1U, src_extent.height * block_height);
   }

   /* We don't care what format dst is as it's guaranteed to be size compatible
    * with src.
    */
   dst_format = pvr_get_raw_copy_format(src->vk.format);
   src_format = dst_format;

   src_layers =
      vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
   dst_layers =
      vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource);

   /* srcSubresource.layerCount must match layerCount of dstSubresource in
    * copies not involving 3D images. In copies involving 3D images, if there is
    * a 2D image it's layerCount.
    */
   max_slices = MAX3(src_layers, dst_layers, region->extent.depth);

   for (uint32_t i = 0U; i < max_slices; i++) {
      struct pvr_transfer_cmd *transfer_cmd;
      VkResult result;

      transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
      if (!transfer_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      transfer_cmd->flags |= flags;
      transfer_cmd->sources[0].resolve_op = resolve_op;

      pvr_setup_surface_for_image(
         cmd_buffer->device,
         &transfer_cmd->sources[0].surface,
         &transfer_cmd->sources[0].mappings[0U].src_rect,
         src,
         region->srcSubresource.baseArrayLayer,
         i,
         region->srcSubresource.mipLevel,
         &region->srcOffset,
         &src_extent,
         region->srcOffset.z + i,
         src_format,
         region->srcSubresource.aspectMask);

      pvr_setup_surface_for_image(cmd_buffer->device,
                                  &transfer_cmd->dst,
                                  &transfer_cmd->scissor,
                                  dst,
                                  region->dstSubresource.baseArrayLayer,
                                  i,
                                  region->dstSubresource.mipLevel,
                                  &region->dstOffset,
                                  &dst_extent,
                                  region->dstOffset.z + i,
                                  dst_format,
                                  region->dstSubresource.aspectMask);

      /* The destination rect doubles as the mapping target and scissor. */
      transfer_cmd->sources[0].mappings[0U].dst_rect = transfer_cmd->scissor;
      transfer_cmd->sources[0].mapping_count++;
      transfer_cmd->source_count = 1;

      /* On success the command buffer owns transfer_cmd. */
      result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
         return result;
      }
   }

   return VK_SUCCESS;
}
605 
606 VkResult
pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer * cmd_buffer,const struct pvr_image * src,const struct pvr_image * dst,const VkImageCopy2 * region)607 pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer *cmd_buffer,
608                                        const struct pvr_image *src,
609                                        const struct pvr_image *dst,
610                                        const VkImageCopy2 *region)
611 {
612    enum pvr_resolve_op resolve_op = PVR_RESOLVE_BLEND;
613 
614    if (src->vk.samples > 1U && dst->vk.samples < 2U) {
615       /* Integer resolve picks a single sample. */
616       if (vk_format_is_int(src->vk.format))
617          resolve_op = PVR_RESOLVE_SAMPLE0;
618    }
619 
620    return pvr_copy_or_resolve_image_region(cmd_buffer,
621                                            resolve_op,
622                                            src,
623                                            dst,
624                                            region);
625 }
626 
pvr_can_merge_ds_regions(const VkImageCopy2 * pRegionA,const VkImageCopy2 * pRegionB)627 static bool pvr_can_merge_ds_regions(const VkImageCopy2 *pRegionA,
628                                      const VkImageCopy2 *pRegionB)
629 {
630    assert(pRegionA->srcSubresource.aspectMask != 0U);
631    assert(pRegionB->srcSubresource.aspectMask != 0U);
632 
633    if (!((pRegionA->srcSubresource.aspectMask ^
634           pRegionB->srcSubresource.aspectMask) &
635          (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
636       return false;
637    }
638 
639    /* Assert if aspectMask mismatch between src and dst, given it's a depth and
640     * stencil image so not multi-planar and from the Vulkan 1.0.223 spec:
641     *
642     *    If neither srcImage nor dstImage has a multi-planar image format then
643     *    for each element of pRegions, srcSubresource.aspectMask and
644     *    dstSubresource.aspectMask must match.
645     */
646    assert(pRegionA->srcSubresource.aspectMask ==
647           pRegionA->dstSubresource.aspectMask);
648    assert(pRegionB->srcSubresource.aspectMask ==
649           pRegionB->dstSubresource.aspectMask);
650 
651    if (!(pRegionA->srcSubresource.mipLevel ==
652             pRegionB->srcSubresource.mipLevel &&
653          pRegionA->srcSubresource.baseArrayLayer ==
654             pRegionB->srcSubresource.baseArrayLayer &&
655          pRegionA->srcSubresource.layerCount ==
656             pRegionB->srcSubresource.layerCount)) {
657       return false;
658    }
659 
660    if (!(pRegionA->dstSubresource.mipLevel ==
661             pRegionB->dstSubresource.mipLevel &&
662          pRegionA->dstSubresource.baseArrayLayer ==
663             pRegionB->dstSubresource.baseArrayLayer &&
664          pRegionA->dstSubresource.layerCount ==
665             pRegionB->dstSubresource.layerCount)) {
666       return false;
667    }
668 
669    if (!(pRegionA->srcOffset.x == pRegionB->srcOffset.x &&
670          pRegionA->srcOffset.y == pRegionB->srcOffset.y &&
671          pRegionA->srcOffset.z == pRegionB->srcOffset.z)) {
672       return false;
673    }
674 
675    if (!(pRegionA->dstOffset.x == pRegionB->dstOffset.x &&
676          pRegionA->dstOffset.y == pRegionB->dstOffset.y &&
677          pRegionA->dstOffset.z == pRegionB->dstOffset.z)) {
678       return false;
679    }
680 
681    if (!(pRegionA->extent.width == pRegionB->extent.width &&
682          pRegionA->extent.height == pRegionB->extent.height &&
683          pRegionA->extent.depth == pRegionB->extent.depth)) {
684       return false;
685    }
686 
687    return true;
688 }
689 
/* Implements vkCmdCopyImage2: issue one copy per region, folding adjacent
 * depth-only + stencil-only D24S8 regions into a single combined blit where
 * possible.
 */
void pvr_CmdCopyImage2(VkCommandBuffer commandBuffer,
                       const VkCopyImageInfo2 *pCopyImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pCopyImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pCopyImageInfo->dstImage);

   const bool can_merge_ds = src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
                             dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyImageInfo->regionCount; i++) {
      const VkImageCopy2 *region = &pCopyImageInfo->pRegions[i];
      VkImageCopy2 merged;
      VkResult result;

      /* If an application has split a copy between D24S8 images into two
       * separate copy regions (one for the depth aspect and one for the
       * stencil aspect) attempt to merge the two regions back into one blit.
       *
       * This can only be merged if both regions are identical apart from the
       * aspectMask, one of which has to be depth and the other has to be
       * stencil.
       *
       * Only attempt to merge consecutive regions, ignore the case of merging
       * non-consecutive regions.
       */
      if (can_merge_ds && i + 1U < pCopyImageInfo->regionCount &&
          pvr_can_merge_ds_regions(region, &pCopyImageInfo->pRegions[i + 1])) {
         merged = *region;
         merged.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
                                            VK_IMAGE_ASPECT_STENCIL_BIT;
         merged.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
                                            VK_IMAGE_ASPECT_STENCIL_BIT;

         region = &merged;

         /* The next region was folded into this blit; skip it. */
         i++;
      }

      result = pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                      src,
                                                      dst,
                                                      region);
      if (result != VK_SUCCESS)
         return;
   }
}
753 
/* Uploads a buffer region into an image subresource.
 *
 * One 2D transfer command is recorded per (depth slice, array layer) pair:
 * the source is treated as a tightly packed 2D buffer surface at the
 * computed byte offset and the destination is the requested mip level /
 * layer / depth slice of \p image.
 *
 * \param cmd_buffer      Command buffer to record the transfer commands into.
 * \param buffer_dev_addr Device address of the source buffer.
 * \param image           Destination image.
 * \param region          Buffer/image copy parameters.
 * \param src_format      Format used to interpret the buffer texels.
 * \param dst_format      Format used for the destination surface.
 * \param flags           PVR_TRANSFER_CMD_FLAGS_* applied to every command.
 *
 * \return VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY, or the error from
 *         pvr_cmd_buffer_add_transfer_cmd().
 */
VkResult
pvr_copy_buffer_to_image_region_format(struct pvr_cmd_buffer *const cmd_buffer,
                                       const pvr_dev_addr_t buffer_dev_addr,
                                       const struct pvr_image *const image,
                                       const VkBufferImageCopy2 *const region,
                                       const VkFormat src_format,
                                       const VkFormat dst_format,
                                       const uint32_t flags)
{
   enum pipe_format pformat = vk_format_to_pipe_format(dst_format);
   uint32_t row_length_in_texels;
   uint32_t buffer_slice_size;
   uint32_t buffer_layer_size;
   uint32_t height_in_blks;
   uint32_t row_length;

   /* Per VkBufferImageCopy semantics, a bufferRowLength/bufferImageHeight of
    * zero means "tightly packed", i.e. use the image extent instead.
    */
   if (region->bufferRowLength == 0)
      row_length_in_texels = region->imageExtent.width;
   else
      row_length_in_texels = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height_in_blks = region->imageExtent.height;
   else
      height_in_blks = region->bufferImageHeight;

   if (util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(pformat);
      uint32_t block_height = util_format_get_blockheight(pformat);
      uint32_t block_size = util_format_get_blocksize(pformat);

      /* Convert the texel dimensions into compressed-block dimensions. */
      height_in_blks = DIV_ROUND_UP(height_in_blks, block_height);
      row_length_in_texels =
         DIV_ROUND_UP(row_length_in_texels, block_width) * block_size;
   }

   /* Row pitch in bytes of the source buffer data. */
   row_length = row_length_in_texels * vk_format_get_blocksize(src_format);

   buffer_slice_size = height_in_blks * row_length;
   buffer_layer_size = buffer_slice_size * region->imageExtent.depth;

   for (uint32_t i = 0; i < region->imageExtent.depth; i++) {
      const uint32_t depth = i + (uint32_t)region->imageOffset.z;

      for (uint32_t j = 0; j < region->imageSubresource.layerCount; j++) {
         /* Byte offset of this (layer, depth slice) in the source buffer. */
         const VkDeviceSize buffer_offset = region->bufferOffset +
                                            (j * buffer_layer_size) +
                                            (i * buffer_slice_size);
         struct pvr_transfer_cmd *transfer_cmd;
         VkResult result;

         transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
         if (!transfer_cmd)
            return VK_ERROR_OUT_OF_HOST_MEMORY;

         transfer_cmd->flags = flags;

         pvr_setup_buffer_surface(
            &transfer_cmd->sources[0].surface,
            &transfer_cmd->sources[0].mappings[0].src_rect,
            buffer_dev_addr,
            buffer_offset,
            src_format,
            image->vk.format,
            region->imageExtent.width,
            region->imageExtent.height,
            row_length_in_texels);

         /* Each command copies a single 2D slice. */
         transfer_cmd->sources[0].surface.depth = 1;
         transfer_cmd->source_count = 1;

         pvr_setup_transfer_surface(cmd_buffer->device,
                                    &transfer_cmd->dst,
                                    &transfer_cmd->scissor,
                                    image,
                                    region->imageSubresource.baseArrayLayer + j,
                                    region->imageSubresource.mipLevel,
                                    &region->imageOffset,
                                    &region->imageExtent,
                                    depth,
                                    dst_format,
                                    region->imageSubresource.aspectMask);

         transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
         transfer_cmd->sources[0].mapping_count++;

         result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
         if (result != VK_SUCCESS) {
            /* Ownership was not taken on failure; free the command here. */
            vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
            return result;
         }
      }
   }

   return VK_SUCCESS;
}
850 
851 VkResult
pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer * const cmd_buffer,const pvr_dev_addr_t buffer_dev_addr,const struct pvr_image * const image,const VkBufferImageCopy2 * const region)852 pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer *const cmd_buffer,
853                                 const pvr_dev_addr_t buffer_dev_addr,
854                                 const struct pvr_image *const image,
855                                 const VkBufferImageCopy2 *const region)
856 {
857    const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
858    VkFormat src_format;
859    VkFormat dst_format;
860    uint32_t flags = 0;
861 
862    if (vk_format_has_depth(image->vk.format) &&
863        vk_format_has_stencil(image->vk.format)) {
864       flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
865 
866       if ((aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) {
867          src_format = vk_format_stencil_only(image->vk.format);
868       } else {
869          src_format = vk_format_depth_only(image->vk.format);
870          flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
871       }
872 
873       dst_format = image->vk.format;
874    } else {
875       src_format = pvr_get_raw_copy_format(image->vk.format);
876       dst_format = src_format;
877    }
878 
879    return pvr_copy_buffer_to_image_region_format(cmd_buffer,
880                                                  buffer_dev_addr,
881                                                  image,
882                                                  region,
883                                                  src_format,
884                                                  dst_format,
885                                                  flags);
886 }
887 
/* Records buffer-to-image copy transfers for every region, stopping at the
 * first region that fails to record.
 */
void pvr_CmdCopyBufferToImage2(
   VkCommandBuffer commandBuffer,
   const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   PVR_FROM_HANDLE(pvr_image, dst, pCopyBufferToImageInfo->dstImage);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyBufferToImageInfo->regionCount; i++) {
      const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[i];
      VkResult result;

      result = pvr_copy_buffer_to_image_region(cmd_buffer,
                                               src->dev_addr,
                                               dst,
                                               region);
      if (result != VK_SUCCESS)
         return;
   }
}
908 
/* Downloads an image subresource region into a buffer.
 *
 * The destination buffer is set up once as a 2D surface and then the buffer
 * address / source slice are stepped manually per depth slice, recording one
 * transfer command per (array layer, depth slice) pair.
 *
 * \param cmd_buffer      Command buffer to record the transfer commands into.
 * \param image           Source image (must be single sampled).
 * \param buffer_dev_addr Device address of the destination buffer.
 * \param region          Buffer/image copy parameters.
 * \param src_format      Format used to read the image texels.
 * \param dst_format      Format used for the buffer destination surface.
 *
 * \return VK_SUCCESS or an error from allocation / command recording.
 */
VkResult
pvr_copy_image_to_buffer_region_format(struct pvr_cmd_buffer *const cmd_buffer,
                                       const struct pvr_image *const image,
                                       const pvr_dev_addr_t buffer_dev_addr,
                                       const VkBufferImageCopy2 *const region,
                                       const VkFormat src_format,
                                       const VkFormat dst_format)
{
   enum pipe_format pformat = vk_format_to_pipe_format(image->vk.format);
   struct pvr_transfer_cmd_surface dst_surface = { 0 };
   VkImageSubresource sub_resource;
   uint32_t buffer_image_height;
   uint32_t buffer_row_length;
   uint32_t buffer_slice_size;
   uint32_t max_array_layers;
   VkRect2D dst_rect = { 0 };
   uint32_t max_depth_slice;
   VkSubresourceLayout info;

   /* Only images with VK_SAMPLE_COUNT_1_BIT can be copied to buffer. */
   assert(image->vk.samples == 1);

   /* Zero bufferRowLength/bufferImageHeight mean "tightly packed"; fall back
    * to the copied extent in that case.
    */
   if (region->bufferRowLength == 0)
      buffer_row_length = region->imageExtent.width;
   else
      buffer_row_length = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buffer_image_height = region->imageExtent.height;
   else
      buffer_image_height = region->bufferImageHeight;

   max_array_layers =
      region->imageSubresource.baseArrayLayer +
      vk_image_subresource_layer_count(&image->vk, &region->imageSubresource);

   /* Bytes occupied by one depth slice's worth of data in the buffer. */
   buffer_slice_size = buffer_image_height * buffer_row_length *
                       vk_format_get_blocksize(dst_format);

   max_depth_slice = region->imageExtent.depth + region->imageOffset.z;

   pvr_setup_buffer_surface(&dst_surface,
                            &dst_rect,
                            buffer_dev_addr,
                            region->bufferOffset,
                            dst_format,
                            image->vk.format,
                            buffer_row_length,
                            buffer_image_height,
                            buffer_row_length);

   dst_rect.extent.width = region->imageExtent.width;
   dst_rect.extent.height = region->imageExtent.height;

   if (util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(pformat);
      uint32_t block_height = util_format_get_blockheight(pformat);

      /* Rects are expressed in compressed blocks, minimum one block. */
      dst_rect.extent.width =
         MAX2(1U, DIV_ROUND_UP(dst_rect.extent.width, block_width));
      dst_rect.extent.height =
         MAX2(1U, DIV_ROUND_UP(dst_rect.extent.height, block_height));
   }

   sub_resource = (VkImageSubresource){
      .aspectMask = region->imageSubresource.aspectMask,
      .mipLevel = region->imageSubresource.mipLevel,
      .arrayLayer = region->imageSubresource.baseArrayLayer,
   };

   /* Queried for info.depthPitch, used to step linear source slices below. */
   pvr_get_image_subresource_layout(image, &sub_resource, &info);

   for (uint32_t i = region->imageSubresource.baseArrayLayer;
        i < max_array_layers;
        i++) {
      struct pvr_transfer_cmd_surface src_surface = { 0 };
      VkRect2D src_rect = { 0 };

      /* Note: Set the depth to the initial depth offset, the memory address (or
       * the z_position) for the depth slice will be incremented manually in the
       * loop below.
       */
      pvr_setup_transfer_surface(cmd_buffer->device,
                                 &src_surface,
                                 &src_rect,
                                 image,
                                 i,
                                 region->imageSubresource.mipLevel,
                                 &region->imageOffset,
                                 &region->imageExtent,
                                 region->imageOffset.z,
                                 src_format,
                                 region->imageSubresource.aspectMask);

      for (uint32_t j = region->imageOffset.z; j < max_depth_slice; j++) {
         struct pvr_transfer_cmd *transfer_cmd;
         VkResult result;

         /* TODO: See if we can allocate all the transfer cmds in one go. */
         transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
         if (!transfer_cmd)
            return vk_error(cmd_buffer->device, VK_ERROR_OUT_OF_HOST_MEMORY);

         transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
         transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
         transfer_cmd->sources[0].mapping_count++;

         transfer_cmd->sources[0].surface = src_surface;
         transfer_cmd->source_count = 1;

         /* Copies of dst_surface/dst_rect; the originals are advanced below. */
         transfer_cmd->dst = dst_surface;
         transfer_cmd->scissor = dst_rect;

         result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
         if (result != VK_SUCCESS) {
            vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
            return result;
         }

         /* Advance the buffer destination to the next slice. */
         dst_surface.dev_addr.addr += buffer_slice_size;

         /* Advance the source to the next depth slice: 3D-twiddled surfaces
          * are addressed via z_position, others by byte offset.
          */
         if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
            src_surface.z_position += 1.0f;
         else
            src_surface.dev_addr.addr += info.depthPitch;
      }
   }

   return VK_SUCCESS;
}
1039 
1040 VkResult
pvr_copy_image_to_buffer_region(struct pvr_cmd_buffer * const cmd_buffer,const struct pvr_image * const image,const pvr_dev_addr_t buffer_dev_addr,const VkBufferImageCopy2 * const region)1041 pvr_copy_image_to_buffer_region(struct pvr_cmd_buffer *const cmd_buffer,
1042                                 const struct pvr_image *const image,
1043                                 const pvr_dev_addr_t buffer_dev_addr,
1044                                 const VkBufferImageCopy2 *const region)
1045 {
1046    const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
1047 
1048    VkFormat src_format = pvr_get_copy_format(image->vk.format);
1049    VkFormat dst_format;
1050 
1051    /* Color and depth aspect copies can be done using an appropriate raw format.
1052     */
1053    if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) {
1054       src_format = pvr_get_raw_copy_format(src_format);
1055       dst_format = src_format;
1056    } else if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
1057       /* From the Vulkan spec:
1058        *
1059        *    Data copied to or from the stencil aspect of any depth/stencil
1060        *    format is tightly packed with one VK_FORMAT_S8_UINT value per texel.
1061        */
1062       dst_format = VK_FORMAT_S8_UINT;
1063    } else {
1064       /* YUV Planes require specific formats. */
1065       dst_format = src_format;
1066    }
1067 
1068    return pvr_copy_image_to_buffer_region_format(cmd_buffer,
1069                                                  image,
1070                                                  buffer_dev_addr,
1071                                                  region,
1072                                                  src_format,
1073                                                  dst_format);
1074 }
1075 
/* Records image-to-buffer copy transfers for every region, stopping at the
 * first region that fails to record.
 */
void pvr_CmdCopyImageToBuffer2(
   VkCommandBuffer commandBuffer,
   const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pCopyImageToBufferInfo->srcImage);
   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyImageToBufferInfo->dstBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyImageToBufferInfo->regionCount; i++) {
      VkResult result;

      result =
         pvr_copy_image_to_buffer_region(cmd_buffer,
                                         src,
                                         dst->dev_addr,
                                         &pCopyImageToBufferInfo->pRegions[i]);
      if (result != VK_SUCCESS)
         return;
   }
}
1097 
/* Computes the extent of the given mip level of an image by halving the base
 * extent per level, clamped to the minimum mip size.
 */
static void pvr_calc_mip_level_extents(const struct pvr_image *image,
                                       uint16_t mip_level,
                                       VkExtent3D *extent_out)
{
   const VkExtent3D *base = &image->vk.extent;
   /* 3D textures are clamped to 4x4x4. */
   const uint32_t min_dim =
      image->vk.image_type == VK_IMAGE_TYPE_3D ? 4U : 1U;

   *extent_out = (VkExtent3D){
      .width = MAX2(base->width >> mip_level, min_dim),
      .height = MAX2(base->height >> mip_level, min_dim),
      .depth = MAX2(base->depth >> mip_level, min_dim),
   };
}
1110 
/* Records fill transfers clearing a subresource range of an image.
 *
 * One fill command is recorded per (array layer, mip level, depth slice)
 * covered by \p psRange.
 *
 * \param cmd_buffer Command buffer to record the fill commands into.
 * \param image      Image to clear.
 * \param pColor     Clear value, copied into every command's clear_color.
 * \param psRange    Layers/levels/aspects to clear.
 * \param flags      Extra PVR_TRANSFER_CMD_FLAGS_* (e.g. DSMERGE/PICKD).
 *
 * \return VK_SUCCESS or an error from allocation / command recording.
 */
static VkResult pvr_clear_image_range(struct pvr_cmd_buffer *cmd_buffer,
                                      const struct pvr_image *image,
                                      const VkClearColorValue *pColor,
                                      const VkImageSubresourceRange *psRange,
                                      uint32_t flags)
{
   const uint32_t layer_count =
      vk_image_subresource_layer_count(&image->vk, psRange);
   const uint32_t max_layers = psRange->baseArrayLayer + layer_count;
   VkFormat format = image->vk.format;
   /* Clears always start at the subresource origin and cover the whole mip. */
   const VkOffset3D offset = { 0 };
   VkExtent3D mip_extent;

   assert((psRange->baseArrayLayer + layer_count) <= image->vk.array_layers);

   for (uint32_t layer = psRange->baseArrayLayer; layer < max_layers; layer++) {
      const uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, psRange);
      const uint32_t max_level = psRange->baseMipLevel + level_count;

      assert((psRange->baseMipLevel + level_count) <= image->vk.mip_levels);

      for (uint32_t level = psRange->baseMipLevel; level < max_level; level++) {
         pvr_calc_mip_level_extents(image, level, &mip_extent);

         /* Each depth slice of a 3D mip is cleared by its own command. */
         for (uint32_t depth = 0; depth < mip_extent.depth; depth++) {
            struct pvr_transfer_cmd *transfer_cmd;
            VkResult result;

            transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
            if (!transfer_cmd)
               return VK_ERROR_OUT_OF_HOST_MEMORY;

            transfer_cmd->flags |= flags;
            transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;

            /* Replicate the clear value into every clear_color channel. */
            for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
               transfer_cmd->clear_color[i].ui = pColor->uint32[i];

            pvr_setup_transfer_surface(cmd_buffer->device,
                                       &transfer_cmd->dst,
                                       &transfer_cmd->scissor,
                                       image,
                                       layer,
                                       level,
                                       &offset,
                                       &mip_extent,
                                       depth,
                                       format,
                                       psRange->aspectMask);

            result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
            if (result != VK_SUCCESS) {
               /* Ownership was not taken on failure; free the command here. */
               vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
               return result;
            }
         }
      }
   }

   return VK_SUCCESS;
}
1173 
/* Clears one or more subresource ranges of a color image by recording fill
 * transfers via pvr_clear_image_range(). Recording stops at the first range
 * that fails.
 */
void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
                            VkImage _image,
                            VkImageLayout imageLayout,
                            const VkClearColorValue *pColor,
                            uint32_t rangeCount,
                            const VkImageSubresourceRange *pRanges)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, image, _image);

   /* Bail out early if the command buffer is not in a recordable state, for
    * consistency with the other command entrypoints in this file.
    */
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0; i < rangeCount; i++) {
      const VkResult result =
         pvr_clear_image_range(cmd_buffer, image, pColor, &pRanges[i], 0);
      if (result != VK_SUCCESS)
         return;
   }
}
1191 
/* Clears one or more subresource ranges of a depth/stencil image.
 *
 * The depth value is packed into clear color channel 0 and the stencil value
 * into channel 1. For packed D24S8 images where only a single aspect of a
 * range is cleared, a D/S merge transfer is used so the untouched aspect is
 * preserved.
 */
void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                                   VkImage _image,
                                   VkImageLayout imageLayout,
                                   const VkClearDepthStencilValue *pDepthStencil,
                                   uint32_t rangeCount,
                                   const VkImageSubresourceRange *pRanges)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, image, _image);

   /* Bail out early if the command buffer is not in a recordable state, for
    * consistency with the other command entrypoints in this file.
    */
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0; i < rangeCount; i++) {
      const VkImageAspectFlags ds_aspect = VK_IMAGE_ASPECT_DEPTH_BIT |
                                           VK_IMAGE_ASPECT_STENCIL_BIT;
      VkClearColorValue clear_ds = { 0 };
      uint32_t flags = 0U;
      VkResult result;

      if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
          pRanges[i].aspectMask != ds_aspect) {
         /* A depth or stencil blit to a packed_depth_stencil requires a merge
          * operation.
          */
         flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

         if (pRanges[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
            flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }

      /* Channel 0 carries depth, channel 1 carries stencil. */
      clear_ds.float32[0] = pDepthStencil->depth;
      clear_ds.uint32[1] = pDepthStencil->stencil;

      result =
         pvr_clear_image_range(cmd_buffer, image, &clear_ds, pRanges + i, flags);
      if (result != VK_SUCCESS)
         return;
   }
}
1229 
/* Records transfer commands that copy (or fill) a byte range between buffer
 * addresses.
 *
 * The range is processed as a sequence of 2D rects: the widest texel format
 * that still divides the remaining size is chosen, the rect is capped at
 * PVR_MAX_TRANSFER_SIZE_IN_TEXELS in each dimension, and the loop repeats
 * until the whole range is consumed.
 *
 * \param cmd_buffer Command buffer to record the transfer commands into.
 * \param src_addr   Source buffer device address (ignored when \p is_fill).
 * \param src_offset Byte offset into the source buffer.
 * \param dst_addr   Destination buffer device address.
 * \param dst_offset Byte offset into the destination buffer.
 * \param size       Number of bytes to copy or fill.
 * \param fill_data  32-bit fill pattern (used only when \p is_fill).
 * \param is_fill    True to fill with \p fill_data instead of copying.
 *
 * \return VK_SUCCESS or an error from allocation / command recording.
 */
static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer,
                                           pvr_dev_addr_t src_addr,
                                           VkDeviceSize src_offset,
                                           pvr_dev_addr_t dst_addr,
                                           VkDeviceSize dst_offset,
                                           VkDeviceSize size,
                                           uint32_t fill_data,
                                           bool is_fill)
{
   VkDeviceSize offset = 0;

   while (offset < size) {
      const VkDeviceSize remaining_size = size - offset;
      struct pvr_transfer_cmd *transfer_cmd;
      uint32_t texel_width;
      VkDeviceSize texels;
      VkFormat vk_format;
      VkResult result;
      uint32_t height;
      uint32_t width;

      /* Pick the widest texel format the remaining size allows; fills always
       * use 32-bit texels to match the 32-bit fill pattern.
       */
      if (is_fill) {
         vk_format = VK_FORMAT_R32_UINT;
         texel_width = 4U;
      } else if (remaining_size >= 16U) {
         vk_format = VK_FORMAT_R32G32B32A32_UINT;
         texel_width = 16U;
      } else if (remaining_size >= 4U) {
         vk_format = VK_FORMAT_R32_UINT;
         texel_width = 4U;
      } else {
         vk_format = VK_FORMAT_R8_UINT;
         texel_width = 1U;
      }

      texels = remaining_size / texel_width;

      /* Try to do max-width rects, fall back to a 1-height rect for the
       * remainder.
       */
      if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) {
         width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
         height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
         height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS);
      } else {
         width = texels;
         height = 1;
      }

      transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
      if (!transfer_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      if (!is_fill) {
         pvr_setup_buffer_surface(
            &transfer_cmd->sources[0].surface,
            &transfer_cmd->sources[0].mappings[0].src_rect,
            src_addr,
            offset + src_offset,
            vk_format,
            vk_format,
            width,
            height,
            width);
         transfer_cmd->source_count = 1;
      } else {
         transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;

         /* Replicate the fill pattern into every clear_color channel. */
         for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
            transfer_cmd->clear_color[i].ui = fill_data;
      }

      pvr_setup_buffer_surface(&transfer_cmd->dst,
                               &transfer_cmd->scissor,
                               dst_addr,
                               offset + dst_offset,
                               vk_format,
                               vk_format,
                               width,
                               height,
                               width);

      /* Only copies have a source; fills only need the destination. */
      if (transfer_cmd->source_count > 0) {
         transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;

         transfer_cmd->sources[0].mapping_count++;
      }

      result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
         return result;
      }

      offset += width * height * texel_width;
   }

   return VK_SUCCESS;
}
1329 
/* Implements vkCmdUpdateBuffer: stages the user data in device-accessible
 * memory via pvr_cmd_buffer_upload_general(), then records a buffer copy from
 * the staging allocation into the destination range.
 */
void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                         VkBuffer dstBuffer,
                         VkDeviceSize dstOffset,
                         VkDeviceSize dataSize,
                         const void *pData)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
   struct pvr_suballoc_bo *staging_bo;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Stage the user-provided data first. */
   result =
      pvr_cmd_buffer_upload_general(cmd_buffer, pData, dataSize, &staging_bo);
   if (result != VK_SUCCESS)
      return;

   /* Then blit the staging allocation into the destination buffer. */
   pvr_cmd_copy_buffer_region(cmd_buffer,
                              staging_bo->dev_addr,
                              0,
                              dst->dev_addr,
                              dstOffset,
                              dataSize,
                              0U,
                              false);
}
1356 
/* Implements vkCmdCopyBuffer2: records one buffer-copy transfer per region,
 * stopping at the first region that fails to record.
 */
void pvr_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
                           const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyBufferInfo->regionCount; i++) {
      const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[i];
      VkResult result;

      result = pvr_cmd_copy_buffer_region(cmd_buffer,
                                          src->dev_addr,
                                          region->srcOffset,
                                          dst->dev_addr,
                                          region->dstOffset,
                                          region->size,
                                          0U,
                                          false);
      if (result != VK_SUCCESS)
         return;
   }
}
1380 
/* Implements vkCmdFillBuffer: resolves the fill range and records a fill
 * transfer writing the 32-bit pattern over it.
 */
void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
                       VkBuffer dstBuffer,
                       VkDeviceSize dstOffset,
                       VkDeviceSize fillSize,
                       uint32_t data)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
   VkDeviceSize size;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Resolve VK_WHOLE_SIZE and clamp the range to the buffer's end. */
   size = vk_buffer_range(&dst->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   size &= ~3ULL;

   pvr_cmd_copy_buffer_region(cmd_buffer,
                              PVR_DEV_ADDR_INVALID,
                              0,
                              dst->dev_addr,
                              dstOffset,
                              size,
                              data,
                              true);
}
1413 
1414 /**
1415  * \brief Returns the maximum number of layers to clear starting from base_layer
1416  * that contain or match the target rectangle.
1417  *
1418  * \param[in] target_rect      The region which the clear should contain or
1419  *                             match.
1420  * \param[in] base_layer       The layer index to start at.
1421  * \param[in] clear_rect_count Amount of clear_rects
1422  * \param[in] clear_rects      Array of clear rects.
1423  *
1424  * \return Max number of layers that cover or match the target region.
1425  */
1426 static uint32_t
pvr_get_max_layers_covering_target(VkRect2D target_rect,uint32_t base_layer,uint32_t clear_rect_count,const VkClearRect * clear_rects)1427 pvr_get_max_layers_covering_target(VkRect2D target_rect,
1428                                    uint32_t base_layer,
1429                                    uint32_t clear_rect_count,
1430                                    const VkClearRect *clear_rects)
1431 {
1432    const int32_t target_x0 = target_rect.offset.x;
1433    const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
1434    const int32_t target_y0 = target_rect.offset.y;
1435    const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;
1436 
1437    uint32_t layer_count = 0;
1438 
1439    assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
1440    assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);
1441 
1442    for (uint32_t i = 0; i < clear_rect_count; i++) {
1443       const VkClearRect *clear_rect = &clear_rects[i];
1444       const uint32_t max_layer =
1445          clear_rect->baseArrayLayer + clear_rect->layerCount;
1446       bool target_is_covered;
1447       int32_t x0, x1;
1448       int32_t y0, y1;
1449 
1450       if (clear_rect->baseArrayLayer == 0)
1451          continue;
1452 
1453       assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
1454              UINT32_MAX);
1455 
1456       /* Check for layer intersection. */
1457       if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
1458          continue;
1459 
1460       x0 = clear_rect->rect.offset.x;
1461       x1 = x0 + (int32_t)clear_rect->rect.extent.width;
1462       y0 = clear_rect->rect.offset.y;
1463       y1 = y0 + (int32_t)clear_rect->rect.extent.height;
1464 
1465       assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
1466       assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
1467              INT32_MAX);
1468 
1469       target_is_covered = x0 <= target_x0 && x1 >= target_x1;
1470       target_is_covered &= y0 <= target_y0 && y1 >= target_y1;
1471 
1472       if (target_is_covered)
1473          layer_count = MAX2(layer_count, max_layer - base_layer);
1474    }
1475 
1476    return layer_count;
1477 }
1478 
1479 /* Return true if vertex shader is required to output render target id to pick
1480  * the texture array layer.
1481  */
1482 static inline bool
pvr_clear_needs_rt_id_output(struct pvr_device_info * dev_info,uint32_t rect_count,const VkClearRect * rects)1483 pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
1484                              uint32_t rect_count,
1485                              const VkClearRect *rects)
1486 {
1487    if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
1488       return false;
1489 
1490    for (uint32_t i = 0; i < rect_count; i++) {
1491       if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
1492          return true;
1493    }
1494 
1495    return false;
1496 }
1497 
/* Allocates and fills the constant/shared-register buffer consumed by a
 * static clear-attachment shader.
 *
 * Driver-supplied constants (the packed clear color and, when the MRT
 * resource lives in memory, the tile buffer address) are scattered to the
 * register slots given by shader_info->driver_const_location_map. The
 * shader's own static constants from shader_info->static_const_buffer are
 * then written to their fixed destinations.
 *
 * On success *const_shareds_buffer_out holds the suballocation; it is
 * tracked by the command buffer's allocation list (see
 * pvr_cmd_buffer_alloc_mem), so the caller must unlink/free it on its own
 * error paths.
 */
static VkResult pvr_clear_color_attachment_static_create_consts_buffer(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct pvr_shader_factory_info *shader_info,
   const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
   ASSERTED bool uses_tile_buffer,
   uint32_t tile_buffer_idx,
   struct pvr_suballoc_bo **const const_shareds_buffer_out)
{
   struct pvr_device *device = cmd_buffer->device;
   struct pvr_suballoc_bo *const_shareds_buffer;
   struct pvr_bo *tile_buffer;
   uint64_t tile_dev_addr;
   uint32_t *buffer;
   VkResult result;

   /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
    * Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
    */
   result =
      pvr_cmd_buffer_alloc_mem(cmd_buffer,
                               device->heaps.general_heap,
                               PVR_DW_TO_BYTES(shader_info->const_shared_regs),
                               &const_shareds_buffer);
   if (result != VK_SUCCESS)
      return result;

   buffer = pvr_bo_suballoc_get_map_addr(const_shareds_buffer);

   /* Scatter the driver constants to their per-shader-variant slots. */
   for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) {
      uint32_t dest_idx = shader_info->driver_const_location_map[i];

      /* This shader variant doesn't consume this constant. */
      if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED)
         continue;

      assert(dest_idx < shader_info->const_shared_regs);

      switch (i) {
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3:
         /* Packed clear color, one 32-bit component per constant. */
         buffer[dest_idx] = clear_color[i];
         break;

      case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER:
         /* High half of the 64-bit tile buffer device address. */
         assert(uses_tile_buffer);
         tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
         tile_dev_addr = tile_buffer->vma->dev_addr.addr;
         buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32);
         break;

      case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER:
         /* Low half of the 64-bit tile buffer device address. */
         assert(uses_tile_buffer);
         tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
         tile_dev_addr = tile_buffer->vma->dev_addr.addr;
         buffer[dest_idx] = (uint32_t)tile_dev_addr;
         break;

      default:
         unreachable("Unsupported clear attachment const type.");
      }
   }

   /* Append the shader's fixed static constants. */
   for (uint32_t i = 0; i < shader_info->num_static_const; i++) {
      const struct pvr_static_buffer *static_buff =
         &shader_info->static_const_buffer[i];

      assert(static_buff->dst_idx < shader_info->const_shared_regs);

      buffer[static_buff->dst_idx] = static_buff->value;
   }

   *const_shareds_buffer_out = const_shareds_buffer;

   return VK_SUCCESS;
}
1574 
/* Emits the PPP state and PDS programs required to clear a single color
 * attachment (also used for z-replicated depth clears, where the depth
 * value is written as an R32_SFLOAT color) into the current graphics sub
 * command's control stream.
 *
 * template_idx selects one of the device's static clear PPP templates and
 * doubles as an aspect mask: if it has the stencil bit set, `stencil` is
 * programmed as the ISP stencil reference. When vs_has_rt_id_output is set
 * the template is adjusted for a vertex shader that also outputs the
 * render target id.
 *
 * Returns VK_SUCCESS or an error already recorded on the command buffer.
 */
static VkResult pvr_clear_color_attachment_static(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct usc_mrt_resource *mrt_resource,
   VkFormat format,
   uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
   uint32_t template_idx,
   uint32_t stencil,
   bool vs_has_rt_id_output)
{
   struct pvr_device *device = cmd_buffer->device;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   ASSERTED const bool has_eight_output_registers =
      PVR_HAS_FEATURE(dev_info, eight_output_registers);
   const struct pvr_device_static_clear_state *dev_clear_state =
      &device->static_clear_state;
   /* MRT resources of type MEMORY are written through a tile buffer rather
    * than output registers.
    */
   const bool uses_tile_buffer = mrt_resource->type ==
                                 USC_MRT_RESOURCE_TYPE_MEMORY;
   const struct pvr_pds_clear_attachment_program_info *clear_attachment_program;
   struct pvr_pds_pixel_shader_sa_program texture_program;
   uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT];
   const struct pvr_shader_factory_info *shader_info;
   struct pvr_suballoc_bo *pds_texture_program_bo;
   struct pvr_static_clear_ppp_template template;
   struct pvr_suballoc_bo *const_shareds_buffer;
   uint64_t pds_texture_program_addr;
   struct pvr_suballoc_bo *pvr_bo;
   uint32_t tile_buffer_idx = 0;
   uint32_t out_reg_count;
   uint32_t output_offset;
   uint32_t program_idx;
   uint32_t *buffer;
   VkResult result;

   /* Number of 32-bit output registers needed to hold the format's PBE
    * accumulation value.
    */
   out_reg_count =
      DIV_ROUND_UP(pvr_get_pbe_accum_format_size_in_bytes(format), 4U);

   if (uses_tile_buffer) {
      tile_buffer_idx = mrt_resource->mem.tile_buffer;
      output_offset = mrt_resource->mem.offset_dw;
   } else {
      output_offset = mrt_resource->reg.output_reg;
   }

   /* Only 4 output registers are available unless the device has the
    * eight_output_registers feature.
    */
   assert(has_eight_output_registers || out_reg_count + output_offset <= 4);

   /* Pick the pre-built clear shader variant matching this output layout. */
   program_idx = pvr_get_clear_attachment_program_index(out_reg_count,
                                                        output_offset,
                                                        uses_tile_buffer);

   shader_info = clear_attachment_collection[program_idx].info;

   result = pvr_clear_color_attachment_static_create_consts_buffer(
      cmd_buffer,
      shader_info,
      clear_color,
      uses_tile_buffer,
      tile_buffer_idx,
      &const_shareds_buffer);
   if (result != VK_SUCCESS)
      return result;

   /* DMA the consts buffer into shared registers before the shader runs. */
   /* clang-format off */
   texture_program = (struct pvr_pds_pixel_shader_sa_program){
      .num_texture_dma_kicks = 1,
      .texture_dma_address = {
         [0] = const_shareds_buffer->dev_addr.addr,
      }
   };
   /* clang-format on */

   pvr_csb_pack (&texture_program.texture_dma_control[0],
                 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
                 doutd_src1) {
      doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
      doutd_src1.bsize = shader_info->const_shared_regs;
   }

   clear_attachment_program =
      &dev_clear_state->pds_clear_attachment_program_info[program_idx];

   /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
    * Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
    */
   result = pvr_cmd_buffer_alloc_mem(
      cmd_buffer,
      device->heaps.pds_heap,
      clear_attachment_program->texture_program_data_size,
      &pds_texture_program_bo);
   if (result != VK_SUCCESS) {
      /* Undo the consts buffer allocation made above. */
      list_del(&const_shareds_buffer->link);
      pvr_bo_suballoc_free(const_shareds_buffer);

      return result;
   }

   buffer = pvr_bo_suballoc_get_map_addr(pds_texture_program_bo);
   /* PDS addresses are programmed relative to the PDS heap base. */
   pds_texture_program_addr = pds_texture_program_bo->dev_addr.addr -
                              device->heaps.pds_heap->base_addr.addr;

   pvr_pds_generate_pixel_shader_sa_texture_state_data(
      &texture_program,
      buffer,
      &device->pdevice->dev_info);

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SHADERBASE],
                 TA_STATE_PDS_SHADERBASE,
                 shaderbase) {
      shaderbase.addr = clear_attachment_program->pixel_program_offset;
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXUNICODEBASE],
                 TA_STATE_PDS_TEXUNICODEBASE,
                 texunicodebase) {
      texunicodebase.addr = clear_attachment_program->texture_program_offset;
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO1],
                 TA_STATE_PDS_SIZEINFO1,
                 sizeinfo1) {
      sizeinfo1.pds_texturestatesize = DIV_ROUND_UP(
         clear_attachment_program->texture_program_data_size,
         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE));

      sizeinfo1.pds_tempsize =
         DIV_ROUND_UP(clear_attachment_program->texture_program_pds_temps_count,
                      PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE));
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO2],
                 TA_STATE_PDS_SIZEINFO2,
                 sizeinfo2) {
      sizeinfo2.usc_sharedsize =
         DIV_ROUND_UP(shader_info->const_shared_regs,
                      PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
   }

   /* Dummy coefficient loading program. */
   pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_VARYINGBASE] = 0;

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXTUREDATABASE],
                 TA_STATE_PDS_TEXTUREDATABASE,
                 texturedatabase) {
      texturedatabase.addr = PVR_DEV_ADDR(pds_texture_program_addr);
   }

   assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
   /* Copy the template so we can patch it per clear. */
   template =
      cmd_buffer->device->static_clear_state.ppp_templates[template_idx];

   template.config.pds_state = &pds_state;

   template.config.ispctl.upass =
      cmd_buffer->state.render_pass_info.isp_userpass;

   /* template_idx doubles as an aspect mask; program the stencil reference
    * if a stencil clear was requested.
    */
   if (template_idx & VK_IMAGE_ASPECT_STENCIL_BIT)
      template.config.ispa.sref = stencil;

   if (vs_has_rt_id_output) {
      template.config.output_sel.rhw_pres = true;
      template.config.output_sel.render_tgt_pres = true;
      /* X, Y, Z, W plus the render target id. */
      template.config.output_sel.vtxsize = 4 + 1;
   }

   result = pvr_emit_ppp_from_template(
      &cmd_buffer->state.current_sub_cmd->gfx.control_stream,
      &template,
      &pvr_bo);
   if (result != VK_SUCCESS) {
      /* Undo both allocations made above. */
      list_del(&pds_texture_program_bo->link);
      pvr_bo_suballoc_free(pds_texture_program_bo);

      list_del(&const_shareds_buffer->link);
      pvr_bo_suballoc_free(const_shareds_buffer);

      return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
   }

   list_add(&pvr_bo->link, &cmd_buffer->bo_list);

   return VK_SUCCESS;
}
1756 
1757 /**
1758  * \brief Record a deferred clear operation into the command buffer.
1759  *
1760  * Devices which don't have gs_rta_support require extra handling for RTA
1761  * clears. We setup a list of deferred clear transfer commands which will be
1762  * processed at the end of the graphics sub command to account for the missing
1763  * feature.
1764  */
pvr_add_deferred_rta_clear(struct pvr_cmd_buffer * cmd_buffer,const VkClearAttachment * attachment,const VkClearRect * rect,bool is_render_init)1765 static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer,
1766                                            const VkClearAttachment *attachment,
1767                                            const VkClearRect *rect,
1768                                            bool is_render_init)
1769 {
1770    struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
1771    struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
1772    const struct pvr_renderpass_hwsetup_render *hw_render =
1773       &pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx];
1774    struct pvr_transfer_cmd *transfer_cmd_list;
1775    const struct pvr_image_view *image_view;
1776    const struct pvr_image *image;
1777    uint32_t base_layer;
1778 
1779    const VkOffset3D offset = {
1780       .x = rect->rect.offset.x,
1781       .y = rect->rect.offset.y,
1782       .z = 1,
1783    };
1784    const VkExtent3D extent = {
1785       .width = rect->rect.extent.width,
1786       .height = rect->rect.extent.height,
1787       .depth = 1,
1788    };
1789 
1790    assert(
1791       !PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, gs_rta_support));
1792 
1793    transfer_cmd_list = util_dynarray_grow(&cmd_buffer->deferred_clears,
1794                                           struct pvr_transfer_cmd,
1795                                           rect->layerCount);
1796    if (!transfer_cmd_list) {
1797       return vk_command_buffer_set_error(&cmd_buffer->vk,
1798                                          VK_ERROR_OUT_OF_HOST_MEMORY);
1799    }
1800 
1801    /* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
1802     *
1803     *    "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
1804     *    include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
1805     *
1806     */
1807    if (attachment->aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) {
1808       assert(attachment->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
1809              attachment->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT ||
1810              attachment->aspectMask ==
1811                 (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
1812 
1813       image_view = pass_info->attachments[hw_render->ds_attach_idx];
1814    } else if (is_render_init) {
1815       uint32_t index;
1816 
1817       assert(attachment->colorAttachment < hw_render->color_init_count);
1818       index = hw_render->color_init[attachment->colorAttachment].index;
1819 
1820       image_view = pass_info->attachments[index];
1821    } else {
1822       const struct pvr_renderpass_hwsetup_subpass *hw_pass =
1823          pvr_get_hw_subpass(pass_info->pass, pass_info->subpass_idx);
1824       const struct pvr_render_subpass *sub_pass =
1825          &pass_info->pass->subpasses[hw_pass->index];
1826       const uint32_t attachment_idx =
1827          sub_pass->color_attachments[attachment->colorAttachment];
1828 
1829       assert(attachment->colorAttachment < sub_pass->color_count);
1830 
1831       image_view = pass_info->attachments[attachment_idx];
1832    }
1833 
1834    base_layer = image_view->vk.base_array_layer + rect->baseArrayLayer;
1835    image = vk_to_pvr_image(image_view->vk.image);
1836 
1837    for (uint32_t i = 0; i < rect->layerCount; i++) {
1838       struct pvr_transfer_cmd *transfer_cmd = &transfer_cmd_list[i];
1839 
1840       /* TODO: Add an init function for when we don't want to use
1841        * pvr_transfer_cmd_alloc()? And use it here.
1842        */
1843       *transfer_cmd = (struct pvr_transfer_cmd){
1844          .flags = PVR_TRANSFER_CMD_FLAGS_FILL,
1845          .cmd_buffer = cmd_buffer,
1846          .is_deferred_clear = true,
1847       };
1848 
1849       if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
1850          for (uint32_t j = 0; j < ARRAY_SIZE(transfer_cmd->clear_color); j++) {
1851             transfer_cmd->clear_color[j].ui =
1852                attachment->clearValue.color.uint32[j];
1853          }
1854       } else {
1855          transfer_cmd->clear_color[0].f =
1856             attachment->clearValue.depthStencil.depth;
1857          transfer_cmd->clear_color[1].ui =
1858             attachment->clearValue.depthStencil.stencil;
1859       }
1860 
1861       pvr_setup_transfer_surface(cmd_buffer->device,
1862                                  &transfer_cmd->dst,
1863                                  &transfer_cmd->scissor,
1864                                  image,
1865                                  base_layer + i,
1866                                  0,
1867                                  &offset,
1868                                  &extent,
1869                                  0.0f,
1870                                  image->vk.format,
1871                                  attachment->aspectMask);
1872    }
1873 
1874    return VK_SUCCESS;
1875 }
1876 
pvr_clear_attachments(struct pvr_cmd_buffer * cmd_buffer,uint32_t attachment_count,const VkClearAttachment * attachments,uint32_t rect_count,const VkClearRect * rects,bool is_render_init)1877 static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
1878                                   uint32_t attachment_count,
1879                                   const VkClearAttachment *attachments,
1880                                   uint32_t rect_count,
1881                                   const VkClearRect *rects,
1882                                   bool is_render_init)
1883 {
1884    const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
1885    struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
1886    const struct pvr_renderpass_hwsetup_subpass *hw_pass =
1887       pvr_get_hw_subpass(pass, pass_info->subpass_idx);
1888    struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
1889    struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
1890    struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index];
1891    uint32_t vs_output_size_in_bytes;
1892    bool vs_has_rt_id_output;
1893 
1894    /* TODO: This function can be optimized so that most of the device memory
1895     * gets allocated together in one go and then filled as needed. There might
1896     * also be opportunities to reuse pds code and data segments.
1897     */
1898 
1899    assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
1900 
1901    pvr_reset_graphics_dirty_state(cmd_buffer, false);
1902 
1903    /* We'll be emitting to the control stream. */
1904    sub_cmd->empty_cmd = false;
1905 
1906    vs_has_rt_id_output =
1907       pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);
1908 
1909    /* 4 because we're expecting the USC to output X, Y, Z, and W. */
1910    vs_output_size_in_bytes = PVR_DW_TO_BYTES(4);
1911    if (vs_has_rt_id_output)
1912       vs_output_size_in_bytes += PVR_DW_TO_BYTES(1);
1913 
1914    for (uint32_t i = 0; i < attachment_count; i++) {
1915       const VkClearAttachment *attachment = &attachments[i];
1916       struct pvr_pds_vertex_shader_program pds_program;
1917       struct pvr_pds_upload pds_program_upload = { 0 };
1918       uint64_t current_base_array_layer = ~0;
1919       VkResult result;
1920       float depth;
1921 
1922       if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
1923          uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
1924          const struct usc_mrt_resource *mrt_resource;
1925          uint32_t global_attachment_idx;
1926          uint32_t local_attachment_idx;
1927          VkFormat format;
1928 
1929          local_attachment_idx = attachment->colorAttachment;
1930 
1931          if (is_render_init) {
1932             struct pvr_renderpass_hwsetup_render *hw_render;
1933 
1934             assert(pass->hw_setup->render_count > 0);
1935             hw_render = &pass->hw_setup->renders[0];
1936 
1937             mrt_resource =
1938                &hw_render->init_setup.mrt_resources[local_attachment_idx];
1939 
1940             assert(local_attachment_idx < hw_render->color_init_count);
1941             global_attachment_idx =
1942                hw_render->color_init[local_attachment_idx].index;
1943          } else {
1944             mrt_resource = &hw_pass->setup.mrt_resources[local_attachment_idx];
1945 
1946             assert(local_attachment_idx < sub_pass->color_count);
1947             global_attachment_idx =
1948                sub_pass->color_attachments[local_attachment_idx];
1949          }
1950 
1951          if (global_attachment_idx == VK_ATTACHMENT_UNUSED)
1952             continue;
1953 
1954          assert(global_attachment_idx < pass->attachment_count);
1955          format = pass->attachments[global_attachment_idx].vk_format;
1956 
1957          assert(format != VK_FORMAT_UNDEFINED);
1958 
1959          pvr_get_hw_clear_color(format,
1960                                 attachment->clearValue.color,
1961                                 packed_clear_color);
1962 
1963          result = pvr_clear_color_attachment_static(cmd_buffer,
1964                                                     mrt_resource,
1965                                                     format,
1966                                                     packed_clear_color,
1967                                                     VK_IMAGE_ASPECT_COLOR_BIT,
1968                                                     0,
1969                                                     vs_has_rt_id_output);
1970          if (result != VK_SUCCESS)
1971             return;
1972       } else if (hw_pass->z_replicate != -1 &&
1973                  attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
1974          const VkClearColorValue clear_color = {
1975             .float32 = { [0] = attachment->clearValue.depthStencil.depth, },
1976          };
1977          const uint32_t template_idx = attachment->aspectMask |
1978                                        VK_IMAGE_ASPECT_COLOR_BIT;
1979          const uint32_t stencil = attachment->clearValue.depthStencil.stencil;
1980          uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
1981          const struct usc_mrt_resource *mrt_resource;
1982 
1983          mrt_resource = &hw_pass->setup.mrt_resources[hw_pass->z_replicate];
1984 
1985          pvr_get_hw_clear_color(VK_FORMAT_R32_SFLOAT,
1986                                 clear_color,
1987                                 packed_clear_color);
1988 
1989          result = pvr_clear_color_attachment_static(cmd_buffer,
1990                                                     mrt_resource,
1991                                                     VK_FORMAT_R32_SFLOAT,
1992                                                     packed_clear_color,
1993                                                     template_idx,
1994                                                     stencil,
1995                                                     vs_has_rt_id_output);
1996          if (result != VK_SUCCESS)
1997             return;
1998       } else {
1999          const uint32_t template_idx = attachment->aspectMask;
2000          struct pvr_static_clear_ppp_template template;
2001          struct pvr_suballoc_bo *pvr_bo;
2002 
2003          assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
2004          template =
2005             cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
2006 
2007          if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
2008             template.config.ispa.sref =
2009                attachment->clearValue.depthStencil.stencil;
2010          }
2011 
2012          if (vs_has_rt_id_output) {
2013             template.config.output_sel.rhw_pres = true;
2014             template.config.output_sel.render_tgt_pres = true;
2015             template.config.output_sel.vtxsize = 4 + 1;
2016          }
2017 
2018          result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
2019                                              &template,
2020                                              &pvr_bo);
2021          if (result != VK_SUCCESS) {
2022             pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2023             return;
2024          }
2025 
2026          list_add(&pvr_bo->link, &cmd_buffer->bo_list);
2027       }
2028 
2029       if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
2030          depth = attachment->clearValue.depthStencil.depth;
2031       else
2032          depth = 1.0f;
2033 
2034       if (vs_has_rt_id_output) {
2035          const struct pvr_device_static_clear_state *dev_clear_state =
2036             &cmd_buffer->device->static_clear_state;
2037          const struct pvr_suballoc_bo *multi_layer_vert_bo =
2038             dev_clear_state->usc_multi_layer_vertex_shader_bo;
2039 
2040          /* We can't use the device's passthrough pds program since it doesn't
2041           * have iterate_instance_id enabled. We'll be uploading code sections
2042           * per each clear rect.
2043           */
2044 
2045          /* TODO: See if we can allocate all the code section memory in one go.
2046           * We'd need to make sure that changing instance_id_modifier doesn't
2047           * change the code section size.
2048           * Also check if we can reuse the same code segment for each rect.
2049           * Seems like the instance_id_modifier is written into the data section
2050           * and used by the pds ADD instruction that way instead of it being
2051           * embedded into the code section.
2052           */
2053 
2054          pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
2055                                                            multi_layer_vert_bo);
2056       } else {
2057          /* We can reuse the device's code section but we'll need to upload data
2058           * sections so initialize the program.
2059           */
2060          pvr_pds_clear_vertex_shader_program_init_base(
2061             &pds_program,
2062             cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);
2063 
2064          pds_program_upload.code_offset =
2065             cmd_buffer->device->static_clear_state.pds.code_offset;
2066          /* TODO: The code size doesn't get used by pvr_clear_vdm_state() maybe
2067           * let's change its interface to make that clear and not set this?
2068           */
2069          pds_program_upload.code_size =
2070             cmd_buffer->device->static_clear_state.pds.code_size;
2071       }
2072 
2073       for (uint32_t j = 0; j < rect_count; j++) {
2074          struct pvr_pds_upload pds_program_data_upload;
2075          const VkClearRect *clear_rect = &rects[j];
2076          struct pvr_suballoc_bo *vertices_bo;
2077          uint32_t vdm_cs_size_in_dw;
2078          uint32_t *vdm_cs_buffer;
2079          VkResult result;
2080 
2081          if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
2082              (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
2083             result = pvr_add_deferred_rta_clear(cmd_buffer,
2084                                                 attachment,
2085                                                 clear_rect,
2086                                                 is_render_init);
2087             if (result != VK_SUCCESS)
2088                return;
2089 
2090             if (clear_rect->baseArrayLayer != 0)
2091                continue;
2092          }
2093 
2094          /* TODO: Allocate all the buffers in one go before the loop, and add
2095           * support to multi-alloc bo.
2096           */
2097          result = pvr_clear_vertices_upload(cmd_buffer->device,
2098                                             &clear_rect->rect,
2099                                             depth,
2100                                             &vertices_bo);
2101          if (result != VK_SUCCESS) {
2102             pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2103             return;
2104          }
2105 
2106          list_add(&vertices_bo->link, &cmd_buffer->bo_list);
2107 
2108          if (vs_has_rt_id_output) {
2109             if (current_base_array_layer != clear_rect->baseArrayLayer) {
2110                const uint32_t base_array_layer = clear_rect->baseArrayLayer;
2111                struct pvr_pds_upload pds_program_code_upload;
2112 
2113                result =
2114                   pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
2115                      &pds_program,
2116                      cmd_buffer,
2117                      base_array_layer,
2118                      &pds_program_code_upload);
2119                if (result != VK_SUCCESS) {
2120                   pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2121                   return;
2122                }
2123 
2124                pds_program_upload.code_offset =
2125                   pds_program_code_upload.code_offset;
2126                /* TODO: The code size doesn't get used by pvr_clear_vdm_state()
2127                 * maybe let's change its interface to make that clear and not
2128                 * set this?
2129                 */
2130                pds_program_upload.code_size = pds_program_code_upload.code_size;
2131 
2132                current_base_array_layer = base_array_layer;
2133             }
2134 
2135             result =
2136                pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
2137                   &pds_program,
2138                   cmd_buffer,
2139                   vertices_bo,
2140                   &pds_program_data_upload);
2141             if (result != VK_SUCCESS)
2142                return;
2143          } else {
2144             result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
2145                &pds_program,
2146                cmd_buffer,
2147                vertices_bo,
2148                &pds_program_data_upload);
2149             if (result != VK_SUCCESS)
2150                return;
2151          }
2152 
2153          pds_program_upload.data_offset = pds_program_data_upload.data_offset;
2154          pds_program_upload.data_size = pds_program_data_upload.data_size;
2155 
2156          vdm_cs_size_in_dw =
2157             pvr_clear_vdm_state_get_size_in_dw(dev_info,
2158                                                clear_rect->layerCount);
2159 
2160          pvr_csb_set_relocation_mark(&sub_cmd->control_stream);
2161 
2162          vdm_cs_buffer =
2163             pvr_csb_alloc_dwords(&sub_cmd->control_stream, vdm_cs_size_in_dw);
2164          if (!vdm_cs_buffer) {
2165             pvr_cmd_buffer_set_error_unwarned(cmd_buffer,
2166                                               sub_cmd->control_stream.status);
2167             return;
2168          }
2169 
2170          pvr_pack_clear_vdm_state(dev_info,
2171                                   &pds_program_upload,
2172                                   pds_program.temps_used,
2173                                   4,
2174                                   vs_output_size_in_bytes,
2175                                   clear_rect->layerCount,
2176                                   vdm_cs_buffer);
2177 
2178          pvr_csb_clear_relocation_mark(&sub_cmd->control_stream);
2179       }
2180    }
2181 }
2182 
/* Clear a single attachment/rect pair as part of render initialization.
 *
 * Thin convenience wrapper around pvr_clear_attachments() for the
 * one-attachment, one-rect case with is_render_init set to true.
 */
void pvr_clear_attachments_render_init(struct pvr_cmd_buffer *cmd_buffer,
                                       const VkClearAttachment *attachment,
                                       const VkClearRect *rect)
{
   pvr_clear_attachments(cmd_buffer, 1U, attachment, 1U, rect, true);
}
2189 
void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
                             uint32_t attachmentCount,
                             const VkClearAttachment *pAttachments,
                             uint32_t rectCount,
                             const VkClearRect *pRects)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
   assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

   /* TODO: Possible optimizations:
    *  - A full screen clear could instead update the clear values of the
    *    matching attachment index.
    *  - A full screen color attachment clear could be folded into the
    *    background shader via a load op override, eliding any load op loads
    *    currently in the background shader as well as the usual frag kick
    *    for geometry clear.
    */

   /* When a depth/stencil format is bound, record whether any requested
    * clear touches the depth or stencil aspect so the sub command's
    * modification and usage flags can be updated.
    */
   if (state->depth_format != VK_FORMAT_UNDEFINED) {
      bool clears_stencil = false;
      bool clears_depth = false;
      uint32_t full_screen_clears;

      for (uint32_t i = 0U; i < attachmentCount; i++) {
         const VkImageAspectFlags aspects = pAttachments[i].aspectMask;

         clears_stencil |= !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
         clears_depth |= !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT);

         /* Both aspects seen; the remaining attachments can't add more. */
         if (clears_stencil && clears_depth)
            break;
      }

      sub_cmd->modifies_stencil |= clears_stencil;
      sub_cmd->modifies_depth |= clears_depth;

      /* Only clears with a baseArrayLayer of 0 are of interest here, since
       * any attachment clears moved to the background shader must apply to
       * all of the attachment's sub resources.
       */
      full_screen_clears = pvr_get_max_layers_covering_target(
         state->render_pass_info.render_area,
         0,
         rectCount,
         pRects);

      if (full_screen_clears > 0) {
         if (clears_stencil &&
             sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }

         if (clears_depth &&
             sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }
      }
   }

   pvr_clear_attachments(cmd_buffer,
                         attachmentCount,
                         pAttachments,
                         rectCount,
                         pRects,
                         false);
}
2266 
void pvr_CmdResolveImage2(VkCommandBuffer commandBuffer,
                          const VkResolveImageInfo2 *pResolveImageInfo)
{
   PVR_FROM_HANDLE(pvr_image, src, pResolveImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pResolveImageInfo->dstImage);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Each resolve region is handed to the copy-or-resolve path as an
    * equivalent VkImageCopy2 region; stop at the first failure (the callee
    * is responsible for recording any error state).
    */
   for (uint32_t i = 0U; i < pResolveImageInfo->regionCount; i++) {
      const VkImageResolve2 *resolve_region = &pResolveImageInfo->pRegions[i];
      VkResult result;
      VkImageCopy2 copy_region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
         .srcSubresource = resolve_region->srcSubresource,
         .srcOffset = resolve_region->srcOffset,
         .dstSubresource = resolve_region->dstSubresource,
         .dstOffset = resolve_region->dstOffset,
         .extent = resolve_region->extent,
      };

      result = pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                      src,
                                                      dst,
                                                      &copy_region);
      if (result != VK_SUCCESS)
         return;
   }
}
2292