xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/anv_blorp.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"
#include "genxml/gen8_pack.h"

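/* blorp_context::lookup_shader hook: look up a previously compiled blorp
 * kernel in the device's internal shader cache.
 */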
static bool
lookup_blorp_shader(struct blorp_batch *batch,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_shader_bin *bin =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   key, key_size, NULL);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

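/* blorp_context::upload_shader hook: add a freshly compiled blorp kernel to
 * the device's internal shader cache.
 */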
static bool
upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const void *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_pipeline_bind_map empty_bind_map = {};
   struct anv_push_descriptor_info empty_push_desc_info = {};
   struct anv_shader_upload_params upload_params = {
      .stage               = stage,
      .key_data            = key,
      .key_size            = key_size,
      .kernel_data         = kernel,
      .kernel_size         = kernel_size,
      .prog_data           = prog_data,
      .prog_data_size      = prog_data_size,
      .bind_map            = &empty_bind_map,
      .push_desc_info      = &empty_push_desc_info,
   };

   struct anv_shader_bin *bin =
      anv_device_upload_kernel(device, device->internal_cache, &upload_params);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

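/* blorp_context::upload_dynamic_state hook: store blorp's cached dynamic
 * state packets in the device's dynamic state pool, indexed by
 * blorp_dynamic_state.
 */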
static void
upload_dynamic_state(struct blorp_context *context,
                     const void *data, uint32_t size,
                     uint32_t alignment, enum blorp_dynamic_state name)
{
   struct anv_device *device = context->driver_ctx;

   device->blorp.dynamic_states[name] =
      anv_state_pool_emit_data(&device->dynamic_state_pool,
                               size, alignment, data);
}

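/* Create the device-level blorp context and hook up the anv callbacks for
 * shader lookup/upload, batch execution and dynamic state upload.
 */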
void
anv_device_init_blorp(struct anv_device *device)
{
   const struct blorp_config config = {
      .use_mesh_shading = device->vk.enabled_extensions.EXT_mesh_shader,
      .use_unrestricted_depth_range =
         device->vk.enabled_extensions.EXT_depth_range_unrestricted,
      .use_cached_dynamic_states = true,
   };

   blorp_init_brw(&device->blorp.context, device, &device->isl_dev,
                  device->physical->compiler, &config);
   device->blorp.context.lookup_shader = lookup_blorp_shader;
   device->blorp.context.upload_shader = upload_blorp_shader;
   device->blorp.context.enable_tbimr = device->physical->instance->enable_tbimr;
   device->blorp.context.exec = anv_genX(device->info, blorp_exec);
   device->blorp.context.upload_dynamic_state = upload_dynamic_state;

   anv_genX(device->info, blorp_init_dynamic_states)(&device->blorp.context);
}

void
anv_device_finish_blorp(struct anv_device *device)
{
#ifdef HAVE_VALGRIND
   /* We only need to free these to prevent valgrind errors.  The backing
    * BO will go away in a couple of lines so we don't actually leak.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(device->blorp.dynamic_states); i++) {
      anv_state_pool_free(&device->dynamic_state_pool,
                          device->blorp.dynamic_states[i]);
   }
#endif
   blorp_finish(&device->blorp.context);
}

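/* Initialize a blorp batch on this command buffer, routing the work to the
 * compute or blitter path when the queue family has no graphics support.
 */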
static void
anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch, enum blorp_batch_flags flags)
{
   VkQueueFlags queue_flags = cmd_buffer->queue_family->queueFlags;

   if (queue_flags & VK_QUEUE_GRAPHICS_BIT) {
      /* blorp runs on the render engine by default */
   } else if (queue_flags & VK_QUEUE_COMPUTE_BIT) {
      flags |= BLORP_BATCH_USE_COMPUTE;
   } else if (queue_flags & VK_QUEUE_TRANSFER_BIT) {
      flags |= BLORP_BATCH_USE_BLITTER;
   } else {
      unreachable("unknown queue family");
   }

   /* Can't have both flags at the same time. */
   assert((flags & BLORP_BATCH_USE_BLITTER) == 0 ||
          (flags & BLORP_BATCH_USE_COMPUTE) == 0);

   blorp_batch_init(&cmd_buffer->device->blorp.context, batch, cmd_buffer, flags);
}

static void
anv_blorp_batch_finish(struct blorp_batch *batch)
{
   blorp_batch_finish(batch);
}

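/* Translate the command buffer's engine class into ISL surface usage flags
 * (render target/texture, storage, or blitter src/dst), optionally adding
 * the protected bit.
 */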
static isl_surf_usage_flags_t
get_usage_flag_for_cmd_buffer(const struct anv_cmd_buffer *cmd_buffer,
                              bool is_dest, bool protected)
{
   isl_surf_usage_flags_t usage;

   switch (cmd_buffer->queue_family->engine_class) {
   case INTEL_ENGINE_CLASS_RENDER:
      usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT :
                        ISL_SURF_USAGE_TEXTURE_BIT;
      break;
   case INTEL_ENGINE_CLASS_COMPUTE:
      usage = is_dest ? ISL_SURF_USAGE_STORAGE_BIT :
                        ISL_SURF_USAGE_TEXTURE_BIT;
      break;
   case INTEL_ENGINE_CLASS_COPY:
      usage = is_dest ? ISL_SURF_USAGE_BLITTER_DST_BIT :
                        ISL_SURF_USAGE_BLITTER_SRC_BIT;
      break;
   default:
      unreachable("Unhandled engine class");
   }

   if (protected)
      usage |= ISL_SURF_USAGE_PROTECTED_BIT;

   return usage;
}

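/* Wrap a raw anv_address in a linear 2D isl_surf/blorp_surf of the given
 * format and dimensions so blorp can treat buffer memory as an image.
 */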
static void
get_blorp_surf_for_anv_address(struct anv_cmd_buffer *cmd_buffer,
                               struct anv_address address,
                               uint32_t width, uint32_t height,
                               uint32_t row_pitch, enum isl_format format,
                               bool is_dest, bool protected,
                               struct blorp_surf *blorp_surf,
                               struct isl_surf *isl_surf)
{
   bool ok UNUSED;
   isl_surf_usage_flags_t usage =
      get_usage_flag_for_cmd_buffer(cmd_buffer, is_dest, protected);

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(cmd_buffer->device, address.bo, usage),
      },
   };

   ok = isl_surf_init(&cmd_buffer->device->isl_dev, isl_surf,
                     .dim = ISL_SURF_DIM_2D,
                     .format = format,
                     .width = width,
                     .height = height,
                     .depth = 1,
                     .levels = 1,
                     .array_len = 1,
                     .samples = 1,
                     .row_pitch_B = row_pitch,
                     .usage = usage,
                     .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              bool is_dest,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   get_blorp_surf_for_anv_address(cmd_buffer,
                                  anv_address_add(buffer->address, offset),
                                  width, height, row_pitch, format,
                                  is_dest, anv_buffer_is_protected(buffer),
                                  blorp_surf, isl_surf);
}

/* Pick something high enough that it won't be used in core and low enough it
 * will never map to an extension.
 */
#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000

static struct blorp_address
anv_to_blorp_address(struct anv_address addr)
{
   return (struct blorp_address) {
      .buffer = addr.bo,
      .offset = addr.offset,
   };
}

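/* Fill out a blorp_surf for one aspect of an anv_image, resolving the aux
 * usage from the image layout (unless ANV_IMAGE_LAYOUT_EXPLICIT_AUX is used)
 * and attaching the aux surface and clear color address when present.
 */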
static void
get_blorp_surf_for_anv_image(const struct anv_cmd_buffer *cmd_buffer,
                             const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             VkImageUsageFlags usage,
                             VkImageLayout layout,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   const struct anv_device *device = cmd_buffer->device;
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

   if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
      assert(usage != 0);
      aux_usage = anv_layout_to_aux_usage(device->info, image,
                                          aspect, usage, layout,
                                          cmd_buffer->queue_family->queueFlags);
   }

   isl_surf_usage_flags_t isl_usage =
      get_usage_flag_for_cmd_buffer(cmd_buffer,
                                    usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                    anv_image_is_protected(image));
   const struct anv_surface *surface = &image->planes[plane].primary_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, isl_usage),
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
      const struct anv_address aux_address =
         anv_image_address(image, &aux_surface->memory_range);

      blorp_surf->aux_usage = aux_usage;
      blorp_surf->aux_surf = &aux_surface->isl;

      if (!anv_address_is_null(aux_address)) {
         blorp_surf->aux_addr = (struct blorp_address) {
            .buffer = aux_address.bo,
            .offset = aux_address.offset,
            .mocs = anv_mocs(device, aux_address.bo, isl_usage),
         };
      }

      /* If we're doing a partial resolve, then we need the indirect clear
       * color.  If we are doing a fast clear and want to store/update the
       * clear color, we also pass the address to blorp; otherwise it will only
       * stomp the CCS to a particular value and won't care about the format or
       * clear value.
       */
      if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
      } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
         blorp_surf->clear_color = anv_image_hiz_clear_value(image);
      }
   }
}

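/* Copy one VkImageCopy2 region between two images with blorp_copy, iterating
 * over aspects and layers; for 3D images the Z offset/extent selects the
 * layer range.
 */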
static void
copy_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageCopy2 *region)
{
   VkOffset3D srcOffset =
      vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   VkOffset3D dstOffset =
      vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
   VkExtent3D extent =
      vk_image_sanitize_extent(&src_image->vk, region->extent);

   const uint32_t dst_level = region->dstSubresource.mipLevel;
   unsigned dst_base_layer, layer_count;
   if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      dst_base_layer = region->dstOffset.z;
      layer_count = region->extent.depth;
   } else {
      dst_base_layer = region->dstSubresource.baseArrayLayer;
      layer_count = vk_image_subresource_layer_count(&dst_image->vk,
                                                     &region->dstSubresource);
   }

   const uint32_t src_level = region->srcSubresource.mipLevel;
   unsigned src_base_layer;
   if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      src_base_layer = region->srcOffset.z;
   } else {
      src_base_layer = region->srcSubresource.baseArrayLayer;
      assert(layer_count ==
             vk_image_subresource_layer_count(&src_image->vk,
                                              &region->srcSubresource));
   }

   VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
      dst_mask = region->dstSubresource.aspectMask;

   assert(anv_image_aspects_compatible(src_mask, dst_mask));

   if (util_bitcount(src_mask) > 1) {
      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer,
                                      src_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                      src_image_layout, ISL_AUX_USAGE_NONE,
                                      &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer,
                                      dst_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                      dst_image_layout, ISL_AUX_USAGE_NONE,
                                      &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                           1UL << aspect_bit,
                                           dst_surf.aux_usage, dst_level,
                                           dst_base_layer, layer_count);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   } else {
      /* This case handles YCbCr images: the aspect masks are compatible but
       * don't need to be the same.
       */
      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(cmd_buffer, src_image, src_mask,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE,
                                   &src_surf);
      get_blorp_surf_for_anv_image(cmd_buffer, dst_image, dst_mask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE,
                                   &dst_surf);
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
                                        dst_surf.aux_usage, dst_level,
                                        dst_base_layer, layer_count);

      for (unsigned i = 0; i < layer_count; i++) {
         blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                    &dst_surf, dst_level, dst_base_layer + i,
                    srcOffset.x, srcOffset.y,
                    dstOffset.x, dstOffset.y,
                    extent.width, extent.height);
      }
   }
}

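/* Prepare to run blorp on the companion RCS command buffer: make sure the
 * companion exists, schedule an aux-map invalidation on it if needed, and
 * begin the syncpoint that end_main_rcs_cmd_buffer_done() later completes.
 */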
static struct anv_state
record_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer)
{
   const struct intel_device_info *info = cmd_buffer->device->info;

   const VkResult result = anv_cmd_buffer_ensure_rcs_companion(cmd_buffer);
   if (result != VK_SUCCESS) {
      anv_batch_set_error(&cmd_buffer->batch, result);
      return ANV_STATE_NULL;
   }

   assert(cmd_buffer->companion_rcs_cmd_buffer != NULL);

   /* Re-emit the aux table register in every command buffer.  This way we are
    * guaranteed to have the table even if this command buffer doesn't
    * initialize any images.
    */
   if (cmd_buffer->device->info->has_aux_map) {
      anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
                                 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
                                 "new cmd buffer with aux-tt");
   }

   return anv_genX(info, cmd_buffer_begin_companion_rcs_syncpoint)(cmd_buffer);
}

static void
end_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_state syncpoint)
{
   const struct intel_device_info *info = cmd_buffer->device->info;
   anv_genX(info, cmd_buffer_end_companion_rcs_syncpoint)(cmd_buffer,
                                                          syncpoint);
}

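/* On the blitter queue, check whether any copy region uses a format the
 * blitter can't handle natively (3-component formats, other than linear
 * 96 bpb) and therefore needs the companion RCS command buffer.
 */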
static bool
anv_blorp_blitter_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
                                       struct anv_image *image,
                                       const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo,
                                       const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo)
{
   if (!anv_cmd_buffer_is_blitter_queue(cmd_buffer))
      return false;

   assert((pCopyBufferToImageInfo && !pCopyImageToBufferInfo) ||
          (pCopyImageToBufferInfo && !pCopyBufferToImageInfo));

   bool blorp_execute_on_companion = false;
   VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_NONE;
   const uint32_t region_count = pCopyBufferToImageInfo ?
                                 pCopyBufferToImageInfo->regionCount :
                                 pCopyImageToBufferInfo->regionCount;

   for (unsigned r = 0; r < region_count &&
                            !blorp_execute_on_companion; r++) {
      if (pCopyBufferToImageInfo) {
         aspect_mask =
            pCopyBufferToImageInfo->pRegions[r].imageSubresource.aspectMask;
      } else {
         aspect_mask =
            pCopyImageToBufferInfo->pRegions[r].imageSubresource.aspectMask;
      }

      enum isl_format linear_format =
         anv_get_isl_format(cmd_buffer->device->info, image->vk.format,
                            aspect_mask, VK_IMAGE_TILING_LINEAR);
      const struct isl_format_layout *linear_fmtl =
         isl_format_get_layout(linear_format);

      switch (linear_fmtl->bpb) {
      case 96:
         /* We can only support linear mode for 96 bpp on the blitter engine. */
         blorp_execute_on_companion |=
            image->vk.tiling != VK_IMAGE_TILING_LINEAR;
         break;
      default:
         blorp_execute_on_companion |= linear_fmtl->bpb % 3 == 0;
         break;
      }
   }

   return blorp_execute_on_companion;
}

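/* Returns true when a transfer recorded on a compute or blitter queue must
 * instead run on the companion RCS command buffer (MSAA destinations, or
 * format-emulated images on the blitter).
 */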
static bool
anv_blorp_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
                               struct anv_image *dst_image)
{
   /* MSAA images have to be dealt with on the companion RCS command buffer
    * for both the CCS and BCS engines.
    */
   if ((anv_cmd_buffer_is_blitter_queue(cmd_buffer) ||
        anv_cmd_buffer_is_compute_queue(cmd_buffer)) &&
       dst_image->vk.samples > 1)
      return true;

   /* Emulation of formats is done through a compute shader, so we need
    * the companion command buffer when running on the BCS engine.
    */
   if (anv_cmd_buffer_is_blitter_queue(cmd_buffer) &&
       dst_image->emu_plane_format != VK_FORMAT_UNDEFINED)
      return true;

   return false;
}

void anv_CmdCopyImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageInfo2*                     pCopyImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);

   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   if (anv_blorp_execute_on_companion(cmd_buffer, dst_image)) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      copy_image(cmd_buffer, &batch,
                 src_image, pCopyImageInfo->srcImageLayout,
                 dst_image, pCopyImageInfo->dstImageLayout,
                 &pCopyImageInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);

   if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
      assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
      const enum anv_pipe_bits pipe_bits =
         anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
         ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
      anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
                                "Copy flush before astc emu");

      for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
         const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];
         const VkOffset3D block_offset = vk_image_offset_to_elements(
               &dst_image->vk, region->dstOffset);
         const VkExtent3D block_extent = vk_image_extent_to_elements(
               &src_image->vk, region->extent);
         anv_astc_emu_process(cmd_buffer, dst_image,
                              pCopyImageInfo->dstImageLayout,
                              &region->dstSubresource,
                              block_offset, block_extent);
      }
   }

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

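/* Pick an uncompressed UINT format with the requested byte size per texel;
 * used to reinterpret raw buffer data for blorp copies and fills.
 */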
static enum isl_format
isl_format_for_size(unsigned size_B)
{
   /* Prefer 32-bit per component formats for CmdFillBuffer */
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R16_UINT;
   case 3:  return ISL_FORMAT_R8G8B8_UINT;
   case 4:  return ISL_FORMAT_R32_UINT;
   case 6:  return ISL_FORMAT_R16G16B16_UINT;
   case 8:  return ISL_FORMAT_R32G32_UINT;
   case 12: return ISL_FORMAT_R32G32B32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unknown format size");
   }
}

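/* Shared implementation for vkCmdCopyBufferToImage2/vkCmdCopyImageToBuffer2:
 * the buffer side is bound as a linear surface of a same-bpb "plain" format
 * and copied with blorp_copy one slice at a time.
 */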
static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     VkImageLayout image_layout,
                     const VkBufferImageCopy2* region,
                     bool buffer_to_image)
{
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;

   get_blorp_surf_for_anv_image(cmd_buffer, anv_image, aspect,
                                buffer_to_image ?
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT :
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                image_layout, ISL_AUX_USAGE_NONE,
                                &image.surf);
   image.offset =
      vk_image_sanitize_offset(&anv_image->vk, region->imageOffset);
   image.level = region->imageSubresource.mipLevel;

   VkExtent3D extent =
      vk_image_sanitize_extent(&anv_image->vk, region->imageExtent);
   if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
      image.offset.z = region->imageSubresource.baseArrayLayer;
      extent.depth =
         vk_image_subresource_layer_count(&anv_image->vk,
                                          &region->imageSubresource);
   }

   const enum isl_format linear_format =
      anv_get_isl_format(cmd_buffer->device->info, anv_image->vk.format,
                         aspect, VK_IMAGE_TILING_LINEAR);
   const struct isl_format_layout *linear_fmtl =
      isl_format_get_layout(linear_format);

   const struct vk_image_buffer_layout buffer_layout =
      vk_image_buffer_copy_layout(&anv_image->vk, region);

   /* Some formats have additional restrictions which may cause ISL to
    * fail to create a surface for us.  For example, YCbCr formats
    * have to have 2-pixel aligned strides.
    *
    * To avoid these issues, we always bind the buffer as if it's a
    * "normal" format like RGBA32_UINT.  Since we're using blorp_copy,
    * the format doesn't matter as long as it has the right bpb.
    */
   const VkExtent2D buffer_extent = {
      .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
      .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
   };
   const enum isl_format buffer_format =
      isl_format_for_size(linear_fmtl->bpb / 8);

   struct isl_surf buffer_isl_surf;
   get_blorp_surf_for_anv_buffer(cmd_buffer,
                                 anv_buffer, region->bufferOffset,
                                 buffer_extent.width, buffer_extent.height,
                                 buffer_layout.row_stride_B, buffer_format,
                                 false, &buffer.surf, &buffer_isl_surf);

   if (&image == dst) {
      /* In this case, the source is the buffer and, since blorp takes its
       * copy dimensions in terms of the source format, we have to use the
       * scaled down version for compressed textures because the source
       * format is an RGB format.
       */
      extent.width = buffer_extent.width;
      extent.height = buffer_extent.height;

      anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                        aspect, dst->surf.aux_usage,
                                        dst->level,
                                        dst->offset.z, extent.depth);
   }

   for (unsigned z = 0; z < extent.depth; z++) {
      blorp_copy(batch, &src->surf, src->level, src->offset.z,
                 &dst->surf, dst->level, dst->offset.z,
                 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                 extent.width, extent.height);

      image.offset.z++;
      buffer.surf.addr.offset += buffer_layout.image_stride_B;
   }
}

void anv_CmdCopyBufferToImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferToImageInfo2*             pCopyBufferToImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   bool blorp_execute_on_companion =
      anv_blorp_execute_on_companion(cmd_buffer, dst_image);

   /* If any of the aspects is incompatible with the blitter engine, use the
    * companion RCS command buffer for the blit operation, since 3-component
    * formats are not supported natively on the blitter except for 96 bpb.
    */
   blorp_execute_on_companion |=
      anv_blorp_blitter_execute_on_companion(cmd_buffer, dst_image,
                                             pCopyBufferToImageInfo, NULL);

   if (blorp_execute_on_companion) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r], true);
   }

   anv_blorp_batch_finish(&batch);

   if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
      assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
      const enum anv_pipe_bits pipe_bits =
         anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
         ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
      anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
                                "Copy flush before astc emu");

      for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
         const VkBufferImageCopy2 *region =
            &pCopyBufferToImageInfo->pRegions[r];
         const VkOffset3D block_offset = vk_image_offset_to_elements(
               &dst_image->vk, region->imageOffset);
         const VkExtent3D block_extent = vk_image_extent_to_elements(
               &dst_image->vk, region->imageExtent);
         anv_astc_emu_process(cmd_buffer, dst_image,
                              pCopyBufferToImageInfo->dstImageLayout,
                              &region->imageSubresource,
                              block_offset, block_extent);
      }
   }

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

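/* Record pending buffer-write bits for the query code, choosing compute vs.
 * render-target pending bits based on the current pipeline; nothing is
 * tracked on the blitter queue.
 */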
static void
anv_add_buffer_write_pending_bits(struct anv_cmd_buffer *cmd_buffer,
                                  const char *reason)
{
   const struct intel_device_info *devinfo = cmd_buffer->device->info;

   if (anv_cmd_buffer_is_blitter_queue(cmd_buffer))
      return;

   cmd_buffer->state.queries.buffer_write_bits |=
      (cmd_buffer->state.current_pipeline ==
       cmd_buffer->device->physical->gpgpu_pipeline_value) ?
      ANV_QUERY_COMPUTE_WRITES_PENDING_BITS :
      ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo);
}

void anv_CmdCopyImageToBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageToBufferInfo2*             pCopyImageToBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   UNUSED struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   bool blorp_execute_on_companion =
      anv_blorp_execute_on_companion(cmd_buffer, src_image);

   /* If any of the aspects is incompatible with the blitter engine, use the
    * companion RCS command buffer for the blit operation, since 3-component
    * formats are not supported natively on the blitter except for 96 bpb.
    */
   blorp_execute_on_companion |=
      anv_blorp_blitter_execute_on_companion(cmd_buffer, src_image, NULL,
                                             pCopyImageToBufferInfo);

   if (blorp_execute_on_companion) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r], false);
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "after copy image to buffer");

   anv_blorp_batch_finish(&batch);

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

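/* Sort a source/destination coordinate pair so both ranges ascend and report
 * whether the blit has to be mirrored along that axis.
 */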
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

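/* Perform one VkImageBlit2 region with blorp_blit, per aspect and per layer,
 * handling coordinate flips and 3D depth stepping, and redirecting reads to
 * the hidden format-emulation plane when present.
 */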
static void
blit_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageBlit2 *region,
           VkFilter filter)
{
   const VkImageSubresourceLayers *src_res = &region->srcSubresource;
   const VkImageSubresourceLayers *dst_res = &region->dstSubresource;

   struct blorp_surf src, dst;

   enum blorp_filter blorp_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      blorp_filter = BLORP_FILTER_NEAREST;
      break;
   case VK_FILTER_LINEAR:
      blorp_filter = BLORP_FILTER_BILINEAR;
      break;
   default:
      unreachable("Invalid filter");
   }

   assert(anv_image_aspects_compatible(src_res->aspectMask,
                                       dst_res->aspectMask));

   anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   src_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE, &src);
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   dst_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE, &dst);

      VkFormat src_vk_format = src_image->vk.format;

      if (src_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
         /* redirect src to the hidden plane */
         const uint32_t plane = src_image->n_planes;
         const struct anv_surface *surface =
            &src_image->planes[plane].primary_surface;
         const struct anv_address address =
            anv_image_address(src_image, &surface->memory_range);
         src.surf = &surface->isl,
         src.addr.offset = address.offset;

         src_vk_format = src_image->emu_plane_format;
      }

      struct anv_format_plane src_format =
         anv_get_format_aspect(cmd_buffer->device->info, src_vk_format,
                               1U << aspect_bit, src_image->vk.tiling);
      struct anv_format_plane dst_format =
         anv_get_format_aspect(cmd_buffer->device->info, dst_image->vk.format,
                               1U << aspect_bit, dst_image->vk.tiling);

      unsigned dst_start, dst_end;
      if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = region->dstOffsets[0].z;
         dst_end = region->dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start +
            vk_image_subresource_layer_count(&dst_image->vk, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = region->srcOffsets[0].z;
         src_end = region->srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start +
            vk_image_subresource_layer_count(&src_image->vk, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      const unsigned num_layers = dst_end - dst_start;
      float src_z_step = (float)(src_end - src_start) / (float)num_layers;

      /* There is no interpolation to the pixel center during rendering, so
       * add the 0.5 offset ourselves here. */
      float depth_center_offset = 0;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
         depth_center_offset = 0.5 / num_layers * (src_end - src_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
         depth_center_offset *= -1;
      }

      unsigned src_x0 = region->srcOffsets[0].x;
      unsigned src_x1 = region->srcOffsets[1].x;
      unsigned dst_x0 = region->dstOffsets[0].x;
      unsigned dst_x1 = region->dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = region->srcOffsets[0].y;
      unsigned src_y1 = region->srcOffsets[1].y;
      unsigned dst_y0 = region->dstOffsets[0].y;
      unsigned dst_y1 = region->dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        1U << aspect_bit,
                                        dst.aux_usage,
                                        dst_res->mipLevel,
                                        dst_start, num_layers);

      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         float src_z = src_start + i * src_z_step + depth_center_offset;

         blorp_blit(batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    blorp_filter, flip_x, flip_y);
      }
   }
}

void anv_CmdBlitImage2(
    VkCommandBuffer                             commandBuffer,
    const VkBlitImageInfo2*                     pBlitImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
      blit_image(cmd_buffer, &batch,
                 src_image, pBlitImageInfo->srcImageLayout,
                 dst_image, pBlitImageInfo->dstImageLayout,
                 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
   }

   anv_blorp_batch_finish(&batch);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX, in which
    * case the MIN2() will take the other one.  If both are 0 then we will
    * hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

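/* Copy one VkBufferCopy2 region with blorp_buffer_copy, picking MOCS based
 * on whether each buffer is the source or the destination of the copy.
 */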
static void
copy_buffer(struct anv_device *device,
            struct blorp_batch *batch,
            struct anv_buffer *src_buffer,
            struct anv_buffer *dst_buffer,
            const VkBufferCopy2 *region)
{
   struct blorp_address src = {
      .buffer = src_buffer->address.bo,
      .offset = src_buffer->address.offset + region->srcOffset,
      .mocs = anv_mocs(device, src_buffer->address.bo,
                       blorp_batch_isl_copy_usage(batch, false /* is_dest */,
                                                  anv_buffer_is_protected(src_buffer))),
   };
   struct blorp_address dst = {
      .buffer = dst_buffer->address.bo,
      .offset = dst_buffer->address.offset + region->dstOffset,
      .mocs = anv_mocs(device, dst_buffer->address.bo,
                       blorp_batch_isl_copy_usage(batch, true /* is_dest */,
                                                  anv_buffer_is_protected(dst_buffer))),
   };

   blorp_buffer_copy(batch, src, dst, region->size);
}

void anv_CmdCopyBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferInfo2*                    pCopyBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        cmd_buffer->state.current_pipeline ==
                        cmd_buffer->device->physical->gpgpu_pipeline_value ?
                        BLORP_BATCH_USE_COMPUTE : 0);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
                  &pCopyBufferInfo->pRegions[r]);
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "after copy buffer");

   anv_blorp_batch_finish(&batch);
}


void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        cmd_buffer->state.current_pipeline ==
                        cmd_buffer->device->physical->gpgpu_pipeline_value ?
                        BLORP_BATCH_USE_COMPUTE : 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU.  Flush the
    * texture cache so we don't get anything stale.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before UpdateBuffer");

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_temporary_state(cmd_buffer, copy_size, 64);
      struct anv_address tmp_addr =
         anv_cmd_buffer_temporary_state_address(cmd_buffer, tmp_data);

      memcpy(tmp_data.map, pData, copy_size);

      struct blorp_address src = {
         .buffer = tmp_addr.bo,
         .offset = tmp_addr.offset,
         .mocs = anv_mocs(cmd_buffer->device, NULL,
                          get_usage_flag_for_cmd_buffer(cmd_buffer,
                                                        false /* is_dest */,
                                                        false /* protected */)),
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->address.bo,
         .offset = dst_buffer->address.offset + dstOffset,
         .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
                          get_usage_flag_for_cmd_buffer(
                             cmd_buffer,
                             true /* is_dest */,
                             anv_buffer_is_protected(dst_buffer))),
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "update buffer");

   anv_blorp_batch_finish(&batch);
}

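/* Fill a buffer range with a 32-bit pattern by clearing it as a series of 2D
 * surfaces: full MAX_SURFACE_DIM x MAX_SURFACE_DIM tiles first, then a
 * remaining multi-row rectangle, then the final partial row.
 */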
1158 void
anv_cmd_buffer_fill_area(struct anv_cmd_buffer * cmd_buffer,struct anv_address address,VkDeviceSize size,uint32_t data,bool protected)1159 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
1160                          struct anv_address address,
1161                          VkDeviceSize size,
1162                          uint32_t data,
1163                          bool protected)
1164 {
1165    struct blorp_surf surf;
1166    struct isl_surf isl_surf;
1167 
1168    struct blorp_batch batch;
1169    anv_blorp_batch_init(cmd_buffer, &batch,
1170                         cmd_buffer->state.current_pipeline ==
1171                         cmd_buffer->device->physical->gpgpu_pipeline_value ?
1172                         BLORP_BATCH_USE_COMPUTE : 0);
1173 
1174    /* First, we compute the biggest format that can be used with the
1175     * given offsets and size.
1176     */
1177    int bs = 16;
1178    uint64_t offset = address.offset;
1179    bs = gcd_pow2_u64(bs, offset);
1180    bs = gcd_pow2_u64(bs, size);
1181    enum isl_format isl_format = isl_format_for_size(bs);
1182 
1183    union isl_color_value color = {
1184       .u32 = { data, data, data, data },
1185    };
1186 
1187    const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
1188    while (size >= max_fill_size) {
1189       get_blorp_surf_for_anv_address(cmd_buffer,
1190                                      (struct anv_address) {
1191                                         .bo = address.bo, .offset = offset,
1192                                      },
1193                                      MAX_SURFACE_DIM, MAX_SURFACE_DIM,
1194                                      MAX_SURFACE_DIM * bs, isl_format,
1195                                      true /* is_dest */, protected,
1196                                      &surf, &isl_surf);
1197 
1198       blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1199                   0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
1200                   color, 0 /* color_write_disable */);
1201       size -= max_fill_size;
1202       offset += max_fill_size;
1203    }
1204 
1205    uint64_t height = size / (MAX_SURFACE_DIM * bs);
1206    assert(height < MAX_SURFACE_DIM);
1207    if (height != 0) {
1208       const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
1209       get_blorp_surf_for_anv_address(cmd_buffer,
1210                                      (struct anv_address) {
1211                                         .bo = address.bo, .offset = offset,
1212                                      },
1213                                      MAX_SURFACE_DIM, height,
1214                                      MAX_SURFACE_DIM * bs, isl_format,
1215                                      true /* is_dest */, protected,
1216                                      &surf, &isl_surf);
1217 
1218       blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1219                   0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
1220                   color, 0 /* color_write_disable */);
1221       size -= rect_fill_size;
1222       offset += rect_fill_size;
1223    }
1224 
1225    if (size != 0) {
1226       const uint32_t width = size / bs;
1227       get_blorp_surf_for_anv_address(cmd_buffer,
1228                                      (struct anv_address) {
1229                                         .bo = address.bo, .offset = offset,
1230                                      },
1231                                      width, 1,
1232                                      width * bs, isl_format,
1233                                      true /* is_dest */, protected,
1234                                      &surf, &isl_surf);
1235 
1236       blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1237                   0, 0, 1, 0, 0, width, 1,
1238                   color, 0 /* color_write_disable */);
1239    }
1240 
1241    anv_blorp_batch_finish(&batch);
1242 }
1243 
anv_CmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize fillSize,uint32_t data)1244 void anv_CmdFillBuffer(
1245     VkCommandBuffer                             commandBuffer,
1246     VkBuffer                                    dstBuffer,
1247     VkDeviceSize                                dstOffset,
1248     VkDeviceSize                                fillSize,
1249     uint32_t                                    data)
1250 {
1251    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1252    ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
1253 
1254    fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);
1255 
1256    /* From the Vulkan spec:
1257     *
1258     *    "size is the number of bytes to fill, and must be either a multiple
1259     *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
1260     *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
1261     *    buffer is not a multiple of 4, then the nearest smaller multiple is
1262     *    used."
1263     */
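   /* vk_buffer_range() above already clamped fillSize to the end of the
    * buffer, so rounding down here covers both the explicit-size and the
    * VK_WHOLE_SIZE cases described in the quote.
    */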
1264    fillSize &= ~3ull;
1265 
1266    anv_cmd_buffer_fill_area(cmd_buffer,
1267                             anv_address_add(dst_buffer->address, dstOffset),
1268                             fillSize, data,
1269                             anv_buffer_is_protected(dst_buffer));
1270 
1271    anv_add_buffer_write_pending_bits(cmd_buffer, "after fill buffer");
1272 }
1273 
1274 void anv_CmdClearColorImage(
1275     VkCommandBuffer                             commandBuffer,
1276     VkImage                                     _image,
1277     VkImageLayout                               imageLayout,
1278     const VkClearColorValue*                    pColor,
1279     uint32_t                                    rangeCount,
1280     const VkImageSubresourceRange*              pRanges)
1281 {
1282    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1283    ANV_FROM_HANDLE(anv_image, image, _image);
1284 
1285    struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
1286    UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
1287 
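   /* If this clear has to be executed on the companion RCS command buffer
    * (e.g. because the current engine cannot perform it), switch to that
    * batch and record a marker so the main command buffer can be
    * synchronized with it afterwards.
    */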
1288    if (anv_blorp_execute_on_companion(cmd_buffer, image)) {
1289       rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
1290       cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
1291    }
1292 
1293    struct blorp_batch batch;
1294    anv_blorp_batch_init(cmd_buffer, &batch, 0);
1295 
1296    for (unsigned r = 0; r < rangeCount; r++) {
1297       if (pRanges[r].aspectMask == 0)
1298          continue;
1299 
1300       assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
1301 
1302       struct blorp_surf surf;
1303       get_blorp_surf_for_anv_image(cmd_buffer,
1304                                    image, pRanges[r].aspectMask,
1305                                    VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1306                                    imageLayout, ISL_AUX_USAGE_NONE, &surf);
1307 
1308       struct anv_format_plane src_format =
1309          anv_get_format_aspect(cmd_buffer->device->info, image->vk.format,
1310                                VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);
1311 
1312       unsigned base_layer = pRanges[r].baseArrayLayer;
1313       uint32_t layer_count =
1314          vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
1315       uint32_t level_count =
1316          vk_image_subresource_level_count(&image->vk, &pRanges[r]);
1317 
1318       for (uint32_t i = 0; i < level_count; i++) {
1319          const unsigned level = pRanges[r].baseMipLevel + i;
1320          const unsigned level_width = u_minify(image->vk.extent.width, level);
1321          const unsigned level_height = u_minify(image->vk.extent.height, level);
1322 
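         /* For a 3D image, the "layers" of a mip level are its depth slices,
          * so clear every slice at this level (the range's baseArrayLayer and
          * layerCount do not apply to 3D images).
          */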
1323          if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
1324             base_layer = 0;
1325             layer_count = u_minify(image->vk.extent.depth, level);
1326          }
1327 
1328          anv_cmd_buffer_mark_image_written(cmd_buffer, image,
1329                                            pRanges[r].aspectMask,
1330                                            surf.aux_usage, level,
1331                                            base_layer, layer_count);
1332 
1333          blorp_clear(&batch, &surf,
1334                      src_format.isl_format, src_format.swizzle,
1335                      level, base_layer, layer_count,
1336                      0, 0, level_width, level_height,
1337                      vk_to_isl_color(*pColor), 0 /* color_write_disable */);
1338       }
1339    }
1340 
1341    anv_blorp_batch_finish(&batch);
1342 
1343    if (rcs_done.alloc_size)
1344       end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
1345 }
1346 
1347 void anv_CmdClearDepthStencilImage(
1348     VkCommandBuffer                             commandBuffer,
1349     VkImage                                     image_h,
1350     VkImageLayout                               imageLayout,
1351     const VkClearDepthStencilValue*             pDepthStencil,
1352     uint32_t                                    rangeCount,
1353     const VkImageSubresourceRange*              pRanges)
1354 {
1355    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1356    ANV_FROM_HANDLE(anv_image, image, image_h);
1357 
1358    struct blorp_batch batch;
1359    anv_blorp_batch_init(cmd_buffer, &batch, 0);
1360    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1361 
1362    struct blorp_surf depth, stencil;
1363    if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1364       get_blorp_surf_for_anv_image(cmd_buffer,
1365                                    image, VK_IMAGE_ASPECT_DEPTH_BIT,
1366                                    VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1367                                    imageLayout, ISL_AUX_USAGE_NONE, &depth);
1368    } else {
1369       memset(&depth, 0, sizeof(depth));
1370    }
1371 
1372    if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1373       get_blorp_surf_for_anv_image(cmd_buffer,
1374                                    image, VK_IMAGE_ASPECT_STENCIL_BIT,
1375                                    VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1376                                    imageLayout, ISL_AUX_USAGE_NONE, &stencil);
1377    } else {
1378       memset(&stencil, 0, sizeof(stencil));
1379    }
1380 
1381    for (unsigned r = 0; r < rangeCount; r++) {
1382       if (pRanges[r].aspectMask == 0)
1383          continue;
1384 
1385       bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
1386       bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
1387 
1388       unsigned base_layer = pRanges[r].baseArrayLayer;
1389       uint32_t layer_count =
1390          vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
1391       uint32_t level_count =
1392          vk_image_subresource_level_count(&image->vk, &pRanges[r]);
1393 
1394       for (uint32_t i = 0; i < level_count; i++) {
1395          const unsigned level = pRanges[r].baseMipLevel + i;
1396          const unsigned level_width = u_minify(image->vk.extent.width, level);
1397          const unsigned level_height = u_minify(image->vk.extent.height, level);
1398 
1399          if (image->vk.image_type == VK_IMAGE_TYPE_3D)
1400             layer_count = u_minify(image->vk.extent.depth, level);
1401 
1402          blorp_clear_depth_stencil(&batch, &depth, &stencil,
1403                                    level, base_layer, layer_count,
1404                                    0, 0, level_width, level_height,
1405                                    clear_depth, pDepthStencil->depth,
1406                                    clear_stencil ? 0xff : 0,
1407                                    pDepthStencil->stencil);
1408       }
1409    }
1410 
1411    anv_blorp_batch_finish(&batch);
1412 }
1413 
1414 VkResult
1415 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
1416                                          uint32_t num_entries,
1417                                          uint32_t *state_offset,
1418                                          struct anv_state *bt_state)
1419 {
1420    *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1421                                                   state_offset);
1422    if (bt_state->map == NULL) {
1423       /* We ran out of space.  Grab a new binding table block. */
1424       VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
1425       if (result != VK_SUCCESS)
1426          return result;
1427 
1428       /* Re-emit state base addresses so we get the new surface state base
1429        * address before we start emitting binding tables etc.
1430        */
1431       anv_cmd_buffer_emit_bt_pool_base_address(cmd_buffer);
1432 
1433       *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1434                                                      state_offset);
1435       assert(bt_state->map != NULL);
1436    }
1437 
1438    return VK_SUCCESS;
1439 }
1440 
1441 static VkResult
1442 binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
1443                                 struct anv_state surface_state,
1444                                 uint32_t *bt_offset)
1445 {
1446    uint32_t state_offset;
1447    struct anv_state bt_state;
1448 
1449    VkResult result =
1450       anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
1451                                                &bt_state);
1452    if (result != VK_SUCCESS)
1453       return result;
1454 
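   /* Emit a one-entry binding table whose single slot points at the given
    * surface state (the entry is an offset relative to the surface-state
    * base address).
    */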
1455    uint32_t *bt_map = bt_state.map;
1456    bt_map[0] = surface_state.offset + state_offset;
1457 
1458    *bt_offset = bt_state.offset;
1459    return VK_SUCCESS;
1460 }
1461 
1462 static bool
1463 can_fast_clear_color_att(struct anv_cmd_buffer *cmd_buffer,
1464                          struct blorp_batch *batch,
1465                          const struct anv_attachment *att,
1466                          const VkClearAttachment *attachment,
1467                          uint32_t rectCount, const VkClearRect *pRects)
1468 {
1469    union isl_color_value clear_color =
1470       vk_to_isl_color(attachment->clearValue.color);
1471 
1472    if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
1473       return false;
1474 
1475    /* We don't support fast clearing with conditional rendering at the
1476     * moment. All the tracking done around fast clears (clear color updates
1477     * and fast-clear type updates) happens unconditionally.
1478     */
1479    if (batch->flags & BLORP_BATCH_PREDICATE_ENABLE)
1480       return false;
1481 
1482    if (rectCount > 1) {
1483       anv_perf_warn(VK_LOG_OBJS(&cmd_buffer->device->vk.base),
1484                     "Fast clears for vkCmdClearAttachments supported only for rectCount == 1");
1485       return false;
1486    }
1487 
1488    /* We only support fast-clears on the first layer */
1489    if (pRects[0].layerCount > 1 || pRects[0].baseArrayLayer > 0)
1490       return false;
1491 
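   /* With multiview the clear is replicated to one layer per active view,
    * while the fast-clear path above only covers the first layer, so only
    * allow it when view 0 is the sole active view.
    */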
1492    bool is_multiview = cmd_buffer->state.gfx.view_mask != 0;
1493    if (is_multiview && (cmd_buffer->state.gfx.view_mask != 1))
1494       return false;
1495 
1496    return anv_can_fast_clear_color_view(cmd_buffer->device,
1497                                         (struct anv_image_view *)att->iview,
1498                                         att->layout,
1499                                         clear_color,
1500                                         pRects->layerCount,
1501                                         pRects->rect,
1502                                         cmd_buffer->queue_family->queueFlags);
1503 }
1504 
1505 static void
1506 exec_ccs_op(struct anv_cmd_buffer *cmd_buffer,
1507             struct blorp_batch *batch,
1508             const struct anv_image *image,
1509             enum isl_format format, struct isl_swizzle swizzle,
1510             VkImageAspectFlagBits aspect, uint32_t level,
1511             uint32_t base_layer, uint32_t layer_count,
1512             enum isl_aux_op ccs_op, union isl_color_value *clear_value)
1513 {
1514    assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
1515    assert(image->vk.samples == 1);
1516    assert(level < anv_image_aux_levels(image, aspect));
1517    /* Multi-LOD YcBcR is not allowed */
1518    /* Multi-LOD YCbCr is not allowed */
1519    assert(base_layer + layer_count <=
1520           anv_image_aux_layers(image, aspect, level));
1521 
1522    const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1523 
1524    struct blorp_surf surf;
1525    get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
1526                                 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1527                                 image->planes[plane].aux_usage,
1528                                 &surf);
1529 
1530    uint32_t level_width = u_minify(surf.surf->logical_level0_px.w, level);
1531    uint32_t level_height = u_minify(surf.surf->logical_level0_px.h, level);
1532 
1533    /* Blorp will store the clear color for us if we provide the clear color
1534     * address and we are doing a fast clear. So we save the clear value into
1535     * the blorp surface.
1536     */
1537    if (clear_value)
1538       surf.clear_color = *clear_value;
1539 
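   /* Dispatch the requested CCS operation: a fast clear writes the clear
    * color through the CCS, full/partial resolves fold compressed data back
    * into the main surface, and ambiguate resets the CCS to the pass-through
    * state one layer at a time.
    */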
1540    switch (ccs_op) {
1541    case ISL_AUX_OP_FAST_CLEAR:
1542       blorp_fast_clear(batch, &surf, format, swizzle,
1543                        level, base_layer, layer_count,
1544                        0, 0, level_width, level_height);
1545       break;
1546    case ISL_AUX_OP_FULL_RESOLVE:
1547    case ISL_AUX_OP_PARTIAL_RESOLVE: {
1548       /* Wa_1508744258: Enable RHWO optimization for resolves */
1549       const bool enable_rhwo_opt =
1550          intel_needs_workaround(cmd_buffer->device->info, 1508744258);
1551 
1552       if (enable_rhwo_opt)
1553          cmd_buffer->state.pending_rhwo_optimization_enabled = true;
1554 
1555       blorp_ccs_resolve(batch, &surf, level, base_layer, layer_count,
1556                         format, ccs_op);
1557 
1558       if (enable_rhwo_opt)
1559          cmd_buffer->state.pending_rhwo_optimization_enabled = false;
1560       break;
1561    }
1562    case ISL_AUX_OP_AMBIGUATE:
1563       for (uint32_t a = 0; a < layer_count; a++) {
1564          const uint32_t layer = base_layer + a;
1565          blorp_ccs_ambiguate(batch, &surf, level, layer);
1566       }
1567       break;
1568    default:
1569       unreachable("Unsupported CCS operation");
1570    }
1571 }
1572 
1573 static void
1574 exec_mcs_op(struct anv_cmd_buffer *cmd_buffer,
1575             struct blorp_batch *batch,
1576             const struct anv_image *image,
1577             enum isl_format format, struct isl_swizzle swizzle,
1578             VkImageAspectFlagBits aspect,
1579             uint32_t base_layer, uint32_t layer_count,
1580             enum isl_aux_op mcs_op, union isl_color_value *clear_value)
1581 {
1582    assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
1583    assert(image->vk.samples > 1);
1584    assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));
1585 
1586    /* Multisampling with multi-planar formats is not supported */
1587    assert(image->n_planes == 1);
1588 
1589    struct blorp_surf surf;
1590    get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
1591                                 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1592                                 ISL_AUX_USAGE_MCS, &surf);
1593 
1594    /* Blorp will store the clear color for us if we provide the clear color
1595     * address and we are doing a fast clear. So we save the clear value into
1596     * the blorp surface.
1597     */
1598    if (clear_value)
1599       surf.clear_color = *clear_value;
1600 
1601    switch (mcs_op) {
1602    case ISL_AUX_OP_FAST_CLEAR:
1603       blorp_fast_clear(batch, &surf, format, swizzle,
1604                        0, base_layer, layer_count,
1605                        0, 0, image->vk.extent.width, image->vk.extent.height);
1606       break;
1607    case ISL_AUX_OP_PARTIAL_RESOLVE:
1608       blorp_mcs_partial_resolve(batch, &surf, format,
1609                                 base_layer, layer_count);
1610       break;
1611    case ISL_AUX_OP_AMBIGUATE:
1612       blorp_mcs_ambiguate(batch, &surf, base_layer, layer_count);
1613       break;
1614    case ISL_AUX_OP_FULL_RESOLVE:
1615    default:
1616       unreachable("Unsupported MCS operation");
1617    }
1618 }
1619 
1620 static void
1621 clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
1622                        struct blorp_batch *batch,
1623                        const VkClearAttachment *attachment,
1624                        uint32_t rectCount, const VkClearRect *pRects)
1625 {
1626    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1627    const uint32_t att_idx = attachment->colorAttachment;
1628    assert(att_idx < gfx->color_att_count);
1629    const struct anv_attachment *att = &gfx->color_att[att_idx];
1630 
1631    if (att->vk_format == VK_FORMAT_UNDEFINED)
1632       return;
1633 
1634    union isl_color_value clear_color =
1635       vk_to_isl_color(attachment->clearValue.color);
1636 
1637    const struct anv_image_view *iview = att->iview;
1638    if (iview &&
1639        can_fast_clear_color_att(cmd_buffer, batch, att,
1640                                 attachment, rectCount, pRects)) {
1641       if (iview->image->vk.samples == 1) {
1642          exec_ccs_op(cmd_buffer, batch, iview->image,
1643                      iview->planes[0].isl.format,
1644                      iview->planes[0].isl.swizzle,
1645                      VK_IMAGE_ASPECT_COLOR_BIT,
1646                      0, 0, 1, ISL_AUX_OP_FAST_CLEAR,
1647                      &clear_color);
1648       } else {
1649          exec_mcs_op(cmd_buffer, batch, iview->image,
1650                      iview->planes[0].isl.format,
1651                      iview->planes[0].isl.swizzle,
1652                      VK_IMAGE_ASPECT_COLOR_BIT,
1653                      0, 1, ISL_AUX_OP_FAST_CLEAR,
1654                      &clear_color);
1655       }
1656 
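      /* Prior to Gfx20, record the new fast-clear color and reload it into
       * the attachment's surface state so later rendering sees the updated
       * value; Gfx20+ evidently does not require this explicit update.
       */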
1657       if (cmd_buffer->device->info->ver < 20) {
1658          anv_cmd_buffer_mark_image_fast_cleared(cmd_buffer, iview->image,
1659                                                 iview->planes[0].isl.format,
1660                                                 clear_color);
1661          anv_cmd_buffer_load_clear_color_from_image(cmd_buffer,
1662                                                     att->surface_state.state,
1663                                                     iview->image);
1664       }
1665       return;
1666    }
1667 
1668    uint32_t binding_table;
1669    VkResult result =
1670       binding_table_for_surface_state(cmd_buffer, att->surface_state.state,
1671                                       &binding_table);
1672    if (result != VK_SUCCESS)
1673       return;
1674 
1675    /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1676    if (gfx->view_mask) {
1677       u_foreach_bit(view_idx, gfx->view_mask) {
1678          for (uint32_t r = 0; r < rectCount; ++r) {
1679             const VkOffset2D offset = pRects[r].rect.offset;
1680             const VkExtent2D extent = pRects[r].rect.extent;
1681             blorp_clear_attachments(batch, binding_table,
1682                                     ISL_FORMAT_UNSUPPORTED,
1683                                     gfx->samples,
1684                                     view_idx, 1,
1685                                     offset.x, offset.y,
1686                                     offset.x + extent.width,
1687                                     offset.y + extent.height,
1688                                     true, clear_color, false, 0.0f, 0, 0);
1689          }
1690       }
1691       return;
1692    }
1693 
1694    for (uint32_t r = 0; r < rectCount; ++r) {
1695       const VkOffset2D offset = pRects[r].rect.offset;
1696       const VkExtent2D extent = pRects[r].rect.extent;
1697       assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
1698       blorp_clear_attachments(batch, binding_table,
1699                               ISL_FORMAT_UNSUPPORTED,
1700                               gfx->samples,
1701                               pRects[r].baseArrayLayer,
1702                               pRects[r].layerCount,
1703                               offset.x, offset.y,
1704                               offset.x + extent.width, offset.y + extent.height,
1705                               true, clear_color, false, 0.0f, 0, 0);
1706    }
1707 }
1708 
1709 static void
1710 anv_fast_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
1711                              struct blorp_batch *batch,
1712                              const struct anv_image *image,
1713                              VkImageAspectFlags aspects,
1714                              uint32_t level,
1715                              uint32_t base_layer, uint32_t layer_count,
1716                              VkRect2D area,
1717                              const VkClearDepthStencilValue *clear_value)
1718 {
1719    assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
1720                                VK_IMAGE_ASPECT_STENCIL_BIT));
1721 
1722    struct blorp_surf depth = {};
1723    if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1724       const uint32_t plane =
1725          anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
1726       assert(base_layer + layer_count <=
1727              anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
1728       get_blorp_surf_for_anv_image(cmd_buffer,
1729                                    image, VK_IMAGE_ASPECT_DEPTH_BIT,
1730                                    0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1731                                    image->planes[plane].aux_usage, &depth);
1732    }
1733 
1734    struct blorp_surf stencil = {};
1735    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1736       const uint32_t plane =
1737          anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
1738       get_blorp_surf_for_anv_image(cmd_buffer,
1739                                    image, VK_IMAGE_ASPECT_STENCIL_BIT,
1740                                    0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1741                                    image->planes[plane].aux_usage, &stencil);
1742    }
1743 
1744    /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
1745     *
1746     *    "The following is required when performing a depth buffer clear with
1747     *    using the WM_STATE or 3DSTATE_WM:
1748     *
1749     *       * If other rendering operations have preceded this clear, a
1750     *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
1751     *         enabled must be issued before the rectangle primitive used for
1752     *         the depth buffer clear operation.
1753     *       * [...]"
1754     *
1755     * Even though the PRM only says that this is required if using 3DSTATE_WM
1756     * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
1757     * hangs when doing a clear with WM_HZ_OP.
1758     */
1759    anv_add_pending_pipe_bits(cmd_buffer,
1760                              ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1761                              ANV_PIPE_DEPTH_STALL_BIT,
1762                              "before clear hiz");
1763 
1764    if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1765        depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) {
1766       /* From Bspec 47010 (Depth Buffer Clear):
1767        *
1768        *    Since the fast clear cycles to CCS are not cached in TileCache,
1769        *    any previous depth buffer writes to overlapping pixels must be
1770        *    flushed out of TileCache before a succeeding Depth Buffer Clear.
1771        *    This restriction only applies to Depth Buffer with write-thru
1772        *    enabled, since fast clears to CCS only occur for write-thru mode.
1773        *
1774        * There may have been a write to this depth buffer. Flush it from the
1775        * tile cache just in case.
1776        *
1777        * Set CS stall bit to guarantee that the fast clear starts the execution
1778        * after the tile cache flush completed.
1779        *
1780        * There is no Bspec requirement to flush the data cache, but
1781        * experiments show that flushing the data cache helps to resolve
1782        * the corruption.
1783        */
1784       unsigned wa_flush = cmd_buffer->device->info->verx10 >= 125 ?
1785                           ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0;
1786       anv_add_pending_pipe_bits(cmd_buffer,
1787                                 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1788                                 ANV_PIPE_CS_STALL_BIT |
1789                                 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1790                                 wa_flush,
1791                                 "before clear hiz_ccs_wt");
1792    }
1793 
1794    blorp_hiz_clear_depth_stencil(batch, &depth, &stencil,
1795                                  level, base_layer, layer_count,
1796                                  area.offset.x, area.offset.y,
1797                                  area.offset.x + area.extent.width,
1798                                  area.offset.y + area.extent.height,
1799                                  aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
1800                                  clear_value->depth,
1801                                  aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
1802                                  clear_value->stencil);
1803 
1804    /* From the SKL PRM, Depth Buffer Clear:
1805     *
1806     *    "Depth Buffer Clear Workaround
1807     *
1808     *    Depth buffer clear pass using any of the methods (WM_STATE,
1809     *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
1810     *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
1811     *    starting to render.  DepthStall and DepthFlush are not needed between
1812     *    consecutive depth clear passes nor is it required if the depth-clear
1813     *    pass was done with “full_surf_clear” bit set in the
1814     *    3DSTATE_WM_HZ_OP."
1815     *
1816     * Even though the PRM provides a bunch of conditions under which this is
1817     * supposedly unnecessary, we choose to perform the flush unconditionally
1818     * just to be safe.
1819     *
1820     * From Bspec 46959, a programming note applicable to Gfx12+:
1821     *
1822     *    "Since HZ_OP has to be sent twice (first time set the clear/resolve state
1823     *    and 2nd time to clear the state), and HW internally flushes the depth
1824     *    cache on HZ_OP, there is no need to explicitly send a Depth Cache flush
1825     *    after Clear or Resolve."
1826     */
1827    if (cmd_buffer->device->info->verx10 < 120) {
1828       anv_add_pending_pipe_bits(cmd_buffer,
1829                                 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1830                                 ANV_PIPE_DEPTH_STALL_BIT,
1831                                 "after clear hiz");
1832    }
1833 }
1834 
1835 static bool
1836 can_hiz_clear_att(struct anv_cmd_buffer *cmd_buffer,
1837                   struct blorp_batch *batch,
1838                   const struct anv_attachment *ds_att,
1839                   const VkClearAttachment *attachment,
1840                   uint32_t rectCount, const VkClearRect *pRects)
1841 {
1842    if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
1843       return false;
1844 
1845    /* From Bspec's section MI_PREDICATE:
1846     *
1847     *    "The MI_PREDICATE command is used to control the Predicate state bit,
1848     *    which in turn can be used to enable/disable the processing of
1849     *    3DPRIMITIVE commands."
1850     *
1851     * Also from BDW/CHV Bspec's 3DSTATE_WM_HZ_OP programming notes:
1852     *
1853     *    "This command does NOT support predication from the use of the
1854     *    MI_PREDICATE register. To predicate depth clears and resolves on you
1855     *    must fall back to using the 3D_PRIMITIVE or GPGPU_WALKER commands."
1856     *
1857     * Since BLORP's predication is currently dependent on MI_PREDICATE, fall
1858     * back to the slow depth clear path when the BLORP_BATCH_PREDICATE_ENABLE
1859     * flag is set.
1860     */
1861    if (batch->flags & BLORP_BATCH_PREDICATE_ENABLE)
1862       return false;
1863 
1864    if (rectCount > 1) {
1865       anv_perf_warn(VK_LOG_OBJS(&cmd_buffer->device->vk.base),
1866                     "Fast clears for vkCmdClearAttachments supported only for rectCount == 1");
1867       return false;
1868    }
1869 
1870    /* When the BLORP_BATCH_NO_EMIT_DEPTH_STENCIL flag is set, BLORP can only
1871     * clear the first slice of the currently configured depth/stencil view.
1872     */
1873    assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
1874    if (pRects[0].layerCount > 1 || pRects[0].baseArrayLayer > 0)
1875       return false;
1876 
1877    return anv_can_hiz_clear_ds_view(cmd_buffer->device, ds_att->iview,
1878                                     ds_att->layout,
1879                                     attachment->aspectMask,
1880                                     attachment->clearValue.depthStencil.depth,
1881                                     pRects->rect,
1882                                     cmd_buffer->queue_family->queueFlags);
1883 }
1884 
1885 static void
1886 clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
1887                                struct blorp_batch *batch,
1888                                const VkClearAttachment *attachment,
1889                                uint32_t rectCount, const VkClearRect *pRects)
1890 {
1891    static const union isl_color_value color_value = { .u32 = { 0, } };
1892    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1893    const struct anv_attachment *d_att = &gfx->depth_att;
1894    const struct anv_attachment *s_att = &gfx->stencil_att;
1895    if (d_att->vk_format == VK_FORMAT_UNDEFINED &&
1896        s_att->vk_format == VK_FORMAT_UNDEFINED)
1897       return;
1898 
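   /* Use the depth attachment's view for the HiZ fast-clear check when one
    * is bound, otherwise fall back to the stencil attachment's view.
    */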
1899    const struct anv_attachment *ds_att = d_att->iview ? d_att : s_att;
1900    if (ds_att->iview &&
1901        can_hiz_clear_att(cmd_buffer, batch, ds_att, attachment, rectCount, pRects)) {
1902       anv_fast_clear_depth_stencil(cmd_buffer, batch, ds_att->iview->image,
1903                                    attachment->aspectMask,
1904                                    ds_att->iview->planes[0].isl.base_level,
1905                                    ds_att->iview->planes[0].isl.base_array_layer,
1906                                    pRects[0].layerCount, pRects->rect,
1907                                    &attachment->clearValue.depthStencil);
1908       return;
1909    }
1910 
1911    bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
1912    bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
1913 
1914    enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
1915    if (d_att->vk_format != VK_FORMAT_UNDEFINED) {
1916       depth_format = anv_get_isl_format(cmd_buffer->device->info,
1917                                         d_att->vk_format,
1918                                         VK_IMAGE_ASPECT_DEPTH_BIT,
1919                                         VK_IMAGE_TILING_OPTIMAL);
1920    }
1921 
1922    uint32_t binding_table;
1923    VkResult result =
1924       binding_table_for_surface_state(cmd_buffer,
1925                                       gfx->null_surface_state,
1926                                       &binding_table);
1927    if (result != VK_SUCCESS)
1928       return;
1929 
1930    /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1931    if (gfx->view_mask) {
1932       u_foreach_bit(view_idx, gfx->view_mask) {
1933          for (uint32_t r = 0; r < rectCount; ++r) {
1934             const VkOffset2D offset = pRects[r].rect.offset;
1935             const VkExtent2D extent = pRects[r].rect.extent;
1936             VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
1937             blorp_clear_attachments(batch, binding_table,
1938                                     depth_format,
1939                                     gfx->samples,
1940                                     view_idx, 1,
1941                                     offset.x, offset.y,
1942                                     offset.x + extent.width,
1943                                     offset.y + extent.height,
1944                                     false, color_value,
1945                                     clear_depth, value.depth,
1946                                     clear_stencil ? 0xff : 0, value.stencil);
1947          }
1948       }
1949       return;
1950    }
1951 
1952    for (uint32_t r = 0; r < rectCount; ++r) {
1953       const VkOffset2D offset = pRects[r].rect.offset;
1954       const VkExtent2D extent = pRects[r].rect.extent;
1955       VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
1956       assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
1957       blorp_clear_attachments(batch, binding_table,
1958                               depth_format,
1959                               gfx->samples,
1960                               pRects[r].baseArrayLayer,
1961                               pRects[r].layerCount,
1962                               offset.x, offset.y,
1963                               offset.x + extent.width, offset.y + extent.height,
1964                               false, color_value,
1965                               clear_depth, value.depth,
1966                               clear_stencil ? 0xff : 0, value.stencil);
1967    }
1968 }
1969 
1970 void anv_CmdClearAttachments(
1971     VkCommandBuffer                             commandBuffer,
1972     uint32_t                                    attachmentCount,
1973     const VkClearAttachment*                    pAttachments,
1974     uint32_t                                    rectCount,
1975     const VkClearRect*                          pRects)
1976 {
1977    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1978 
1979    /* Because this gets called within a render pass, we tell blorp not to
1980     * trash our depth and stencil buffers.
1981     */
1982    struct blorp_batch batch;
1983    enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
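   /* If conditional rendering is active, emit the predicate so the clears
    * below are skipped whenever the predicate evaluates to false.
    */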
1984    if (cmd_buffer->state.conditional_render_enabled) {
1985       anv_cmd_emit_conditional_render_predicate(cmd_buffer);
1986       flags |= BLORP_BATCH_PREDICATE_ENABLE;
1987    }
1988    anv_blorp_batch_init(cmd_buffer, &batch, flags);
1989 
1990    for (uint32_t a = 0; a < attachmentCount; ++a) {
1991       if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
1992          assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
1993          clear_color_attachment(cmd_buffer, &batch,
1994                                 &pAttachments[a],
1995                                 rectCount, pRects);
1996       } else {
1997          clear_depth_stencil_attachment(cmd_buffer, &batch,
1998                                         &pAttachments[a],
1999                                         rectCount, pRects);
2000       }
2001    }
2002 
2003    anv_blorp_batch_finish(&batch);
2004 }
2005 
2006 static void
2007 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
2008                        const struct anv_image *src_image,
2009                        enum isl_format src_format_override,
2010                        enum isl_aux_usage src_aux_usage,
2011                        uint32_t src_level, uint32_t src_base_layer,
2012                        const struct anv_image *dst_image,
2013                        enum isl_format dst_format_override,
2014                        enum isl_aux_usage dst_aux_usage,
2015                        uint32_t dst_level, uint32_t dst_base_layer,
2016                        VkImageAspectFlagBits aspect,
2017                        uint32_t src_x, uint32_t src_y,
2018                        uint32_t dst_x, uint32_t dst_y,
2019                        uint32_t width, uint32_t height,
2020                        uint32_t layer_count,
2021                        enum blorp_filter filter)
2022 {
2023    struct blorp_batch batch;
2024    anv_blorp_batch_init(cmd_buffer, &batch, 0);
2025    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2026 
2027    assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
2028    assert(src_image->vk.samples > 1);
2029    assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
2030    assert(dst_image->vk.samples == 1);
2031 
2032    struct blorp_surf src_surf, dst_surf;
2033    get_blorp_surf_for_anv_image(cmd_buffer, src_image, aspect,
2034                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2035                                 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2036                                 src_aux_usage, &src_surf);
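   /* An MCS-compressed source may still contain fast-cleared samples, so
    * give BLORP the clear color address so those samples resolve to the
    * correct value.
    */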
2037    if (src_aux_usage == ISL_AUX_USAGE_MCS) {
2038       src_surf.clear_color_addr = anv_to_blorp_address(
2039          anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
2040                                         VK_IMAGE_ASPECT_COLOR_BIT));
2041    }
2042    get_blorp_surf_for_anv_image(cmd_buffer, dst_image, aspect,
2043                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2044                                 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2045                                 dst_aux_usage, &dst_surf);
2046    anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
2047                                      aspect, dst_aux_usage,
2048                                      dst_level, dst_base_layer, layer_count);
2049 
2050    if (filter == BLORP_FILTER_NONE) {
2051       /* If no explicit filter is provided, then it's implied by the type of
2052        * the source image.
2053        */
2054       if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
2055           (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
2056           isl_format_has_int_channel(src_surf.surf->format)) {
2057          filter = BLORP_FILTER_SAMPLE_0;
2058       } else {
2059          filter = BLORP_FILTER_AVERAGE;
2060       }
2061    }
2062 
2063    for (uint32_t l = 0; l < layer_count; l++) {
2064       blorp_blit(&batch,
2065                  &src_surf, src_level, src_base_layer + l,
2066                  src_format_override, ISL_SWIZZLE_IDENTITY,
2067                  &dst_surf, dst_level, dst_base_layer + l,
2068                  dst_format_override, ISL_SWIZZLE_IDENTITY,
2069                  src_x, src_y, src_x + width, src_y + height,
2070                  dst_x, dst_y, dst_x + width, dst_y + height,
2071                  filter, false, false);
2072    }
2073 
2074    anv_blorp_batch_finish(&batch);
2075 }
2076 
2077 static enum blorp_filter
2078 vk_to_blorp_resolve_mode(VkResolveModeFlagBits vk_mode)
2079 {
2080    switch (vk_mode) {
2081    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
2082       return BLORP_FILTER_SAMPLE_0;
2083    case VK_RESOLVE_MODE_AVERAGE_BIT:
2084       return BLORP_FILTER_AVERAGE;
2085    case VK_RESOLVE_MODE_MIN_BIT:
2086       return BLORP_FILTER_MIN_SAMPLE;
2087    case VK_RESOLVE_MODE_MAX_BIT:
2088       return BLORP_FILTER_MAX_SAMPLE;
2089    default:
2090       return BLORP_FILTER_NONE;
2091    }
2092 }
2093 
2094 void
2095 anv_attachment_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
2096                             const struct anv_attachment *att,
2097                             VkImageLayout layout,
2098                             VkImageAspectFlagBits aspect)
2099 {
2100    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
2101    const struct anv_image_view *src_iview = att->iview;
2102    const struct anv_image_view *dst_iview = att->resolve_iview;
2103 
2104    enum isl_aux_usage src_aux_usage =
2105       anv_layout_to_aux_usage(cmd_buffer->device->info,
2106                               src_iview->image, aspect,
2107                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2108                               layout,
2109                               cmd_buffer->queue_family->queueFlags);
2110 
2111    enum isl_aux_usage dst_aux_usage =
2112       anv_layout_to_aux_usage(cmd_buffer->device->info,
2113                               dst_iview->image, aspect,
2114                               VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2115                               att->resolve_layout,
2116                               cmd_buffer->queue_family->queueFlags);
2117 
2118    enum blorp_filter filter = vk_to_blorp_resolve_mode(att->resolve_mode);
2119 
2120    /* Depth/stencil resolves should not use the view format because depth
2121     * and stencil go in pairs.
2122     */
2123    enum isl_format src_format = ISL_FORMAT_UNSUPPORTED;
2124    enum isl_format dst_format = ISL_FORMAT_UNSUPPORTED;
2125    if (!(aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
2126       src_format = src_iview->planes[0].isl.format;
2127       dst_format = dst_iview->planes[0].isl.format;
2128    }
2129 
2130    const VkRect2D render_area = gfx->render_area;
2131    if (gfx->view_mask == 0) {
2132       anv_image_msaa_resolve(cmd_buffer,
2133                              src_iview->image, src_format, src_aux_usage,
2134                              src_iview->planes[0].isl.base_level,
2135                              src_iview->planes[0].isl.base_array_layer,
2136                              dst_iview->image, dst_format, dst_aux_usage,
2137                              dst_iview->planes[0].isl.base_level,
2138                              dst_iview->planes[0].isl.base_array_layer,
2139                              aspect,
2140                              render_area.offset.x, render_area.offset.y,
2141                              render_area.offset.x, render_area.offset.y,
2142                              render_area.extent.width,
2143                              render_area.extent.height,
2144                              gfx->layer_count, filter);
2145    } else {
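      /* With multiview, resolve one layer per active view: view i reads and
       * writes array layer base_array_layer + i on both images.
       */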
2146       uint32_t res_view_mask = gfx->view_mask;
2147       while (res_view_mask) {
2148          int i = u_bit_scan(&res_view_mask);
2149 
2150          anv_image_msaa_resolve(cmd_buffer,
2151                                 src_iview->image, src_format, src_aux_usage,
2152                                 src_iview->planes[0].isl.base_level,
2153                                 src_iview->planes[0].isl.base_array_layer + i,
2154                                 dst_iview->image, dst_format, dst_aux_usage,
2155                                 dst_iview->planes[0].isl.base_level,
2156                                 dst_iview->planes[0].isl.base_array_layer + i,
2157                                 aspect,
2158                                 render_area.offset.x, render_area.offset.y,
2159                                 render_area.offset.x, render_area.offset.y,
2160                                 render_area.extent.width,
2161                                 render_area.extent.height,
2162                                 1, filter);
2163       }
2164    }
2165 }
2166 
2167 static void
2168 resolve_image(struct anv_cmd_buffer *cmd_buffer,
2169               struct anv_image *src_image,
2170               VkImageLayout src_image_layout,
2171               struct anv_image *dst_image,
2172               VkImageLayout dst_image_layout,
2173               const VkImageResolve2 *region)
2174 {
2175    assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
2176    assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
2177           vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));
2178 
2179    const uint32_t layer_count =
2180       vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource);
2181 
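   /* Resolve each aspect separately since depth and stencil can have
    * different aux usages in the given layouts.
    */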
2182    anv_foreach_image_aspect_bit(aspect_bit, src_image,
2183                                 region->srcSubresource.aspectMask) {
2184       enum isl_aux_usage src_aux_usage =
2185          anv_layout_to_aux_usage(cmd_buffer->device->info, src_image,
2186                                  (1 << aspect_bit),
2187                                  VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2188                                  src_image_layout,
2189                                  cmd_buffer->queue_family->queueFlags);
2190       enum isl_aux_usage dst_aux_usage =
2191          anv_layout_to_aux_usage(cmd_buffer->device->info, dst_image,
2192                                  (1 << aspect_bit),
2193                                  VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2194                                  dst_image_layout,
2195                                  cmd_buffer->queue_family->queueFlags);
2196 
2197       anv_image_msaa_resolve(cmd_buffer,
2198                              src_image, ISL_FORMAT_UNSUPPORTED, src_aux_usage,
2199                              region->srcSubresource.mipLevel,
2200                              region->srcSubresource.baseArrayLayer,
2201                              dst_image, ISL_FORMAT_UNSUPPORTED, dst_aux_usage,
2202                              region->dstSubresource.mipLevel,
2203                              region->dstSubresource.baseArrayLayer,
2204                              (1 << aspect_bit),
2205                              region->srcOffset.x,
2206                              region->srcOffset.y,
2207                              region->dstOffset.x,
2208                              region->dstOffset.y,
2209                              region->extent.width,
2210                              region->extent.height,
2211                              layer_count, BLORP_FILTER_NONE);
2212    }
2213 }
2214 
2215 void anv_CmdResolveImage2(
2216     VkCommandBuffer                             commandBuffer,
2217     const VkResolveImageInfo2*                  pResolveImageInfo)
2218 {
2219    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
2220    ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
2221    ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);
2222 
2223    for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
2224       resolve_image(cmd_buffer,
2225                     src_image, pResolveImageInfo->srcImageLayout,
2226                     dst_image, pResolveImageInfo->dstImageLayout,
2227                     &pResolveImageInfo->pRegions[r]);
2228    }
2229 }
2230 
2231 void
2232 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
2233                       const struct anv_image *image,
2234                       VkImageAspectFlagBits aspect,
2235                       enum isl_aux_usage aux_usage,
2236                       enum isl_format format, struct isl_swizzle swizzle,
2237                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
2238                       VkRect2D area, union isl_color_value clear_color)
2239 {
2240    assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
2241 
2242    /* We don't support planar images with multisampling yet */
2243    assert(image->n_planes == 1);
2244 
2245    struct blorp_batch batch;
2246    anv_blorp_batch_init(cmd_buffer, &batch, 0);
2247 
2248    struct blorp_surf surf;
2249    get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
2250                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2251                                 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2252                                 aux_usage, &surf);
2253    anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
2254                                      level, base_layer, layer_count);
2255 
2256    blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
2257                level, base_layer, layer_count,
2258                area.offset.x, area.offset.y,
2259                area.offset.x + area.extent.width,
2260                area.offset.y + area.extent.height,
2261                clear_color, 0 /* color_write_disable */);
2262 
2263    anv_blorp_batch_finish(&batch);
2264 }
2265 
2266 void
2267 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
2268                               const struct anv_image *image,
2269                               VkImageAspectFlags aspects,
2270                               enum isl_aux_usage depth_aux_usage,
2271                               uint32_t level,
2272                               uint32_t base_layer, uint32_t layer_count,
2273                               VkRect2D area,
2274                               const VkClearDepthStencilValue *clear_value)
2275 {
2276    assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
2277                                VK_IMAGE_ASPECT_STENCIL_BIT));
2278 
2279    struct blorp_batch batch;
2280    anv_blorp_batch_init(cmd_buffer, &batch, 0);
2281    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2282 
2283    struct blorp_surf depth = {};
2284    if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
2285       get_blorp_surf_for_anv_image(cmd_buffer,
2286                                    image, VK_IMAGE_ASPECT_DEPTH_BIT,
2287                                    0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2288                                    depth_aux_usage, &depth);
2289    }
2290 
2291    struct blorp_surf stencil = {};
2292    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
2293       const uint32_t plane =
2294          anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
2295       get_blorp_surf_for_anv_image(cmd_buffer,
2296                                    image, VK_IMAGE_ASPECT_STENCIL_BIT,
2297                                    0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2298                                    image->planes[plane].aux_usage, &stencil);
2299    }
2300 
2301    /* Blorp may choose to clear stencil using RGBA32_UINT for better
2302     * performance.  If it does this, we need to flush it out of the depth
2303     * cache before rendering to it.
2304     */
2305    anv_add_pending_pipe_bits(cmd_buffer,
2306                              ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
2307                              ANV_PIPE_END_OF_PIPE_SYNC_BIT,
2308                              "before clear DS");
2309 
2310    blorp_clear_depth_stencil(&batch, &depth, &stencil,
2311                              level, base_layer, layer_count,
2312                              area.offset.x, area.offset.y,
2313                              area.offset.x + area.extent.width,
2314                              area.offset.y + area.extent.height,
2315                              aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
2316                              clear_value->depth,
2317                              (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
2318                              clear_value->stencil);
2319 
2320    /* Blorp may choose to clear stencil using RGBA32_UINT for better
2321     * performance.  If it does this, we need to flush it out of the render
2322     * cache before someone starts trying to do stencil on it.
2323     */
2324    anv_add_pending_pipe_bits(cmd_buffer,
2325                              ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
2326                              ANV_PIPE_END_OF_PIPE_SYNC_BIT,
2327                              "after clear DS");
2328 
2329    anv_blorp_batch_finish(&batch);
2330 }
2331 
2332 void
2333 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
2334                  const struct anv_image *image,
2335                  VkImageAspectFlagBits aspect, uint32_t level,
2336                  uint32_t base_layer, uint32_t layer_count,
2337                  enum isl_aux_op hiz_op)
2338 {
2339    assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
2340    assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
2341    const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
2342    assert(plane == 0);
2343 
2344    struct blorp_batch batch;
2345    anv_blorp_batch_init(cmd_buffer, &batch, 0);
2346    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2347 
2348    struct blorp_surf surf;
2349    get_blorp_surf_for_anv_image(cmd_buffer,
2350                                 image, VK_IMAGE_ASPECT_DEPTH_BIT,
2351                                 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2352                                 image->planes[plane].aux_usage, &surf);
2353 
2354    blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);
2355 
2356    anv_blorp_batch_finish(&batch);
2357 }
2358 
2359 void
2360 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
2361                     const struct anv_image *image,
2362                     VkImageAspectFlags aspects,
2363                     uint32_t level,
2364                     uint32_t base_layer, uint32_t layer_count,
2365                     VkRect2D area,
2366                     const VkClearDepthStencilValue *clear_value)
2367 {
2368    struct blorp_batch batch;
2369    anv_blorp_batch_init(cmd_buffer, &batch, 0);
2370    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2371 
2372    anv_fast_clear_depth_stencil(cmd_buffer, &batch, image, aspects, level,
2373                                 base_layer, layer_count, area, clear_value);
2374 
2375    anv_blorp_batch_finish(&batch);
2376 }
2377 
2378 void
2379 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
2380                  const struct anv_image *image,
2381                  enum isl_format format, struct isl_swizzle swizzle,
2382                  VkImageAspectFlagBits aspect,
2383                  uint32_t base_layer, uint32_t layer_count,
2384                  enum isl_aux_op mcs_op, union isl_color_value *clear_value,
2385                  bool predicate)
2386 {
2387    struct blorp_batch batch;
2388    anv_blorp_batch_init(cmd_buffer, &batch,
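   /* Multiplying the flag by the bool enables BLORP predication only when
    * the caller asked for it (predicate == true yields the flag, false
    * yields 0).
    */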
2389                         BLORP_BATCH_PREDICATE_ENABLE * predicate);
2390    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2391 
2392    exec_mcs_op(cmd_buffer, &batch, image, format, swizzle, aspect,
2393                base_layer, layer_count, mcs_op, clear_value);
2394 
2395    anv_blorp_batch_finish(&batch);
2396 }
2397 
2398 void
2399 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
2400                  const struct anv_image *image,
2401                  enum isl_format format, struct isl_swizzle swizzle,
2402                  VkImageAspectFlagBits aspect, uint32_t level,
2403                  uint32_t base_layer, uint32_t layer_count,
2404                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
2405                  bool predicate)
2406 {
2407    struct blorp_batch batch;
2408    anv_blorp_batch_init(cmd_buffer, &batch,
2409                         BLORP_BATCH_PREDICATE_ENABLE * predicate);
2410    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2411 
2412    exec_ccs_op(cmd_buffer, &batch, image, format, swizzle, aspect, level,
2413                base_layer, layer_count, ccs_op, clear_value);
2414 
2415    anv_blorp_batch_finish(&batch);
2416 }
2417