/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"
#include "genxml/gen8_pack.h"

static bool
lookup_blorp_shader(struct blorp_batch *batch,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_shader_bin *bin =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   key, key_size, NULL);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static bool
upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const void *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_pipeline_bind_map empty_bind_map = {};
   struct anv_push_descriptor_info empty_push_desc_info = {};
   struct anv_shader_upload_params upload_params = {
      .stage = stage,
      .key_data = key,
      .key_size = key_size,
      .kernel_data = kernel,
      .kernel_size = kernel_size,
      .prog_data = prog_data,
      .prog_data_size = prog_data_size,
      .bind_map = &empty_bind_map,
      .push_desc_info = &empty_push_desc_info,
   };

   struct anv_shader_bin *bin =
      anv_device_upload_kernel(device, device->internal_cache, &upload_params);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

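/* blorp hands us small chunks of dynamic state to cache: each one is emitted
 * once into the dynamic state pool and reused across batches (see
 * use_cached_dynamic_states in anv_device_init_blorp() below).
 */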
static void
upload_dynamic_state(struct blorp_context *context,
                     const void *data, uint32_t size,
                     uint32_t alignment, enum blorp_dynamic_state name)
{
   struct anv_device *device = context->driver_ctx;

   device->blorp.dynamic_states[name] =
      anv_state_pool_emit_data(&device->dynamic_state_pool,
                               size, alignment, data);
}

void
anv_device_init_blorp(struct anv_device *device)
{
   const struct blorp_config config = {
      .use_mesh_shading = device->vk.enabled_extensions.EXT_mesh_shader,
      .use_unrestricted_depth_range =
         device->vk.enabled_extensions.EXT_depth_range_unrestricted,
      .use_cached_dynamic_states = true,
   };

   blorp_init_brw(&device->blorp.context, device, &device->isl_dev,
                  device->physical->compiler, &config);
   device->blorp.context.lookup_shader = lookup_blorp_shader;
   device->blorp.context.upload_shader = upload_blorp_shader;
   device->blorp.context.enable_tbimr = device->physical->instance->enable_tbimr;
   device->blorp.context.exec = anv_genX(device->info, blorp_exec);
   device->blorp.context.upload_dynamic_state = upload_dynamic_state;

   anv_genX(device->info, blorp_init_dynamic_states)(&device->blorp.context);
}

void
anv_device_finish_blorp(struct anv_device *device)
{
#ifdef HAVE_VALGRIND
   /* We only need to free these to prevent valgrind errors. The backing
    * BO will go away in a couple of lines so we don't actually leak.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(device->blorp.dynamic_states); i++) {
      anv_state_pool_free(&device->dynamic_state_pool,
                          device->blorp.dynamic_states[i]);
   }
#endif
   blorp_finish(&device->blorp.context);
}

static void
anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch, enum blorp_batch_flags flags)
{
   VkQueueFlags queue_flags = cmd_buffer->queue_family->queueFlags;

   if (queue_flags & VK_QUEUE_GRAPHICS_BIT) {
      /* blorp runs on the render engine by default */
   } else if (queue_flags & VK_QUEUE_COMPUTE_BIT) {
      flags |= BLORP_BATCH_USE_COMPUTE;
   } else if (queue_flags & VK_QUEUE_TRANSFER_BIT) {
      flags |= BLORP_BATCH_USE_BLITTER;
   } else {
      unreachable("unknown queue family");
   }

   /* Can't have both flags at the same time. */
   assert((flags & BLORP_BATCH_USE_BLITTER) == 0 ||
          (flags & BLORP_BATCH_USE_COMPUTE) == 0);

   blorp_batch_init(&cmd_buffer->device->blorp.context, batch, cmd_buffer, flags);
}

static void
anv_blorp_batch_finish(struct blorp_batch *batch)
{
   blorp_batch_finish(batch);
}

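/* Translate the command buffer's engine class into the ISL usage flags blorp
 * needs for surface setup: render-target/texture on the render engine,
 * storage/texture on compute, and blitter dst/src on the copy engine.
 */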
static isl_surf_usage_flags_t
get_usage_flag_for_cmd_buffer(const struct anv_cmd_buffer *cmd_buffer,
                              bool is_dest, bool protected)
{
   isl_surf_usage_flags_t usage;

   switch (cmd_buffer->queue_family->engine_class) {
   case INTEL_ENGINE_CLASS_RENDER:
      usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT :
                        ISL_SURF_USAGE_TEXTURE_BIT;
      break;
   case INTEL_ENGINE_CLASS_COMPUTE:
      usage = is_dest ? ISL_SURF_USAGE_STORAGE_BIT :
                        ISL_SURF_USAGE_TEXTURE_BIT;
      break;
   case INTEL_ENGINE_CLASS_COPY:
      usage = is_dest ? ISL_SURF_USAGE_BLITTER_DST_BIT :
                        ISL_SURF_USAGE_BLITTER_SRC_BIT;
      break;
   default:
      unreachable("Unhandled engine class");
   }

   if (protected)
      usage |= ISL_SURF_USAGE_PROTECTED_BIT;

   return usage;
}

static void
get_blorp_surf_for_anv_address(struct anv_cmd_buffer *cmd_buffer,
                               struct anv_address address,
                               uint32_t width, uint32_t height,
                               uint32_t row_pitch, enum isl_format format,
                               bool is_dest, bool protected,
                               struct blorp_surf *blorp_surf,
                               struct isl_surf *isl_surf)
{
   bool ok UNUSED;
   isl_surf_usage_flags_t usage =
      get_usage_flag_for_cmd_buffer(cmd_buffer, is_dest, protected);

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(cmd_buffer->device, address.bo, usage),
      },
   };

   ok = isl_surf_init(&cmd_buffer->device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch_B = row_pitch,
                      .usage = usage,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              bool is_dest,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   get_blorp_surf_for_anv_address(cmd_buffer,
                                  anv_address_add(buffer->address, offset),
                                  width, height, row_pitch, format,
                                  is_dest, anv_buffer_is_protected(buffer),
                                  blorp_surf, isl_surf);
}

/* Pick something high enough that it won't be used in core and low enough
 * that it will never map to an extension.
 */
#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000
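/* When passed as the "layout" argument, this value tells
 * get_blorp_surf_for_anv_image() to trust the caller-provided aux_usage
 * rather than deriving one from the actual Vulkan image layout.
 */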

static struct blorp_address
anv_to_blorp_address(struct anv_address addr)
{
   return (struct blorp_address) {
      .buffer = addr.bo,
      .offset = addr.offset,
   };
}

static void
get_blorp_surf_for_anv_image(const struct anv_cmd_buffer *cmd_buffer,
                             const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             VkImageUsageFlags usage,
                             VkImageLayout layout,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   const struct anv_device *device = cmd_buffer->device;
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

   if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
      assert(usage != 0);
      aux_usage = anv_layout_to_aux_usage(device->info, image,
                                          aspect, usage, layout,
                                          cmd_buffer->queue_family->queueFlags);
   }

   isl_surf_usage_flags_t isl_usage =
      get_usage_flag_for_cmd_buffer(cmd_buffer,
                                    usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                    anv_image_is_protected(image));
   const struct anv_surface *surface = &image->planes[plane].primary_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, isl_usage),
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
      const struct anv_address aux_address =
         anv_image_address(image, &aux_surface->memory_range);

      blorp_surf->aux_usage = aux_usage;
      blorp_surf->aux_surf = &aux_surface->isl;

      if (!anv_address_is_null(aux_address)) {
         blorp_surf->aux_addr = (struct blorp_address) {
            .buffer = aux_address.bo,
            .offset = aux_address.offset,
            .mocs = anv_mocs(device, aux_address.bo, isl_usage),
         };
      }

      /* If we're doing a partial resolve, then we need the indirect clear
       * color. If we are doing a fast clear and want to store/update the
       * clear color, we also pass the address to blorp, otherwise it will
       * only stomp the CCS to a particular value and won't care about format
       * or clear value.
       */
      if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
      } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
         blorp_surf->clear_color = anv_image_hiz_clear_value(image);
      }
   }
}

static void
copy_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageCopy2 *region)
{
   VkOffset3D srcOffset =
      vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   VkOffset3D dstOffset =
      vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
   VkExtent3D extent =
      vk_image_sanitize_extent(&src_image->vk, region->extent);

   const uint32_t dst_level = region->dstSubresource.mipLevel;
   unsigned dst_base_layer, layer_count;
   if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      dst_base_layer = region->dstOffset.z;
      layer_count = region->extent.depth;
   } else {
      dst_base_layer = region->dstSubresource.baseArrayLayer;
      layer_count = vk_image_subresource_layer_count(&dst_image->vk,
                                                     &region->dstSubresource);
   }

   const uint32_t src_level = region->srcSubresource.mipLevel;
   unsigned src_base_layer;
   if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      src_base_layer = region->srcOffset.z;
   } else {
      src_base_layer = region->srcSubresource.baseArrayLayer;
      assert(layer_count ==
             vk_image_subresource_layer_count(&src_image->vk,
                                              &region->srcSubresource));
   }

   VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
                      dst_mask = region->dstSubresource.aspectMask;

   assert(anv_image_aspects_compatible(src_mask, dst_mask));

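   /* A depth/stencil copy can name both aspects in one region; since each
    * aspect lives in its own surface, we copy them one aspect at a time.
    */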
   if (util_bitcount(src_mask) > 1) {
      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer,
                                      src_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                      src_image_layout, ISL_AUX_USAGE_NONE,
                                      &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer,
                                      dst_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                      dst_image_layout, ISL_AUX_USAGE_NONE,
                                      &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                           1UL << aspect_bit,
                                           dst_surf.aux_usage, dst_level,
                                           dst_base_layer, layer_count);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   } else {
      /* This case handles YCbCr images: the aspect masks are compatible but
       * don't need to be the same.
       */
      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(cmd_buffer, src_image, src_mask,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE,
                                   &src_surf);
      get_blorp_surf_for_anv_image(cmd_buffer, dst_image, dst_mask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE,
                                   &dst_surf);
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
                                        dst_surf.aux_usage, dst_level,
                                        dst_base_layer, layer_count);

      for (unsigned i = 0; i < layer_count; i++) {
         blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                    &dst_surf, dst_level, dst_base_layer + i,
                    srcOffset.x, srcOffset.y,
                    dstOffset.x, dstOffset.y,
                    extent.width, extent.height);
      }
   }
}

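/* Work recorded on a compute or blitter queue that those engines cannot
 * execute is redirected to a companion RCS command buffer. The syncpoint
 * state returned here is later handed to end_main_rcs_cmd_buffer_done() so
 * the main and companion batches can be synchronized.
 */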
static struct anv_state
record_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer)
{
   const struct intel_device_info *info = cmd_buffer->device->info;

   const VkResult result = anv_cmd_buffer_ensure_rcs_companion(cmd_buffer);
   if (result != VK_SUCCESS) {
      anv_batch_set_error(&cmd_buffer->batch, result);
      return ANV_STATE_NULL;
   }

   assert(cmd_buffer->companion_rcs_cmd_buffer != NULL);

   /* Re-emit the aux table register in every command buffer. This way we
    * ensure that we have the table even if this command buffer doesn't
    * initialize any images.
    */
   if (cmd_buffer->device->info->has_aux_map) {
      anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
                                ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
                                "new cmd buffer with aux-tt");
   }

   return anv_genX(info, cmd_buffer_begin_companion_rcs_syncpoint)(cmd_buffer);
}

static void
end_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_state syncpoint)
{
   const struct intel_device_info *info = cmd_buffer->device->info;
   anv_genX(info, cmd_buffer_end_companion_rcs_syncpoint)(cmd_buffer,
                                                          syncpoint);
}

static bool
anv_blorp_blitter_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
                                       struct anv_image *image,
                                       const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo,
                                       const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   if (!anv_cmd_buffer_is_blitter_queue(cmd_buffer))
      return false;

   assert((pCopyBufferToImageInfo && !pCopyImageToBufferInfo) ||
          (pCopyImageToBufferInfo && !pCopyBufferToImageInfo));

   bool blorp_execute_on_companion = false;
   VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_NONE;
   const uint32_t region_count = pCopyBufferToImageInfo ?
                                 pCopyBufferToImageInfo->regionCount :
                                 pCopyImageToBufferInfo->regionCount;

   for (unsigned r = 0; r < region_count &&
        !blorp_execute_on_companion; r++) {
      if (pCopyBufferToImageInfo) {
         aspect_mask =
            pCopyBufferToImageInfo->pRegions[r].imageSubresource.aspectMask;
      } else {
         aspect_mask =
            pCopyImageToBufferInfo->pRegions[r].imageSubresource.aspectMask;
      }

      enum isl_format linear_format =
         anv_get_isl_format(cmd_buffer->device->info, image->vk.format,
                            aspect_mask, VK_IMAGE_TILING_LINEAR);
      const struct isl_format_layout *linear_fmtl =
         isl_format_get_layout(linear_format);

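      /* Formats whose bpb is divisible by 3 (RGB layouts) are not handled
       * natively by the blitter; 96 bpb is the one exception, and only when
       * the image is linear.
       */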
      switch (linear_fmtl->bpb) {
      case 96:
         /* We can only support linear mode for 96bpp on the blitter engine. */
         blorp_execute_on_companion |=
            image->vk.tiling != VK_IMAGE_TILING_LINEAR;
         break;
      default:
         blorp_execute_on_companion |= linear_fmtl->bpb % 3 == 0;
         break;
      }
   }

   return blorp_execute_on_companion;
}

static bool
anv_blorp_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
                               struct anv_image *dst_image)
{
   /* MSAA images have to be dealt with on the companion RCS command buffer
    * for both the CCS and BCS engines.
    */
   if ((anv_cmd_buffer_is_blitter_queue(cmd_buffer) ||
        anv_cmd_buffer_is_compute_queue(cmd_buffer)) &&
       dst_image->vk.samples > 1)
      return true;

   /* Emulation of formats is done through a compute shader, so we need
    * the companion command buffer for the BCS engine.
    */
   if (anv_cmd_buffer_is_blitter_queue(cmd_buffer) &&
       dst_image->emu_plane_format != VK_FORMAT_UNDEFINED)
      return true;

   return false;
}

void anv_CmdCopyImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageInfo2*                     pCopyImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);

   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   if (anv_blorp_execute_on_companion(cmd_buffer, dst_image)) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      copy_image(cmd_buffer, &batch,
                 src_image, pCopyImageInfo->srcImageLayout,
                 dst_image, pCopyImageInfo->dstImageLayout,
                 &pCopyImageInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);

   if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
      assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
      const enum anv_pipe_bits pipe_bits =
         anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
         ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
      anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
                                "Copy flush before astc emu");

      for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
         const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];
         const VkOffset3D block_offset = vk_image_offset_to_elements(
            &dst_image->vk, region->dstOffset);
         const VkExtent3D block_extent = vk_image_extent_to_elements(
            &src_image->vk, region->extent);
         anv_astc_emu_process(cmd_buffer, dst_image,
                              pCopyImageInfo->dstImageLayout,
                              &region->dstSubresource,
                              block_offset, block_extent);
      }
   }

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

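/* Map a texel size in bytes to a UINT format with the same bpb. blorp_copy
 * only requires that the bpb matches, so e.g. a 12 B texel can be moved
 * around as R32G32B32_UINT regardless of its real format.
 */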
static enum isl_format
isl_format_for_size(unsigned size_B)
{
   /* Prefer 32-bit per component formats for CmdFillBuffer */
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R16_UINT;
   case 3:  return ISL_FORMAT_R8G8B8_UINT;
   case 4:  return ISL_FORMAT_R32_UINT;
   case 6:  return ISL_FORMAT_R16G16B16_UINT;
   case 8:  return ISL_FORMAT_R32G32_UINT;
   case 12: return ISL_FORMAT_R32G32B32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unknown format size");
   }
}

static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     VkImageLayout image_layout,
                     const VkBufferImageCopy2 *region,
                     bool buffer_to_image)
{
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };
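   /* The buffer side is bound as one 2D surface per slice: its level and
    * offsets stay zero, and the loop at the bottom instead advances
    * buffer.surf.addr.offset by image_stride_B for each slice.
    */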

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;

   get_blorp_surf_for_anv_image(cmd_buffer, anv_image, aspect,
                                buffer_to_image ?
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT :
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                image_layout, ISL_AUX_USAGE_NONE,
                                &image.surf);
   image.offset =
      vk_image_sanitize_offset(&anv_image->vk, region->imageOffset);
   image.level = region->imageSubresource.mipLevel;

   VkExtent3D extent =
      vk_image_sanitize_extent(&anv_image->vk, region->imageExtent);
   if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
      image.offset.z = region->imageSubresource.baseArrayLayer;
      extent.depth =
         vk_image_subresource_layer_count(&anv_image->vk,
                                          &region->imageSubresource);
   }

   const enum isl_format linear_format =
      anv_get_isl_format(cmd_buffer->device->info, anv_image->vk.format,
                         aspect, VK_IMAGE_TILING_LINEAR);
   const struct isl_format_layout *linear_fmtl =
      isl_format_get_layout(linear_format);

   const struct vk_image_buffer_layout buffer_layout =
      vk_image_buffer_copy_layout(&anv_image->vk, region);

   /* Some formats have additional restrictions which may cause ISL to
    * fail to create a surface for us. For example, YCbCr formats
    * have to have 2-pixel aligned strides.
    *
    * To avoid these issues, we always bind the buffer as if it's a
    * "normal" format like RGBA32_UINT. Since we're using blorp_copy,
    * the format doesn't matter as long as it has the right bpb.
    */
   const VkExtent2D buffer_extent = {
      .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
      .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
   };
   const enum isl_format buffer_format =
      isl_format_for_size(linear_fmtl->bpb / 8);

   struct isl_surf buffer_isl_surf;
   get_blorp_surf_for_anv_buffer(cmd_buffer,
                                 anv_buffer, region->bufferOffset,
                                 buffer_extent.width, buffer_extent.height,
                                 buffer_layout.row_stride_B, buffer_format,
                                 false, &buffer.surf, &buffer_isl_surf);

   if (&image == dst) {
      /* In this case, the source is the buffer and, since blorp takes its
       * copy dimensions in terms of the source format, we have to use the
       * scaled down version for compressed textures because the source
       * format is an RGB format.
       */
      extent.width = buffer_extent.width;
      extent.height = buffer_extent.height;

      anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                        aspect, dst->surf.aux_usage,
                                        dst->level,
                                        dst->offset.z, extent.depth);
   }

   for (unsigned z = 0; z < extent.depth; z++) {
      blorp_copy(batch, &src->surf, src->level, src->offset.z,
                 &dst->surf, dst->level, dst->offset.z,
                 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                 extent.width, extent.height);

      image.offset.z++;
      buffer.surf.addr.offset += buffer_layout.image_stride_B;
   }
}

void anv_CmdCopyBufferToImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferToImageInfo2*             pCopyBufferToImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   bool blorp_execute_on_companion =
      anv_blorp_execute_on_companion(cmd_buffer, dst_image);

   /* Check whether any of the aspects is incompatible with the blitter
    * engine. If so, use the companion RCS command buffer for the blit,
    * since 3-component formats are not supported natively on the blitter
    * except at 96 bpb.
    */
   blorp_execute_on_companion |=
      anv_blorp_blitter_execute_on_companion(cmd_buffer, dst_image,
                                             pCopyBufferToImageInfo, NULL);

   if (blorp_execute_on_companion) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r], true);
   }

   anv_blorp_batch_finish(&batch);

   if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
      assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
      const enum anv_pipe_bits pipe_bits =
         anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
         ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
      anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
                                "Copy flush before astc emu");

      for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
         const VkBufferImageCopy2 *region =
            &pCopyBufferToImageInfo->pRegions[r];
         const VkOffset3D block_offset = vk_image_offset_to_elements(
            &dst_image->vk, region->imageOffset);
         const VkExtent3D block_extent = vk_image_extent_to_elements(
            &dst_image->vk, region->imageExtent);
         anv_astc_emu_process(cmd_buffer, dst_image,
                              pCopyBufferToImageInfo->dstImageLayout,
                              &region->imageSubresource,
                              block_offset, block_extent);
      }
   }

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

static void
anv_add_buffer_write_pending_bits(struct anv_cmd_buffer *cmd_buffer,
                                  const char *reason)
{
   const struct intel_device_info *devinfo = cmd_buffer->device->info;

   if (anv_cmd_buffer_is_blitter_queue(cmd_buffer))
      return;

   cmd_buffer->state.queries.buffer_write_bits |=
      (cmd_buffer->state.current_pipeline ==
       cmd_buffer->device->physical->gpgpu_pipeline_value) ?
      ANV_QUERY_COMPUTE_WRITES_PENDING_BITS :
      ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo);
}

void anv_CmdCopyImageToBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageToBufferInfo2*             pCopyImageToBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   UNUSED struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   bool blorp_execute_on_companion =
      anv_blorp_execute_on_companion(cmd_buffer, src_image);

   /* Check whether any of the aspects is incompatible with the blitter
    * engine. If so, use the companion RCS command buffer for the blit,
    * since 3-component formats are not supported natively on the blitter
    * except at 96 bpb.
    */
   blorp_execute_on_companion |=
      anv_blorp_blitter_execute_on_companion(cmd_buffer, src_image, NULL,
                                             pCopyImageToBufferInfo);

   if (blorp_execute_on_companion) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r], false);
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "after copy image to buffer");

   anv_blorp_batch_finish(&batch);

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

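/* Normalize one axis of a blit so that both source and destination
 * coordinates increase. Returns true if exactly one of the two pairs was
 * swapped, in which case the blit must mirror along that axis.
 */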
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

static void
blit_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageBlit2 *region,
           VkFilter filter)
{
   const VkImageSubresourceLayers *src_res = &region->srcSubresource;
   const VkImageSubresourceLayers *dst_res = &region->dstSubresource;

   struct blorp_surf src, dst;

   enum blorp_filter blorp_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      blorp_filter = BLORP_FILTER_NEAREST;
      break;
   case VK_FILTER_LINEAR:
      blorp_filter = BLORP_FILTER_BILINEAR;
      break;
   default:
      unreachable("Invalid filter");
   }

   assert(anv_image_aspects_compatible(src_res->aspectMask,
                                       dst_res->aspectMask));

   anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   src_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE, &src);
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   dst_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE, &dst);

      VkFormat src_vk_format = src_image->vk.format;

      if (src_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
         /* redirect src to the hidden plane */
         const uint32_t plane = src_image->n_planes;
         const struct anv_surface *surface =
            &src_image->planes[plane].primary_surface;
         const struct anv_address address =
            anv_image_address(src_image, &surface->memory_range);
         src.surf = &surface->isl;
         src.addr.offset = address.offset;

         src_vk_format = src_image->emu_plane_format;
      }

      struct anv_format_plane src_format =
         anv_get_format_aspect(cmd_buffer->device->info, src_vk_format,
                               1U << aspect_bit, src_image->vk.tiling);
      struct anv_format_plane dst_format =
         anv_get_format_aspect(cmd_buffer->device->info, dst_image->vk.format,
                               1U << aspect_bit, dst_image->vk.tiling);

      unsigned dst_start, dst_end;
      if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = region->dstOffsets[0].z;
         dst_end = region->dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start +
            vk_image_subresource_layer_count(&dst_image->vk, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = region->srcOffsets[0].z;
         src_end = region->srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start +
            vk_image_subresource_layer_count(&src_image->vk, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      const unsigned num_layers = dst_end - dst_start;
      float src_z_step = (float)(src_end - src_start) / (float)num_layers;

      /* There is no interpolation to the pixel center during rendering, so
       * add the 0.5 offset ourselves here.
       */
      float depth_center_offset = 0;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
         depth_center_offset = 0.5 / num_layers * (src_end - src_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
         depth_center_offset *= -1;
      }

      unsigned src_x0 = region->srcOffsets[0].x;
      unsigned src_x1 = region->srcOffsets[1].x;
      unsigned dst_x0 = region->dstOffsets[0].x;
      unsigned dst_x1 = region->dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = region->srcOffsets[0].y;
      unsigned src_y1 = region->srcOffsets[1].y;
      unsigned dst_y0 = region->dstOffsets[0].y;
      unsigned dst_y1 = region->dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        1U << aspect_bit,
                                        dst.aux_usage,
                                        dst_res->mipLevel,
                                        dst_start, num_layers);

      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         float src_z = src_start + i * src_z_step + depth_center_offset;

         blorp_blit(batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    blorp_filter, flip_x, flip_y);
      }
   }
}

void anv_CmdBlitImage2(
    VkCommandBuffer                             commandBuffer,
    const VkBlitImageInfo2*                     pBlitImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
      blit_image(cmd_buffer, &batch,
                 src_image, pBlitImageInfo->srcImageLayout,
                 dst_image, pBlitImageInfo->dstImageLayout,
                 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
   }

   anv_blorp_batch_finish(&batch);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
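 * e.g. gcd_pow2_u64(24, 16) == 8, the largest power of two dividing both.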
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case, the MIN2() will take the other one. If both are 0 then we will
    * hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

static void
copy_buffer(struct anv_device *device,
            struct blorp_batch *batch,
            struct anv_buffer *src_buffer,
            struct anv_buffer *dst_buffer,
            const VkBufferCopy2 *region)
{
   struct blorp_address src = {
      .buffer = src_buffer->address.bo,
      .offset = src_buffer->address.offset + region->srcOffset,
      .mocs = anv_mocs(device, src_buffer->address.bo,
                       blorp_batch_isl_copy_usage(batch, false /* is_dest */,
                                                  anv_buffer_is_protected(src_buffer))),
   };
   struct blorp_address dst = {
      .buffer = dst_buffer->address.bo,
      .offset = dst_buffer->address.offset + region->dstOffset,
      .mocs = anv_mocs(device, dst_buffer->address.bo,
                       blorp_batch_isl_copy_usage(batch, true /* is_dest */,
                                                  anv_buffer_is_protected(dst_buffer))),
   };

   blorp_buffer_copy(batch, src, dst, region->size);
}

void anv_CmdCopyBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferInfo2*                    pCopyBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        cmd_buffer->state.current_pipeline ==
                        cmd_buffer->device->physical->gpgpu_pipeline_value ?
                        BLORP_BATCH_USE_COMPUTE : 0);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
                  &pCopyBufferInfo->pRegions[r]);
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "after copy buffer");

   anv_blorp_batch_finish(&batch);
}


void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        cmd_buffer->state.current_pipeline ==
                        cmd_buffer->device->physical->gpgpu_pipeline_value ?
                        BLORP_BATCH_USE_COMPUTE : 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU. Flush the
    * texture cache so we don't get anything stale.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before UpdateBuffer");

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_temporary_state(cmd_buffer, copy_size, 64);
      struct anv_address tmp_addr =
         anv_cmd_buffer_temporary_state_address(cmd_buffer, tmp_data);

      memcpy(tmp_data.map, pData, copy_size);

      struct blorp_address src = {
         .buffer = tmp_addr.bo,
         .offset = tmp_addr.offset,
         .mocs = anv_mocs(cmd_buffer->device, NULL,
                          get_usage_flag_for_cmd_buffer(cmd_buffer,
                                                        false /* is_dest */,
                                                        false /* protected */)),
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->address.bo,
         .offset = dst_buffer->address.offset + dstOffset,
         .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
                          get_usage_flag_for_cmd_buffer(
                             cmd_buffer,
                             true /* is_dest */,
                             anv_buffer_is_protected(dst_buffer))),
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "update buffer");

   anv_blorp_batch_finish(&batch);
}

void
anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
                         struct anv_address address,
                         VkDeviceSize size,
                         uint32_t data,
                         bool protected)
{
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        cmd_buffer->state.current_pipeline ==
                        cmd_buffer->device->physical->gpgpu_pipeline_value ?
                        BLORP_BATCH_USE_COMPUTE : 0);

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   uint64_t offset = address.offset;
   bs = gcd_pow2_u64(bs, offset);
   bs = gcd_pow2_u64(bs, size);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

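   /* The fill is split into at most three passes: full MAX_SURFACE_DIM x
    * MAX_SURFACE_DIM rectangles first, then one MAX_SURFACE_DIM-wide
    * rectangle covering the remaining whole rows, then a single row for
    * whatever is left over.
    */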
1187 const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
1188 while (size >= max_fill_size) {
1189 get_blorp_surf_for_anv_address(cmd_buffer,
1190 (struct anv_address) {
1191 .bo = address.bo, .offset = offset,
1192 },
1193 MAX_SURFACE_DIM, MAX_SURFACE_DIM,
1194 MAX_SURFACE_DIM * bs, isl_format,
1195 true /* is_dest */, protected,
1196 &surf, &isl_surf);
1197
1198 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1199 0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
1200 color, 0 /* color_write_disable */);
1201 size -= max_fill_size;
1202 offset += max_fill_size;
1203 }
1204
1205 uint64_t height = size / (MAX_SURFACE_DIM * bs);
1206 assert(height < MAX_SURFACE_DIM);
1207 if (height != 0) {
1208 const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
1209 get_blorp_surf_for_anv_address(cmd_buffer,
1210 (struct anv_address) {
1211 .bo = address.bo, .offset = offset,
1212 },
1213 MAX_SURFACE_DIM, height,
1214 MAX_SURFACE_DIM * bs, isl_format,
1215 true /* is_dest */, protected,
1216 &surf, &isl_surf);
1217
1218 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1219 0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
1220 color, 0 /* color_write_disable */);
1221 size -= rect_fill_size;
1222 offset += rect_fill_size;
1223 }
1224
1225 if (size != 0) {
1226 const uint32_t width = size / bs;
1227 get_blorp_surf_for_anv_address(cmd_buffer,
1228 (struct anv_address) {
1229 .bo = address.bo, .offset = offset,
1230 },
1231 width, 1,
1232 width * bs, isl_format,
1233 true /* is_dest */, protected,
1234 &surf, &isl_surf);
1235
1236 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1237 0, 0, 1, 0, 0, width, 1,
1238 color, 0 /* color_write_disable */);
1239 }
1240
1241 anv_blorp_batch_finish(&batch);
1242 }
1243
anv_CmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize fillSize,uint32_t data)1244 void anv_CmdFillBuffer(
1245 VkCommandBuffer commandBuffer,
1246 VkBuffer dstBuffer,
1247 VkDeviceSize dstOffset,
1248 VkDeviceSize fillSize,
1249 uint32_t data)
1250 {
1251 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1252 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
1253
1254 fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);
1255
1256 /* From the Vulkan spec:
1257 *
1258 * "size is the number of bytes to fill, and must be either a multiple
1259 * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
1260 * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
1261 * buffer is not a multiple of 4, then the nearest smaller multiple is
1262 * used."
1263 */
1264 fillSize &= ~3ull;
1265
1266 anv_cmd_buffer_fill_area(cmd_buffer,
1267 anv_address_add(dst_buffer->address, dstOffset),
1268 fillSize, data,
1269 anv_buffer_is_protected(dst_buffer));
1270
1271 anv_add_buffer_write_pending_bits(cmd_buffer, "after fill buffer");
1272 }
1273
anv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)1274 void anv_CmdClearColorImage(
1275 VkCommandBuffer commandBuffer,
1276 VkImage _image,
1277 VkImageLayout imageLayout,
1278 const VkClearColorValue* pColor,
1279 uint32_t rangeCount,
1280 const VkImageSubresourceRange* pRanges)
1281 {
1282 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1283 ANV_FROM_HANDLE(anv_image, image, _image);
1284
1285 struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
1286 UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
1287
1288 if (anv_blorp_execute_on_companion(cmd_buffer, image)) {
1289 rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
1290 cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
1291 }
1292
1293 struct blorp_batch batch;
1294 anv_blorp_batch_init(cmd_buffer, &batch, 0);
1295
1296 for (unsigned r = 0; r < rangeCount; r++) {
1297 if (pRanges[r].aspectMask == 0)
1298 continue;
1299
1300 assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
1301
1302 struct blorp_surf surf;
1303 get_blorp_surf_for_anv_image(cmd_buffer,
1304 image, pRanges[r].aspectMask,
1305 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1306 imageLayout, ISL_AUX_USAGE_NONE, &surf);
1307
1308 struct anv_format_plane src_format =
1309 anv_get_format_aspect(cmd_buffer->device->info, image->vk.format,
1310 VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);
1311
1312 unsigned base_layer = pRanges[r].baseArrayLayer;
1313 uint32_t layer_count =
1314 vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
1315 uint32_t level_count =
1316 vk_image_subresource_level_count(&image->vk, &pRanges[r]);
1317
1318 for (uint32_t i = 0; i < level_count; i++) {
1319 const unsigned level = pRanges[r].baseMipLevel + i;
1320 const unsigned level_width = u_minify(image->vk.extent.width, level);
1321 const unsigned level_height = u_minify(image->vk.extent.height, level);
1322
1323 if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
1324 base_layer = 0;
1325 layer_count = u_minify(image->vk.extent.depth, level);
1326 }
1327
1328 anv_cmd_buffer_mark_image_written(cmd_buffer, image,
1329 pRanges[r].aspectMask,
1330 surf.aux_usage, level,
1331 base_layer, layer_count);
1332
1333 blorp_clear(&batch, &surf,
1334 src_format.isl_format, src_format.swizzle,
1335 level, base_layer, layer_count,
1336 0, 0, level_width, level_height,
1337 vk_to_isl_color(*pColor), 0 /* color_write_disable */);
1338 }
1339 }
1340
1341 anv_blorp_batch_finish(&batch);
1342
1343 if (rcs_done.alloc_size)
1344 end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
1345 }
1346
anv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)1347 void anv_CmdClearDepthStencilImage(
1348 VkCommandBuffer commandBuffer,
1349 VkImage image_h,
1350 VkImageLayout imageLayout,
1351 const VkClearDepthStencilValue* pDepthStencil,
1352 uint32_t rangeCount,
1353 const VkImageSubresourceRange* pRanges)
1354 {
1355 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1356 ANV_FROM_HANDLE(anv_image, image, image_h);
1357
1358 struct blorp_batch batch;
1359 anv_blorp_batch_init(cmd_buffer, &batch, 0);
1360 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1361
1362 struct blorp_surf depth, stencil;
1363 if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1364 get_blorp_surf_for_anv_image(cmd_buffer,
1365 image, VK_IMAGE_ASPECT_DEPTH_BIT,
1366 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1367 imageLayout, ISL_AUX_USAGE_NONE, &depth);
1368 } else {
1369 memset(&depth, 0, sizeof(depth));
1370 }
1371
1372 if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1373 get_blorp_surf_for_anv_image(cmd_buffer,
1374 image, VK_IMAGE_ASPECT_STENCIL_BIT,
1375 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1376 imageLayout, ISL_AUX_USAGE_NONE, &stencil);
1377 } else {
1378 memset(&stencil, 0, sizeof(stencil));
1379 }
1380
1381 for (unsigned r = 0; r < rangeCount; r++) {
1382 if (pRanges[r].aspectMask == 0)
1383 continue;
1384
1385 bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
1386 bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
1387
1388 unsigned base_layer = pRanges[r].baseArrayLayer;
1389 uint32_t layer_count =
1390 vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
1391 uint32_t level_count =
1392 vk_image_subresource_level_count(&image->vk, &pRanges[r]);
1393
1394 for (uint32_t i = 0; i < level_count; i++) {
1395 const unsigned level = pRanges[r].baseMipLevel + i;
1396 const unsigned level_width = u_minify(image->vk.extent.width, level);
1397 const unsigned level_height = u_minify(image->vk.extent.height, level);
1398
1399 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
1400 layer_count = u_minify(image->vk.extent.depth, level);
1401
1402 blorp_clear_depth_stencil(&batch, &depth, &stencil,
1403 level, base_layer, layer_count,
1404 0, 0, level_width, level_height,
1405 clear_depth, pDepthStencil->depth,
1406 clear_stencil ? 0xff : 0,
1407 pDepthStencil->stencil);
1408 }
1409 }
1410
1411 anv_blorp_batch_finish(&batch);
1412 }
1413
1414 VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer * cmd_buffer,uint32_t num_entries,uint32_t * state_offset,struct anv_state * bt_state)1415 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
1416 uint32_t num_entries,
1417 uint32_t *state_offset,
1418 struct anv_state *bt_state)
1419 {
1420 *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1421 state_offset);
1422 if (bt_state->map == NULL) {
1423 /* We ran out of space. Grab a new binding table block. */
1424 VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
1425 if (result != VK_SUCCESS)
1426 return result;
1427
1428 /* Re-emit state base addresses so we get the new surface state base
1429 * address before we start emitting binding tables etc.
1430 */
1431 anv_cmd_buffer_emit_bt_pool_base_address(cmd_buffer);
1432
1433 *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1434 state_offset);
1435 assert(bt_state->map != NULL);
1436 }
1437
1438 return VK_SUCCESS;
1439 }
1440
1441 static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer * cmd_buffer,struct anv_state surface_state,uint32_t * bt_offset)1442 binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
1443 struct anv_state surface_state,
1444 uint32_t *bt_offset)
1445 {
1446 uint32_t state_offset;
1447 struct anv_state bt_state;
1448
1449 VkResult result =
1450 anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
1451 &bt_state);
1452 if (result != VK_SUCCESS)
1453 return result;
1454
1455 uint32_t *bt_map = bt_state.map;
1456 bt_map[0] = surface_state.offset + state_offset;
1457
1458 *bt_offset = bt_state.offset;
1459 return VK_SUCCESS;
1460 }
1461
1462 static bool
can_fast_clear_color_att(struct anv_cmd_buffer * cmd_buffer,struct blorp_batch * batch,const struct anv_attachment * att,const VkClearAttachment * attachment,uint32_t rectCount,const VkClearRect * pRects)1463 can_fast_clear_color_att(struct anv_cmd_buffer *cmd_buffer,
1464 struct blorp_batch *batch,
1465 const struct anv_attachment *att,
1466 const VkClearAttachment *attachment,
1467 uint32_t rectCount, const VkClearRect *pRects)
1468 {
1469 union isl_color_value clear_color =
1470 vk_to_isl_color(attachment->clearValue.color);
1471
1472 if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
1473 return false;
1474
1475 /* We don't support fast clearing with conditional rendering at the
1476 * moment. All the tracking done around fast clears (clear color updates
1477 * and fast-clear type updates) happens unconditionally.
1478 */
1479 if (batch->flags & BLORP_BATCH_PREDICATE_ENABLE)
1480 return false;
1481
1482 if (rectCount > 1) {
1483 anv_perf_warn(VK_LOG_OBJS(&cmd_buffer->device->vk.base),
1484 "Fast clears for vkCmdClearAttachments supported only for rectCount == 1");
1485 return false;
1486 }
1487
1488 /* We only support fast-clears on the first layer */
1489 if (pRects[0].layerCount > 1 || pRects[0].baseArrayLayer > 0)
1490 return false;
1491
1492 bool is_multiview = cmd_buffer->state.gfx.view_mask != 0;
1493 if (is_multiview && (cmd_buffer->state.gfx.view_mask != 1))
1494 return false;
1495
1496 return anv_can_fast_clear_color_view(cmd_buffer->device,
1497 (struct anv_image_view *)att->iview,
1498 att->layout,
1499 clear_color,
1500 pRects->layerCount,
1501 pRects->rect,
1502 cmd_buffer->queue_family->queueFlags);
1503 }
1504
static void
exec_ccs_op(struct anv_cmd_buffer *cmd_buffer,
            struct blorp_batch *batch,
            const struct anv_image *image,
            enum isl_format format, struct isl_swizzle swizzle,
            VkImageAspectFlagBits aspect, uint32_t level,
            uint32_t base_layer, uint32_t layer_count,
            enum isl_aux_op ccs_op, union isl_color_value *clear_value)
{
   assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
   assert(image->vk.samples == 1);
   assert(level < anv_image_aux_levels(image, aspect));
   /* Multi-LOD YCbCr is not allowed */
   assert(image->n_planes == 1 || level == 0);
   assert(base_layer + layer_count <=
          anv_image_aux_layers(image, aspect, level));

   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage,
                                &surf);

   uint32_t level_width = u_minify(surf.surf->logical_level0_px.w, level);
   uint32_t level_height = u_minify(surf.surf->logical_level0_px.h, level);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear. So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   switch (ccs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(batch, &surf, format, swizzle,
                       level, base_layer, layer_count,
                       0, 0, level_width, level_height);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_PARTIAL_RESOLVE: {
      /* Wa_1508744258: Enable RHWO optimization for resolves */
      const bool enable_rhwo_opt =
         intel_needs_workaround(cmd_buffer->device->info, 1508744258);

      if (enable_rhwo_opt)
         cmd_buffer->state.pending_rhwo_optimization_enabled = true;

      blorp_ccs_resolve(batch, &surf, level, base_layer, layer_count,
                        format, ccs_op);

      if (enable_rhwo_opt)
         cmd_buffer->state.pending_rhwo_optimization_enabled = false;
      break;
   }
   case ISL_AUX_OP_AMBIGUATE:
      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;
         blorp_ccs_ambiguate(batch, &surf, level, layer);
      }
      break;
   default:
      unreachable("Unsupported CCS operation");
   }
}

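/* A sketch of how exec_ccs_op() is typically driven; the values below match
 * the single-sampled fast-clear call in clear_color_attachment() further
 * down and are illustrative, not a fixed recipe:
 *
 *    union isl_color_value clear_color =
 *       vk_to_isl_color(attachment->clearValue.color);
 *    exec_ccs_op(cmd_buffer, batch, iview->image,
 *                iview->planes[0].isl.format,
 *                iview->planes[0].isl.swizzle,
 *                VK_IMAGE_ASPECT_COLOR_BIT,
 *                0, 0, 1, ISL_AUX_OP_FAST_CLEAR,
 *                &clear_color);
 *
 * For resolves and ambiguates no clear color is consumed, so passing
 * clear_value == NULL is fine there.
 */
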
static void
exec_mcs_op(struct anv_cmd_buffer *cmd_buffer,
            struct blorp_batch *batch,
            const struct anv_image *image,
            enum isl_format format, struct isl_swizzle swizzle,
            VkImageAspectFlagBits aspect,
            uint32_t base_layer, uint32_t layer_count,
            enum isl_aux_op mcs_op, union isl_color_value *clear_value)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(image->vk.samples > 1);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));

   /* Multisampling with multi-planar formats is not supported */
   assert(image->n_planes == 1);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                ISL_AUX_USAGE_MCS, &surf);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear. So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   switch (mcs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(batch, &surf, format, swizzle,
                       0, base_layer, layer_count,
                       0, 0, image->vk.extent.width, image->vk.extent.height);
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
      blorp_mcs_partial_resolve(batch, &surf, format,
                                base_layer, layer_count);
      break;
   case ISL_AUX_OP_AMBIGUATE:
      blorp_mcs_ambiguate(batch, &surf, base_layer, layer_count);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   default:
      unreachable("Unsupported MCS operation");
   }
}

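/* The multisampled analogue of the sketch above: MCS surfaces are
 * single-LOD, so there is no level parameter. Again mirroring the call in
 * clear_color_attachment() below with illustrative values:
 *
 *    exec_mcs_op(cmd_buffer, batch, iview->image,
 *                iview->planes[0].isl.format,
 *                iview->planes[0].isl.swizzle,
 *                VK_IMAGE_ASPECT_COLOR_BIT,
 *                0, 1, ISL_AUX_OP_FAST_CLEAR,
 *                &clear_color);
 */
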
static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const uint32_t att_idx = attachment->colorAttachment;
   assert(att_idx < gfx->color_att_count);
   const struct anv_attachment *att = &gfx->color_att[att_idx];

   if (att->vk_format == VK_FORMAT_UNDEFINED)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   const struct anv_image_view *iview = att->iview;
   if (iview &&
       can_fast_clear_color_att(cmd_buffer, batch, att,
                                attachment, rectCount, pRects)) {
      if (iview->image->vk.samples == 1) {
         exec_ccs_op(cmd_buffer, batch, iview->image,
                     iview->planes[0].isl.format,
                     iview->planes[0].isl.swizzle,
                     VK_IMAGE_ASPECT_COLOR_BIT,
                     0, 0, 1, ISL_AUX_OP_FAST_CLEAR,
                     &clear_color);
      } else {
         exec_mcs_op(cmd_buffer, batch, iview->image,
                     iview->planes[0].isl.format,
                     iview->planes[0].isl.swizzle,
                     VK_IMAGE_ASPECT_COLOR_BIT,
                     0, 1, ISL_AUX_OP_FAST_CLEAR,
                     &clear_color);
      }

      if (cmd_buffer->device->info->ver < 20) {
         anv_cmd_buffer_mark_image_fast_cleared(cmd_buffer, iview->image,
                                                iview->planes[0].isl.format,
                                                clear_color);
         anv_cmd_buffer_load_clear_color_from_image(cmd_buffer,
                                                    att->surface_state.state,
                                                    iview->image);
      }
      return;
   }

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att->surface_state.state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (gfx->view_mask) {
      u_foreach_bit(view_idx, gfx->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED,
                                    gfx->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED,
                              gfx->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
anv_fast_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                             struct blorp_batch *batch,
                             const struct anv_image *image,
                             VkImageAspectFlags aspects,
                             uint32_t level,
                             uint32_t base_layer, uint32_t layer_count,
                             VkRect2D area,
                             const VkClearDepthStencilValue *clear_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
      assert(base_layer + layer_count <=
             anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
    *
    *    "The following is required when performing a depth buffer clear with
    *    using the WM_STATE or 3DSTATE_WM:
    *
    *       * If other rendering operations have preceded this clear, a
    *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *         enabled must be issued before the rectangle primitive used for
    *         the depth buffer clear operation.
    *       * [...]"
    *
    * Even though the PRM only says that this is required if using 3DSTATE_WM
    * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
    * hangs when doing a clear with WM_HZ_OP.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "before clear hiz");

   if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
       depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) {
      /* From Bspec 47010 (Depth Buffer Clear):
       *
       *    Since the fast clear cycles to CCS are not cached in TileCache,
       *    any previous depth buffer writes to overlapping pixels must be
       *    flushed out of TileCache before a succeeding Depth Buffer Clear.
       *    This restriction only applies to Depth Buffer with write-thru
       *    enabled, since fast clears to CCS only occur for write-thru mode.
       *
       * There may have been a write to this depth buffer. Flush it from the
       * tile cache just in case.
       *
       * Set the CS stall bit to guarantee that the fast clear starts
       * executing only after the tile cache flush has completed.
       *
       * There is no Bspec requirement to flush the data cache, but
       * experiments show that flushing the data cache helps to resolve the
       * corruption.
       */
      unsigned wa_flush = cmd_buffer->device->info->verx10 >= 125 ?
                          ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0;
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                                ANV_PIPE_CS_STALL_BIT |
                                ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                                wa_flush,
                                "before clear hiz_ccs_wt");
   }

   blorp_hiz_clear_depth_stencil(batch, &depth, &stencil,
                                 level, base_layer, layer_count,
                                 area.offset.x, area.offset.y,
                                 area.offset.x + area.extent.width,
                                 area.offset.y + area.extent.height,
                                 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                                 clear_value->depth,
                                 aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                 clear_value->stencil);

   /* From the SKL PRM, Depth Buffer Clear:
    *
    *    "Depth Buffer Clear Workaround
    *
    *    Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
    *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
    *    starting to render. DepthStall and DepthFlush are not needed between
    *    consecutive depth clear passes nor is it required if the depth-clear
    *    pass was done with “full_surf_clear” bit set in the
    *    3DSTATE_WM_HZ_OP."
    *
    * Even though the PRM provides a bunch of conditions under which this is
    * supposedly unnecessary, we choose to perform the flush unconditionally
    * just to be safe.
    *
    * From Bspec 46959, a programming note applicable to Gfx12+:
    *
    *    "Since HZ_OP has to be sent twice (first time set the clear/resolve
    *    state and 2nd time to clear the state), and HW internally flushes the
    *    depth cache on HZ_OP, there is no need to explicitly send a Depth
    *    Cache flush after Clear or Resolve."
    */
   if (cmd_buffer->device->info->verx10 < 120) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                                ANV_PIPE_DEPTH_STALL_BIT,
                                "after clear hiz");
   }
}

static bool
can_hiz_clear_att(struct anv_cmd_buffer *cmd_buffer,
                  struct blorp_batch *batch,
                  const struct anv_attachment *ds_att,
                  const VkClearAttachment *attachment,
                  uint32_t rectCount, const VkClearRect *pRects)
{
   if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
      return false;

   /* From Bspec's section MI_PREDICATE:
    *
    *    "The MI_PREDICATE command is used to control the Predicate state bit,
    *    which in turn can be used to enable/disable the processing of
    *    3DPRIMITIVE commands."
    *
    * Also from BDW/CHV Bspec's 3DSTATE_WM_HZ_OP programming notes:
    *
    *    "This command does NOT support predication from the use of the
    *    MI_PREDICATE register. To predicate depth clears and resolves on you
    *    must fall back to using the 3D_PRIMITIVE or GPGPU_WALKER commands."
    *
    * Since BLORP's predication is currently dependent on MI_PREDICATE, fall
    * back to the slow depth clear path when the BLORP_BATCH_PREDICATE_ENABLE
    * flag is set.
    */
   if (batch->flags & BLORP_BATCH_PREDICATE_ENABLE)
      return false;

   if (rectCount > 1) {
      anv_perf_warn(VK_LOG_OBJS(&cmd_buffer->device->vk.base),
                    "Fast clears for vkCmdClearAttachments supported only for rectCount == 1");
      return false;
   }

   /* When the BLORP_BATCH_NO_EMIT_DEPTH_STENCIL flag is set, BLORP can only
    * clear the first slice of the currently configured depth/stencil view.
    */
   assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
   if (pRects[0].layerCount > 1 || pRects[0].baseArrayLayer > 0)
      return false;

   return anv_can_hiz_clear_ds_view(cmd_buffer->device, ds_att->iview,
                                    ds_att->layout,
                                    attachment->aspectMask,
                                    attachment->clearValue.depthStencil.depth,
                                    pRects->rect,
                                    cmd_buffer->queue_family->queueFlags);
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const struct anv_attachment *d_att = &gfx->depth_att;
   const struct anv_attachment *s_att = &gfx->stencil_att;
   if (d_att->vk_format == VK_FORMAT_UNDEFINED &&
       s_att->vk_format == VK_FORMAT_UNDEFINED)
      return;

   const struct anv_attachment *ds_att = d_att->iview ? d_att : s_att;
   if (ds_att->iview &&
       can_hiz_clear_att(cmd_buffer, batch, ds_att, attachment, rectCount, pRects)) {
      anv_fast_clear_depth_stencil(cmd_buffer, batch, ds_att->iview->image,
                                   attachment->aspectMask,
                                   ds_att->iview->planes[0].isl.base_level,
                                   ds_att->iview->planes[0].isl.base_array_layer,
                                   pRects[0].layerCount, pRects->rect,
                                   &attachment->clearValue.depthStencil);
      return;
   }

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (d_att->vk_format != VK_FORMAT_UNDEFINED) {
      depth_format = anv_get_isl_format(cmd_buffer->device->info,
                                        d_att->vk_format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      gfx->null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (gfx->view_mask) {
      u_foreach_bit(view_idx, gfx->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format,
                                    gfx->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              depth_format,
                              gfx->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
   if (cmd_buffer->state.conditional_render_enabled) {
      anv_cmd_emit_conditional_render_predicate(cmd_buffer);
      flags |= BLORP_BATCH_PREDICATE_ENABLE;
   }
   anv_blorp_batch_init(cmd_buffer, &batch, flags);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   anv_blorp_batch_finish(&batch);
}

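/* For reference, an application-side call that exercises the fast path
 * above might look like this (plain Vulkan usage, not driver code; width
 * and height are placeholders):
 *
 *    const VkClearAttachment att = {
 *       .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 *       .colorAttachment = 0,
 *       .clearValue.color.float32 = { 0.0f, 0.0f, 0.0f, 1.0f },
 *    };
 *    const VkClearRect rect = {
 *       .rect = { .offset = { 0, 0 }, .extent = { width, height } },
 *       .baseArrayLayer = 0,
 *       .layerCount = 1,
 *    };
 *    vkCmdClearAttachments(cmd_buffer, 1, &att, 1, &rect);
 *
 * With rectCount == 1, a single layer and no conditional rendering, the
 * color path can take the fast-clear route checked in
 * can_fast_clear_color_att().
 */
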
static void
anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *src_image,
                       enum isl_format src_format_override,
                       enum isl_aux_usage src_aux_usage,
                       uint32_t src_level, uint32_t src_base_layer,
                       const struct anv_image *dst_image,
                       enum isl_format dst_format_override,
                       enum isl_aux_usage dst_aux_usage,
                       uint32_t dst_level, uint32_t dst_base_layer,
                       VkImageAspectFlagBits aspect,
                       uint32_t src_x, uint32_t src_y,
                       uint32_t dst_x, uint32_t dst_y,
                       uint32_t width, uint32_t height,
                       uint32_t layer_count,
                       enum blorp_filter filter)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
   assert(src_image->vk.samples > 1);
   assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
   assert(dst_image->vk.samples == 1);

   struct blorp_surf src_surf, dst_surf;
   get_blorp_surf_for_anv_image(cmd_buffer, src_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                src_aux_usage, &src_surf);
   if (src_aux_usage == ISL_AUX_USAGE_MCS) {
      src_surf.clear_color_addr = anv_to_blorp_address(
         anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
                                        VK_IMAGE_ASPECT_COLOR_BIT));
   }
   get_blorp_surf_for_anv_image(cmd_buffer, dst_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                dst_aux_usage, &dst_surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                     aspect, dst_aux_usage,
                                     dst_level, dst_base_layer, layer_count);

   if (filter == BLORP_FILTER_NONE) {
      /* If no explicit filter is provided, then it's implied by the type of
       * the source image.
       */
      if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
          (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
          isl_format_has_int_channel(src_surf.surf->format)) {
         filter = BLORP_FILTER_SAMPLE_0;
      } else {
         filter = BLORP_FILTER_AVERAGE;
      }
   }

   for (uint32_t l = 0; l < layer_count; l++) {
      blorp_blit(&batch,
                 &src_surf, src_level, src_base_layer + l,
                 src_format_override, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_base_layer + l,
                 dst_format_override, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 filter, false, false);
   }

   anv_blorp_batch_finish(&batch);
}

static enum blorp_filter
vk_to_blorp_resolve_mode(VkResolveModeFlagBits vk_mode)
{
   switch (vk_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
      return BLORP_FILTER_SAMPLE_0;
   case VK_RESOLVE_MODE_AVERAGE_BIT:
      return BLORP_FILTER_AVERAGE;
   case VK_RESOLVE_MODE_MIN_BIT:
      return BLORP_FILTER_MIN_SAMPLE;
   case VK_RESOLVE_MODE_MAX_BIT:
      return BLORP_FILTER_MAX_SAMPLE;
   default:
      return BLORP_FILTER_NONE;
   }
}

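/* VK_RESOLVE_MODE_NONE and any unrecognized mode map to BLORP_FILTER_NONE,
 * which anv_image_msaa_resolve() above then refines based on the source:
 * BLORP_FILTER_SAMPLE_0 for depth, stencil and integer formats,
 * BLORP_FILTER_AVERAGE otherwise. For example:
 *
 *    enum blorp_filter filter =
 *       vk_to_blorp_resolve_mode(VK_RESOLVE_MODE_MIN_BIT);
 *    assert(filter == BLORP_FILTER_MIN_SAMPLE);
 */
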
void
anv_attachment_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
                            const struct anv_attachment *att,
                            VkImageLayout layout,
                            VkImageAspectFlagBits aspect)
{
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const struct anv_image_view *src_iview = att->iview;
   const struct anv_image_view *dst_iview = att->resolve_iview;

   enum isl_aux_usage src_aux_usage =
      anv_layout_to_aux_usage(cmd_buffer->device->info,
                              src_iview->image, aspect,
                              VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                              layout,
                              cmd_buffer->queue_family->queueFlags);

   enum isl_aux_usage dst_aux_usage =
      anv_layout_to_aux_usage(cmd_buffer->device->info,
                              dst_iview->image, aspect,
                              VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                              att->resolve_layout,
                              cmd_buffer->queue_family->queueFlags);

   enum blorp_filter filter = vk_to_blorp_resolve_mode(att->resolve_mode);

   /* Depth/stencil should not use their view format for resolve because they
    * go in pairs.
    */
   enum isl_format src_format = ISL_FORMAT_UNSUPPORTED;
   enum isl_format dst_format = ISL_FORMAT_UNSUPPORTED;
   if (!(aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
      src_format = src_iview->planes[0].isl.format;
      dst_format = dst_iview->planes[0].isl.format;
   }

   const VkRect2D render_area = gfx->render_area;
   if (gfx->view_mask == 0) {
      anv_image_msaa_resolve(cmd_buffer,
                             src_iview->image, src_format, src_aux_usage,
                             src_iview->planes[0].isl.base_level,
                             src_iview->planes[0].isl.base_array_layer,
                             dst_iview->image, dst_format, dst_aux_usage,
                             dst_iview->planes[0].isl.base_level,
                             dst_iview->planes[0].isl.base_array_layer,
                             aspect,
                             render_area.offset.x, render_area.offset.y,
                             render_area.offset.x, render_area.offset.y,
                             render_area.extent.width,
                             render_area.extent.height,
                             gfx->layer_count, filter);
   } else {
      uint32_t res_view_mask = gfx->view_mask;
      while (res_view_mask) {
         int i = u_bit_scan(&res_view_mask);

         anv_image_msaa_resolve(cmd_buffer,
                                src_iview->image, src_format, src_aux_usage,
                                src_iview->planes[0].isl.base_level,
                                src_iview->planes[0].isl.base_array_layer + i,
                                dst_iview->image, dst_format, dst_aux_usage,
                                dst_iview->planes[0].isl.base_level,
                                dst_iview->planes[0].isl.base_array_layer + i,
                                aspect,
                                render_area.offset.x, render_area.offset.y,
                                render_area.offset.x, render_area.offset.y,
                                render_area.extent.width,
                                render_area.extent.height,
                                1, filter);
      }
   }
}

static void
resolve_image(struct anv_cmd_buffer *cmd_buffer,
              struct anv_image *src_image,
              VkImageLayout src_image_layout,
              struct anv_image *dst_image,
              VkImageLayout dst_image_layout,
              const VkImageResolve2 *region)
{
   assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
   assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
          vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));

   const uint32_t layer_count =
      vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource);

   anv_foreach_image_aspect_bit(aspect_bit, src_image,
                                region->srcSubresource.aspectMask) {
      enum isl_aux_usage src_aux_usage =
         anv_layout_to_aux_usage(cmd_buffer->device->info, src_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                 src_image_layout,
                                 cmd_buffer->queue_family->queueFlags);
      enum isl_aux_usage dst_aux_usage =
         anv_layout_to_aux_usage(cmd_buffer->device->info, dst_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                 dst_image_layout,
                                 cmd_buffer->queue_family->queueFlags);

      anv_image_msaa_resolve(cmd_buffer,
                             src_image, ISL_FORMAT_UNSUPPORTED, src_aux_usage,
                             region->srcSubresource.mipLevel,
                             region->srcSubresource.baseArrayLayer,
                             dst_image, ISL_FORMAT_UNSUPPORTED, dst_aux_usage,
                             region->dstSubresource.mipLevel,
                             region->dstSubresource.baseArrayLayer,
                             (1 << aspect_bit),
                             region->srcOffset.x,
                             region->srcOffset.y,
                             region->dstOffset.x,
                             region->dstOffset.y,
                             region->extent.width,
                             region->extent.height,
                             layer_count, BLORP_FILTER_NONE);
   }
}

void anv_CmdResolveImage2(
    VkCommandBuffer                             commandBuffer,
    const VkResolveImageInfo2*                  pResolveImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);

   for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
      resolve_image(cmd_buffer,
                    src_image, pResolveImageInfo->srcImageLayout,
                    dst_image, pResolveImageInfo->dstImageLayout,
                    &pResolveImageInfo->pRegions[r]);
   }
}

void
anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                      const struct anv_image *image,
                      VkImageAspectFlagBits aspect,
                      enum isl_aux_usage aux_usage,
                      enum isl_format format, struct isl_swizzle swizzle,
                      uint32_t level, uint32_t base_layer, uint32_t layer_count,
                      VkRect2D area, union isl_color_value clear_color)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);

   /* We don't support planar images with multisampling yet */
   assert(image->n_planes == 1);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                aux_usage, &surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
                                     level, base_layer, layer_count);

   blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
               level, base_layer, layer_count,
               area.offset.x, area.offset.y,
               area.offset.x + area.extent.width,
               area.offset.y + area.extent.height,
               clear_color, 0 /* color_write_disable */);

   anv_blorp_batch_finish(&batch);
}

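/* A minimal sketch of a full-surface clear through the helper above
 * (illustrative values; real callers pick a format, swizzle and aux usage
 * consistent with the image):
 *
 *    const VkRect2D area = {
 *       .offset = { 0, 0 },
 *       .extent = { image->vk.extent.width, image->vk.extent.height },
 *    };
 *    union isl_color_value red = { .f32 = { 1.0f, 0.0f, 0.0f, 1.0f } };
 *    anv_image_clear_color(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
 *                          image->planes[0].aux_usage,
 *                          image->planes[0].primary_surface.isl.format,
 *                          ISL_SWIZZLE_IDENTITY,
 *                          0, 0, 1, area, red);
 */
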
void
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlags aspects,
                              enum isl_aux_usage depth_aux_usage,
                              uint32_t level,
                              uint32_t base_layer, uint32_t layer_count,
                              VkRect2D area,
                              const VkClearDepthStencilValue *clear_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   depth_aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance. If it does this, we need to flush it out of the depth
    * cache before rendering to it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before clear DS");

   blorp_clear_depth_stencil(&batch, &depth, &stencil,
                             level, base_layer, layer_count,
                             area.offset.x, area.offset.y,
                             area.offset.x + area.extent.width,
                             area.offset.y + area.extent.height,
                             aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                             clear_value->depth,
                             (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
                             clear_value->stencil);

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance. If it does this, we need to flush it out of the render
    * cache before someone starts trying to do stencil on it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after clear DS");

   anv_blorp_batch_finish(&batch);
}

void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op)
{
   assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   assert(plane == 0);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer,
                                image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage, &surf);

   blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);

   anv_blorp_batch_finish(&batch);
}

void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area,
                    const VkClearDepthStencilValue *clear_value)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   anv_fast_clear_depth_stencil(cmd_buffer, &batch, image, aspects, level,
                                base_layer, layer_count, area, clear_value);

   anv_blorp_batch_finish(&batch);
}

void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   exec_mcs_op(cmd_buffer, &batch, image, format, swizzle, aspect,
               base_layer, layer_count, mcs_op, clear_value);

   anv_blorp_batch_finish(&batch);
}

void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   exec_ccs_op(cmd_buffer, &batch, image, format, swizzle, aspect, level,
               base_layer, layer_count, ccs_op, clear_value);

   anv_blorp_batch_finish(&batch);
}

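/* Both anv_image_mcs_op() and anv_image_ccs_op() fold their bool predicate
 * into the batch flags by multiplication: BLORP_BATCH_PREDICATE_ENABLE when
 * predicate is true, 0 otherwise. A sketch of a predicated partial resolve
 * (illustrative values; the caller must have programmed MI_PREDICATE
 * beforehand):
 *
 *    anv_image_ccs_op(cmd_buffer, image,
 *                     image->planes[0].primary_surface.isl.format,
 *                     ISL_SWIZZLE_IDENTITY,
 *                     VK_IMAGE_ASPECT_COLOR_BIT,
 *                     0, 0, 1,
 *                     ISL_AUX_OP_PARTIAL_RESOLVE, NULL, true);
 */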